| 1 | //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This implements feature and label extraction for offline supervised learning |
// of an IR to native size model.
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" |
| 14 | |
| 15 | #ifdef LLVM_HAVE_TFLITE |
| 16 | #include "llvm/Analysis/Utils/TFUtils.h" |
| 17 | #endif |
| 18 | #include "llvm/IR/Function.h" |
| 19 | #include "llvm/IR/PassManager.h" |
| 20 | #include "llvm/Support/raw_ostream.h" |
| 21 | |
| 22 | using namespace llvm; |
| 23 | |
// Unique key identifying this analysis to the new pass manager
// (standard boilerplate required of every analysis pass).
AnalysisKey InlineSizeEstimatorAnalysis::Key;
| 25 | |
| 26 | #ifdef LLVM_HAVE_TFLITE |
| 27 | #include "llvm/Analysis/LoopInfo.h" |
| 28 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 29 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 30 | #include "llvm/IR/BasicBlock.h" |
| 31 | #include "llvm/IR/Dominators.h" |
| 32 | #include "llvm/IR/Instructions.h" |
| 33 | #include "llvm/Support/Casting.h" |
| 34 | #include "llvm/Support/CommandLine.h" |
| 35 | #include <algorithm> |
| 36 | #include <deque> |
| 37 | #include <optional> |
| 38 | |
// Path to the saved TF model that predicts native code size from IR features.
// When left empty (the default), isEvaluatorRequested() is false, no evaluator
// is constructed, and the analysis yields no estimate.
static cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model" , cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR." ));
| 42 | |
| 43 | #define DEBUG_TYPE "inline-size-estimator" |
| 44 | namespace { |
// Returns the numerically largest instruction opcode. Defining
// LAST_OTHER_INST and including Instruction.def expands to a single
// `return NR;` for the final (highest-numbered) opcode in the table.
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}
| 49 | |
// Extracts the feature vector used for offline supervised learning of an
// IR-to-native-size model: a fixed set of named scalar features plus two
// histograms (per-opcode counts and counts of "important" opcode pairs).
class IRToNativeSizeLearning {
public:
  // Indices of the hand-picked scalar features stored in NamedFeatures.
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  // One function's complete feature set; see fillTensor for the flat layout.
  struct FunctionFeatures {
    // Total number of int32 features fillTensor writes.
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    // Serializes all features contiguously into Ptr; Ptr must have room for
    // FeatureCount int32_t values.
    void fillTensor(int32_t *Ptr) const;
    // Convenient read/write access to one named feature.
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);
};
| 84 | |
// This is a point in time - we determined including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model performance. We want to move away from manual
// feature selection.
// The array is given in opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted.
//
// This array must be sorted (asserted in getFunctionFeatures, and relied on
// when looking up a pair's histogram slot).
static const std::array<std::pair<size_t, size_t>, 137>
    ImportantInstructionSuccessions{
        {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},
         {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},
         {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},
         {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},
         {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},
         {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},
         {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
         {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},
         {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},
         {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
         {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},
         {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},
         {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
         {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},
         {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
         {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},
         {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
         {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},
         {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
         {64, 1}, {64, 64}, {65, 1}, {65, 65}}};
| 115 | |
// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter 2, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
// Note that instruction opcodes start from 1. For convenience, we also have an
// always 0 feature for the '0' opcode, hence the extra 1.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
    IRToNativeSizeLearning::NumNamedFeatures;
| 126 | |
| 127 | size_t getSize(Function &F, TargetTransformInfo &TTI) { |
| 128 | size_t Ret = 0; |
| 129 | for (const auto &BB : F) |
| 130 | for (const auto &I : BB) |
| 131 | Ret += TTI.getInstructionCost( |
| 132 | &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize) |
| 133 | .getValue(); |
| 134 | return Ret; |
| 135 | } |
| 136 | |
| 137 | size_t getSize(Function &F, FunctionAnalysisManager &FAM) { |
| 138 | auto &TTI = FAM.getResult<TargetIRAnalysis>(F); |
| 139 | return getSize(F, TTI); |
| 140 | } |
| 141 | |
| 142 | unsigned getMaxDominatorTreeDepth(const Function &F, |
| 143 | const DominatorTree &Tree) { |
| 144 | unsigned Ret = 0; |
| 145 | for (const auto &BB : F) |
| 146 | if (const auto *TN = Tree.getNode(&BB)) |
| 147 | Ret = std::max(Ret, TN->getLevel()); |
| 148 | return Ret; |
| 149 | } |
| 150 | } // namespace |
| 151 | |
| 152 | IRToNativeSizeLearning::FunctionFeatures |
| 153 | IRToNativeSizeLearning::getFunctionFeatures(Function &F, |
| 154 | FunctionAnalysisManager &FAM) { |
| 155 | assert(llvm::is_sorted(ImportantInstructionSuccessions) && |
| 156 | "expected function features are sorted" ); |
| 157 | |
| 158 | auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); |
| 159 | FunctionFeatures FF; |
| 160 | size_t InstrCount = getMaxInstructionID() + 1; |
| 161 | FF.InstructionHistogram.resize(InstrCount); |
| 162 | |
| 163 | FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); |
| 164 | |
| 165 | int StartID = 0; |
| 166 | int LastID = StartID; |
| 167 | auto getPairIndex = [](size_t a, size_t b) { |
| 168 | auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); |
| 169 | if (I == ImportantInstructionSuccessions.end()) |
| 170 | return -1; |
| 171 | return static_cast<int>( |
| 172 | std::distance(ImportantInstructionSuccessions.begin(), I)); |
| 173 | }; |
| 174 | |
| 175 | // We don't want debug calls, because they'd just add noise. |
| 176 | for (const auto &BB : F) { |
| 177 | for (const auto &I : BB.instructionsWithoutDebug()) { |
| 178 | auto ID = I.getOpcode(); |
| 179 | |
| 180 | ++FF.InstructionHistogram[ID]; |
| 181 | int PairIndex = getPairIndex(LastID, ID); |
| 182 | if (PairIndex >= 0) |
| 183 | ++FF.InstructionPairHistogram[PairIndex]; |
| 184 | LastID = ID; |
| 185 | if (isa<CallBase>(I)) |
| 186 | ++FF[NamedFeatureIndex::Calls]; |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); |
| 191 | FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); |
| 192 | FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); |
| 193 | FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); |
| 194 | FF[NamedFeatureIndex::Blocks] = F.size(); |
| 195 | auto &LI = FAM.getResult<LoopAnalysis>(F); |
| 196 | FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); |
| 197 | for (auto &L : LI) |
| 198 | FF[NamedFeatureIndex::MaxLoopDepth] = |
| 199 | std::max(FF[NamedFeatureIndex::MaxLoopDepth], |
| 200 | static_cast<int32_t>(L->getLoopDepth())); |
| 201 | FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); |
| 202 | return FF; |
| 203 | } |
| 204 | |
| 205 | void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { |
| 206 | std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); |
| 207 | Ptr += NamedFeatures.size(); |
| 208 | std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); |
| 209 | Ptr += InstructionHistogram.size(); |
| 210 | std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), |
| 211 | Ptr); |
| 212 | } |
| 213 | |
// The estimator is opt-in: it is considered requested only when a model path
// was supplied via -ml-inliner-ir2native-model.
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
  return !TFIR2NativeModelPath.empty();
}
| 217 | |
| 218 | InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { |
| 219 | if (!isEvaluatorRequested()) { |
| 220 | return; |
| 221 | } |
| 222 | std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>( |
| 223 | "serving_default_input_1" , |
| 224 | {1, static_cast<int64_t>( |
| 225 | IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})}; |
| 226 | std::vector<TensorSpec> OutputSpecs{ |
| 227 | TensorSpec::createSpec<float>("StatefulPartitionedCall" , {1})}; |
| 228 | Evaluator = std::make_unique<TFModelEvaluator>( |
| 229 | TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs); |
| 230 | if (!Evaluator || !Evaluator->isValid()) { |
| 231 | Evaluator.reset(); |
| 232 | return; |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | InlineSizeEstimatorAnalysis::Result |
| 237 | InlineSizeEstimatorAnalysis::run(const Function &F, |
| 238 | FunctionAnalysisManager &FAM) { |
| 239 | if (!Evaluator) |
| 240 | return std::nullopt; |
| 241 | auto Features = IRToNativeSizeLearning::getFunctionFeatures( |
| 242 | const_cast<Function &>(F), FAM); |
| 243 | int32_t *V = Evaluator->getInput<int32_t>(0); |
| 244 | Features.fillTensor(V); |
| 245 | auto ER = Evaluator->evaluate(); |
| 246 | if (!ER) |
| 247 | return std::nullopt; |
| 248 | float Ret = *ER->getTensorValue<float>(0); |
| 249 | if (Ret < 0.0) |
| 250 | Ret = 0.0; |
| 251 | return static_cast<size_t>(Ret); |
| 252 | } |
| 253 | |
| 254 | InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} |
| 255 | InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( |
| 256 | InlineSizeEstimatorAnalysis &&Other) |
| 257 | : Evaluator(std::move(Other.Evaluator)) {} |
| 258 | |
#else
// TFLite is unavailable: provide no-op stubs so clients can link against and
// invoke the analysis unconditionally; it simply never produces an estimate.
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  return std::nullopt;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
#endif
| 274 | |
| 275 | PreservedAnalyses |
| 276 | InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, |
| 277 | FunctionAnalysisManager &AM) { |
| 278 | OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() |
| 279 | << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(IR&: F) << "\n" ; |
| 280 | return PreservedAnalyses::all(); |
| 281 | } |
| 282 | |