| 1 | //===- FunctionSpecialization.h - Function Specialization -----------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Overview: |
| 10 | // --------- |
| 11 | // Function Specialization is a transformation which propagates the constant |
| 12 | // parameters of a function call from the caller to the callee. It is part of |
| 13 | // the Inter-Procedural Sparse Conditional Constant Propagation (IPSCCP) pass. |
| 14 | // The transformation runs iteratively a number of times which is controlled |
| 15 | // by the option `funcspec-max-iters`. Running it multiple times is needed |
| 16 | // for specializing recursive functions, but also exposes new opportunities |
| 17 | // arising from specializations which return constant values or contain calls |
| 18 | // which can be specialized. |
| 19 | // |
| 20 | // Function Specialization supports propagating constant parameters like |
| 21 | // function pointers, literal constants and addresses of global variables. |
| 22 | // By propagating function pointers, indirect calls become direct calls. This |
| 23 | // exposes inlining opportunities which we would have otherwise missed. That's |
| 24 | // why function specialization is run before the inliner in the optimization |
| 25 | // pipeline; that is by design. |
| 26 | // |
| 27 | // Cost Model: |
| 28 | // ----------- |
| 29 | // The cost model facilitates a utility for estimating the specialization bonus |
| 30 | // from propagating a constant argument. This is the InstCostVisitor, a class |
| 31 | // that inherits from the InstVisitor. The bonus itself is expressed as codesize |
| 32 | // and latency savings. Codesize savings means the amount of code that becomes |
| 33 | // dead in the specialization from propagating the constant, whereas latency |
| 34 | // savings represents the cycles we are saving from replacing instructions with |
| 35 | // constant values. The InstCostVisitor overrides a set of `visit*` methods to |
| 36 | // be able to handle different types of instructions. These attempt to constant- |
| 37 | // fold the instruction in which case a constant is returned and propagated |
| 38 | // further. |
| 39 | // |
| 40 | // Function pointers are not handled by the InstCostVisitor. They are treated |
| 41 | // separately as they could expose inlining opportunities via indirect call |
| 42 | // promotion. The inlining bonus contributes to the total specialization score. |
| 43 | // |
| 44 | // For a specialization to be profitable its bonus needs to exceed a minimum |
| 45 | // threshold. There are three options for controlling the threshold which are |
| 46 | // expressed as percentages of the original function size: |
| 47 | // * funcspec-min-codesize-savings |
| 48 | // * funcspec-min-latency-savings |
| 49 | // * funcspec-min-inlining-bonus |
| 50 | // There's also an option for controlling the codesize growth from recursive |
| 51 | // specializations. That is `funcspec-max-codesize-growth`. |
| 52 | // |
| 53 | // Once we have all the potential specializations with their score we need to |
| 54 | // choose the best ones, which fit in the module specialization budget. That |
| 55 | // is controlled by the option `funcspec-max-clones`. To find the best `NSpec` |
| 56 | // specializations we use a max-heap. For more details refer to D139346. |
| 57 | // |
| 58 | // Ideas: |
| 59 | // ------ |
| 60 | // - With a function specialization attribute for arguments, we could have |
| 61 | // a direct way to steer function specialization, avoiding the cost-model, |
| 62 | // and thus control compile-times / code-size. |
| 63 | // |
| 64 | // - Perhaps a post-inlining function specialization pass could be more |
| 65 | // aggressive on literal constants. |
| 66 | // |
| 67 | // Limitations: |
| 68 | // ------------ |
| 69 | // - We are unable to consider specializations of functions called from indirect |
| 70 | // callsites whose pointer operand has a lattice value that is known to be |
| 71 | // constant, either from IPSCCP or previous iterations of FuncSpec. This is |
| 72 | // because SCCP has not yet replaced the uses of the known constant. |
| 73 | // |
| 74 | // References: |
| 75 | // ----------- |
| 76 | // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable |
| 77 | // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q |
| 78 | // |
| 79 | //===----------------------------------------------------------------------===// |
| 80 | |
| 81 | #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
| 82 | #define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
| 83 | |
| 84 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 85 | #include "llvm/Analysis/CodeMetrics.h" |
| 86 | #include "llvm/Analysis/InlineCost.h" |
| 87 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 88 | #include "llvm/IR/InstVisitor.h" |
| 89 | #include "llvm/Support/Compiler.h" |
| 90 | #include "llvm/Transforms/Scalar/SCCP.h" |
| 91 | #include "llvm/Transforms/Utils/Cloning.h" |
| 92 | #include "llvm/Transforms/Utils/SCCPSolver.h" |
| 93 | #include "llvm/Transforms/Utils/SizeOpts.h" |
| 94 | |
| 95 | namespace llvm { |
| 96 | // Map of potential specializations for each function. The FunctionSpecializer |
| 97 | // keeps the discovered specialisation opportunities for the module in a single |
| 98 | // vector, where the specialisations of each function form a contiguous range. |
| 99 | // This map's value is the beginning and the end of that range. |
| 100 | using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>; |
| 101 | |
| 102 | // Just a shorter abbreviation to improve indentation. |
| 103 | using Cost = InstructionCost; |
| 104 | |
| 105 | // Map of known constants found during the specialization bonus estimation. |
| 106 | using ConstMap = DenseMap<Value *, Constant *>; |
| 107 | |
| 108 | // Specialization signature, used to uniquely designate a specialization within |
| 109 | // a function. |
| 110 | struct SpecSig { |
| 111 | // Hashing support, used to distinguish between ordinary, empty, or tombstone |
| 112 | // keys. |
| 113 | unsigned Key = 0; |
| 114 | SmallVector<ArgInfo, 4> Args; |
| 115 | |
| 116 | bool operator==(const SpecSig &Other) const { |
| 117 | if (Key != Other.Key) |
| 118 | return false; |
| 119 | return Args == Other.Args; |
| 120 | } |
| 121 | |
| 122 | friend hash_code hash_value(const SpecSig &S) { |
| 123 | return hash_combine(args: hash_value(value: S.Key), args: hash_combine_range(R: S.Args)); |
| 124 | } |
| 125 | }; |
| 126 | |
| 127 | // Specialization instance. |
| 128 | struct Spec { |
| 129 | // Original function. |
| 130 | Function *F; |
| 131 | |
| 132 | // Cloned function, a specialized version of the original one. |
| 133 | Function *Clone = nullptr; |
| 134 | |
| 135 | // Specialization signature. |
| 136 | SpecSig Sig; |
| 137 | |
| 138 | // Profitability of the specialization. |
| 139 | unsigned Score; |
| 140 | |
| 141 | // Number of instructions in the specialization. |
| 142 | unsigned CodeSize; |
| 143 | |
| 144 | // List of call sites, matching this specialization. |
| 145 | SmallVector<CallBase *> CallSites; |
| 146 | |
| 147 | Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize) |
| 148 | : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} |
| 149 | Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize) |
| 150 | : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} |
| 151 | }; |
| 152 | |
| 153 | class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> { |
| 154 | std::function<BlockFrequencyInfo &(Function &)> GetBFI; |
| 155 | Function *F; |
| 156 | const DataLayout &DL; |
| 157 | TargetTransformInfo &TTI; |
| 158 | const SCCPSolver &Solver; |
| 159 | |
| 160 | ConstMap KnownConstants; |
| 161 | // Basic blocks known to be unreachable after constant propagation. |
| 162 | DenseSet<BasicBlock *> DeadBlocks; |
| 163 | // PHI nodes we have visited before. |
| 164 | DenseSet<Instruction *> VisitedPHIs; |
| 165 | // PHI nodes we have visited once without successfully constant folding them. |
| 166 | // Once the InstCostVisitor has processed all the specialization arguments, |
| 167 | // it should be possible to determine whether those PHIs can be folded |
| 168 | // (some of their incoming values may have become constant or dead). |
| 169 | SmallVector<Instruction *> PendingPHIs; |
| 170 | |
| 171 | ConstMap::iterator LastVisited; |
| 172 | |
| 173 | public: |
| 174 | InstCostVisitor(std::function<BlockFrequencyInfo &(Function &)> GetBFI, |
| 175 | Function *F, const DataLayout &DL, TargetTransformInfo &TTI, |
| 176 | SCCPSolver &Solver) |
| 177 | : GetBFI(GetBFI), F(F), DL(DL), TTI(TTI), Solver(Solver) {} |
| 178 | |
| 179 | bool isBlockExecutable(BasicBlock *BB) const { |
| 180 | return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(V: BB); |
| 181 | } |
| 182 | |
| 183 | LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C); |
| 184 | |
| 185 | LLVM_ABI Cost getCodeSizeSavingsFromPendingPHIs(); |
| 186 | |
| 187 | LLVM_ABI Cost getLatencySavingsForKnownConstants(); |
| 188 | |
| 189 | private: |
| 190 | friend class InstVisitor<InstCostVisitor, Constant *>; |
| 191 | |
| 192 | Constant *findConstantFor(Value *V) const; |
| 193 | |
| 194 | bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const; |
| 195 | |
| 196 | Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr, |
| 197 | Constant *C = nullptr); |
| 198 | |
| 199 | Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList); |
| 200 | Cost estimateSwitchInst(SwitchInst &I); |
| 201 | Cost estimateBranchInst(BranchInst &I); |
| 202 | |
| 203 | // Transitively Incoming Values (TIV) is a set of Values that can "feed" a |
| 204 | // value to the initial PHI-node. It is defined like this: |
| 205 | // |
| 206 | // * the initial PHI-node belongs to TIV. |
| 207 | // |
| 208 | // * for every PHI-node in TIV, its operands belong to TIV |
| 209 | // |
| 210 | // If TIV for the initial PHI-node (P) contains more than one constant or a |
| 211 | // value that is not a PHI-node, then P cannot be folded to a constant. |
| 212 | // |
| 213 | // As soon as we detect these cases, we bail, without constructing the |
| 214 | // full TIV. |
| 215 | // Otherwise P can be folded to the one constant in TIV. |
| 216 | bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root, |
| 217 | DenseSet<PHINode *> &TransitivePHIs); |
| 218 | |
| 219 | Constant *visitInstruction(Instruction &I) { return nullptr; } |
| 220 | Constant *visitPHINode(PHINode &I); |
| 221 | Constant *visitFreezeInst(FreezeInst &I); |
| 222 | Constant *visitCallBase(CallBase &I); |
| 223 | Constant *visitLoadInst(LoadInst &I); |
| 224 | Constant *visitGetElementPtrInst(GetElementPtrInst &I); |
| 225 | Constant *visitSelectInst(SelectInst &I); |
| 226 | Constant *visitCastInst(CastInst &I); |
| 227 | Constant *visitCmpInst(CmpInst &I); |
| 228 | Constant *visitUnaryOperator(UnaryOperator &I); |
| 229 | Constant *visitBinaryOperator(BinaryOperator &I); |
| 230 | }; |
| 231 | |
| 232 | class FunctionSpecializer { |
| 233 | |
| 234 | /// The IPSCCP Solver. |
| 235 | SCCPSolver &Solver; |
| 236 | |
| 237 | Module &M; |
| 238 | |
| 239 | /// Analysis manager, needed to invalidate analyses. |
| 240 | FunctionAnalysisManager *FAM; |
| 241 | |
| 242 | /// Analyses used to help determine if a function should be specialized. |
| 243 | std::function<BlockFrequencyInfo &(Function &)> GetBFI; |
| 244 | std::function<const TargetLibraryInfo &(Function &)> GetTLI; |
| 245 | std::function<TargetTransformInfo &(Function &)> GetTTI; |
| 246 | std::function<AssumptionCache &(Function &)> GetAC; |
| 247 | |
| 248 | SmallPtrSet<Function *, 32> Specializations; |
| 249 | SmallPtrSet<Function *, 32> FullySpecialized; |
| 250 | DenseMap<Function *, CodeMetrics> FunctionMetrics; |
| 251 | DenseMap<Function *, unsigned> FunctionGrowth; |
| 252 | unsigned NGlobals = 0; |
| 253 | |
| 254 | public: |
| 255 | FunctionSpecializer( |
| 256 | SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM, |
| 257 | std::function<BlockFrequencyInfo &(Function &)> GetBFI, |
| 258 | std::function<const TargetLibraryInfo &(Function &)> GetTLI, |
| 259 | std::function<TargetTransformInfo &(Function &)> GetTTI, |
| 260 | std::function<AssumptionCache &(Function &)> GetAC) |
| 261 | : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI), |
| 262 | GetTTI(GetTTI), GetAC(GetAC) {} |
| 263 | |
| 264 | LLVM_ABI ~FunctionSpecializer(); |
| 265 | |
| 266 | LLVM_ABI bool run(); |
| 267 | |
| 268 | InstCostVisitor getInstCostVisitorFor(Function *F) { |
| 269 | auto &TTI = GetTTI(*F); |
| 270 | return InstCostVisitor(GetBFI, F, M.getDataLayout(), TTI, Solver); |
| 271 | } |
| 272 | |
| 273 | private: |
| 274 | Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); |
| 275 | |
| 276 | /// A constant stack value is an AllocaInst that has a single constant |
| 277 | /// value stored to it. Return this constant if such an alloca stack value |
| 278 | /// is a function argument. |
| 279 | Constant *getConstantStackValue(CallInst *Call, Value *Val); |
| 280 | |
| 281 | /// See if there are any new constant values for the callers of \p F via |
| 282 | /// stack variables and promote them to global variables. |
| 283 | void promoteConstantStackValues(Function *F); |
| 284 | |
| 285 | /// Clean up fully specialized functions. |
| 286 | void removeDeadFunctions(); |
| 287 | |
| 288 | /// Remove any ssa_copy intrinsics that may have been introduced. |
| 289 | void cleanUpSSA(); |
| 290 | |
| 291 | /// @brief Find potential specialization opportunities. |
| 292 | /// @param F Function to specialize |
| 293 | /// @param FuncSize Cost of specializing a function. |
| 294 | /// @param AllSpecs A vector to add potential specializations to. |
| 295 | /// @param SM A map for a function's specialisation range |
| 296 | /// @return True, if any potential specializations were found |
| 297 | bool findSpecializations(Function *F, unsigned FuncSize, |
| 298 | SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM); |
| 299 | |
| 300 | /// Compute the inlining bonus for replacing argument \p A with constant \p C. |
| 301 | unsigned getInliningBonus(Argument *A, Constant *C); |
| 302 | |
| 303 | bool isCandidateFunction(Function *F); |
| 304 | |
| 305 | /// @brief Create a specialization of \p F and prime the SCCPSolver |
| 306 | /// @param F Function to specialize |
| 307 | /// @param S Which specialization to create |
| 308 | /// @return The new, cloned function |
| 309 | Function *createSpecialization(Function *F, const SpecSig &S); |
| 310 | |
| 311 | /// Determine if it is possible to specialise the function for constant values |
| 312 | /// of the formal parameter \p A. |
| 313 | bool isArgumentInteresting(Argument *A); |
| 314 | |
| 315 | /// Check if the value \p V (an actual argument) is a constant or can only |
| 316 | /// have a constant value. Return that constant. |
| 317 | Constant *getCandidateConstant(Value *V); |
| 318 | |
| 319 | /// @brief Find and update calls to \p F, which match a specialization |
| 320 | /// @param F Orginal function |
| 321 | /// @param Begin Start of a range of possibly matching specialisations |
| 322 | /// @param End End of a range (exclusive) of possibly matching specialisations |
| 323 | void updateCallSites(Function *F, const Spec *Begin, const Spec *End); |
| 324 | }; |
| 325 | } // namespace llvm |
| 326 | |
| 327 | #endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
| 328 | |