1 | //===- FunctionSpecialization.h - Function Specialization -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Overview: |
10 | // --------- |
11 | // Function Specialization is a transformation which propagates the constant |
12 | // parameters of a function call from the caller to the callee. It is part of |
13 | // the Inter-Procedural Sparse Conditional Constant Propagation (IPSCCP) pass. |
14 | // The transformation runs iteratively a number of times which is controlled |
15 | // by the option `funcspec-max-iters`. Running it multiple times is needed |
16 | // for specializing recursive functions, but also exposes new opportunities |
17 | // arising from specializations which return constant values or contain calls |
18 | // which can be specialized. |
19 | // |
20 | // Function Specialization supports propagating constant parameters like |
21 | // function pointers, literal constants and addresses of global variables. |
22 | // By propagating function pointers, indirect calls become direct calls. This |
23 | // exposes inlining opportunities which we would have otherwise missed. That's |
24 | // why function specialization is run before the inliner in the optimization |
25 | // pipeline; that is by design. |
26 | // |
27 | // Cost Model: |
28 | // ----------- |
29 | // The cost model facilitates a utility for estimating the specialization bonus |
30 | // from propagating a constant argument. This is the InstCostVisitor, a class |
31 | // that inherits from the InstVisitor. The bonus itself is expressed as codesize |
32 | // and latency savings. Codesize savings means the amount of code that becomes |
33 | // dead in the specialization from propagating the constant, whereas latency |
34 | // savings represents the cycles we are saving from replacing instructions with |
35 | // constant values. The InstCostVisitor overrides a set of `visit*` methods to |
36 | // be able to handle different types of instructions. These attempt to constant- |
37 | // fold the instruction in which case a constant is returned and propagated |
38 | // further. |
39 | // |
40 | // Function pointers are not handled by the InstCostVisitor. They are treated |
41 | // separately as they could expose inlining opportunities via indirect call |
42 | // promotion. The inlining bonus contributes to the total specialization score. |
43 | // |
44 | // For a specialization to be profitable its bonus needs to exceed a minimum |
45 | // threshold. There are three options for controlling the threshold which are |
46 | // expressed as percentages of the original function size: |
47 | // * funcspec-min-codesize-savings |
48 | // * funcspec-min-latency-savings |
49 | // * funcspec-min-inlining-bonus |
50 | // There's also an option for controlling the codesize growth from recursive |
51 | // specializations. That is `funcspec-max-codesize-growth`. |
52 | // |
53 | // Once we have all the potential specializations with their score we need to |
54 | // choose the best ones, which fit in the module specialization budget. That |
55 | // is controlled by the option `funcspec-max-clones`. To find the best `NSpec` |
56 | // specializations we use a max-heap. For more details refer to D139346. |
57 | // |
58 | // Ideas: |
59 | // ------ |
60 | // - With a function specialization attribute for arguments, we could have |
61 | // a direct way to steer function specialization, avoiding the cost-model, |
62 | // and thus control compile-times / code-size. |
63 | // |
64 | // - Perhaps a post-inlining function specialization pass could be more |
65 | // aggressive on literal constants. |
66 | // |
67 | // Limitations: |
68 | // ------------ |
69 | // - We are unable to consider specializations of functions called from indirect |
70 | // callsites whose pointer operand has a lattice value that is known to be |
71 | // constant, either from IPSCCP or previous iterations of FuncSpec. This is |
72 | // because SCCP has not yet replaced the uses of the known constant. |
73 | // |
74 | // References: |
75 | // ----------- |
76 | // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable |
77 | // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q |
78 | // |
79 | //===----------------------------------------------------------------------===// |
80 | |
81 | #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
82 | #define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
83 | |
84 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
85 | #include "llvm/Analysis/CodeMetrics.h" |
86 | #include "llvm/Analysis/InlineCost.h" |
87 | #include "llvm/Analysis/TargetTransformInfo.h" |
88 | #include "llvm/IR/InstVisitor.h" |
89 | #include "llvm/Support/Compiler.h" |
90 | #include "llvm/Transforms/Scalar/SCCP.h" |
91 | #include "llvm/Transforms/Utils/Cloning.h" |
92 | #include "llvm/Transforms/Utils/SCCPSolver.h" |
93 | #include "llvm/Transforms/Utils/SizeOpts.h" |
94 | |
95 | namespace llvm { |
96 | // Map of potential specializations for each function. The FunctionSpecializer |
97 | // keeps the discovered specialisation opportunities for the module in a single |
98 | // vector, where the specialisations of each function form a contiguous range. |
99 | // This map's value is the beginning and the end of that range. |
100 | using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>; |
101 | |
102 | // Just a shorter abbreviation to improve indentation. |
103 | using Cost = InstructionCost; |
104 | |
105 | // Map of known constants found during the specialization bonus estimation. |
106 | using ConstMap = DenseMap<Value *, Constant *>; |
107 | |
108 | // Specialization signature, used to uniquely designate a specialization within |
109 | // a function. |
110 | struct SpecSig { |
111 | // Hashing support, used to distinguish between ordinary, empty, or tombstone |
112 | // keys. |
113 | unsigned Key = 0; |
114 | SmallVector<ArgInfo, 4> Args; |
115 | |
116 | bool operator==(const SpecSig &Other) const { |
117 | if (Key != Other.Key) |
118 | return false; |
119 | return Args == Other.Args; |
120 | } |
121 | |
122 | friend hash_code hash_value(const SpecSig &S) { |
123 | return hash_combine(args: hash_value(value: S.Key), args: hash_combine_range(R: S.Args)); |
124 | } |
125 | }; |
126 | |
127 | // Specialization instance. |
128 | struct Spec { |
129 | // Original function. |
130 | Function *F; |
131 | |
132 | // Cloned function, a specialized version of the original one. |
133 | Function *Clone = nullptr; |
134 | |
135 | // Specialization signature. |
136 | SpecSig Sig; |
137 | |
138 | // Profitability of the specialization. |
139 | unsigned Score; |
140 | |
141 | // Number of instructions in the specialization. |
142 | unsigned CodeSize; |
143 | |
144 | // List of call sites, matching this specialization. |
145 | SmallVector<CallBase *> CallSites; |
146 | |
147 | Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize) |
148 | : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} |
149 | Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize) |
150 | : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} |
151 | }; |
152 | |
153 | class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> { |
154 | std::function<BlockFrequencyInfo &(Function &)> GetBFI; |
155 | Function *F; |
156 | const DataLayout &DL; |
157 | TargetTransformInfo &TTI; |
158 | const SCCPSolver &Solver; |
159 | |
160 | ConstMap KnownConstants; |
161 | // Basic blocks known to be unreachable after constant propagation. |
162 | DenseSet<BasicBlock *> DeadBlocks; |
163 | // PHI nodes we have visited before. |
164 | DenseSet<Instruction *> VisitedPHIs; |
165 | // PHI nodes we have visited once without successfully constant folding them. |
166 | // Once the InstCostVisitor has processed all the specialization arguments, |
167 | // it should be possible to determine whether those PHIs can be folded |
168 | // (some of their incoming values may have become constant or dead). |
169 | SmallVector<Instruction *> PendingPHIs; |
170 | |
171 | ConstMap::iterator LastVisited; |
172 | |
173 | public: |
174 | InstCostVisitor(std::function<BlockFrequencyInfo &(Function &)> GetBFI, |
175 | Function *F, const DataLayout &DL, TargetTransformInfo &TTI, |
176 | SCCPSolver &Solver) |
177 | : GetBFI(GetBFI), F(F), DL(DL), TTI(TTI), Solver(Solver) {} |
178 | |
179 | bool isBlockExecutable(BasicBlock *BB) const { |
180 | return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(V: BB); |
181 | } |
182 | |
183 | LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C); |
184 | |
185 | LLVM_ABI Cost getCodeSizeSavingsFromPendingPHIs(); |
186 | |
187 | LLVM_ABI Cost getLatencySavingsForKnownConstants(); |
188 | |
189 | private: |
190 | friend class InstVisitor<InstCostVisitor, Constant *>; |
191 | |
192 | Constant *findConstantFor(Value *V) const; |
193 | |
194 | bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const; |
195 | |
196 | Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr, |
197 | Constant *C = nullptr); |
198 | |
199 | Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList); |
200 | Cost estimateSwitchInst(SwitchInst &I); |
201 | Cost estimateBranchInst(BranchInst &I); |
202 | |
203 | // Transitively Incoming Values (TIV) is a set of Values that can "feed" a |
204 | // value to the initial PHI-node. It is defined like this: |
205 | // |
206 | // * the initial PHI-node belongs to TIV. |
207 | // |
208 | // * for every PHI-node in TIV, its operands belong to TIV |
209 | // |
210 | // If TIV for the initial PHI-node (P) contains more than one constant or a |
211 | // value that is not a PHI-node, then P cannot be folded to a constant. |
212 | // |
213 | // As soon as we detect these cases, we bail, without constructing the |
214 | // full TIV. |
215 | // Otherwise P can be folded to the one constant in TIV. |
216 | bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root, |
217 | DenseSet<PHINode *> &TransitivePHIs); |
218 | |
219 | Constant *visitInstruction(Instruction &I) { return nullptr; } |
220 | Constant *visitPHINode(PHINode &I); |
221 | Constant *visitFreezeInst(FreezeInst &I); |
222 | Constant *visitCallBase(CallBase &I); |
223 | Constant *visitLoadInst(LoadInst &I); |
224 | Constant *visitGetElementPtrInst(GetElementPtrInst &I); |
225 | Constant *visitSelectInst(SelectInst &I); |
226 | Constant *visitCastInst(CastInst &I); |
227 | Constant *visitCmpInst(CmpInst &I); |
228 | Constant *visitUnaryOperator(UnaryOperator &I); |
229 | Constant *visitBinaryOperator(BinaryOperator &I); |
230 | }; |
231 | |
232 | class FunctionSpecializer { |
233 | |
234 | /// The IPSCCP Solver. |
235 | SCCPSolver &Solver; |
236 | |
237 | Module &M; |
238 | |
239 | /// Analysis manager, needed to invalidate analyses. |
240 | FunctionAnalysisManager *FAM; |
241 | |
242 | /// Analyses used to help determine if a function should be specialized. |
243 | std::function<BlockFrequencyInfo &(Function &)> GetBFI; |
244 | std::function<const TargetLibraryInfo &(Function &)> GetTLI; |
245 | std::function<TargetTransformInfo &(Function &)> GetTTI; |
246 | std::function<AssumptionCache &(Function &)> GetAC; |
247 | |
248 | SmallPtrSet<Function *, 32> Specializations; |
249 | SmallPtrSet<Function *, 32> FullySpecialized; |
250 | DenseMap<Function *, CodeMetrics> FunctionMetrics; |
251 | DenseMap<Function *, unsigned> FunctionGrowth; |
252 | unsigned NGlobals = 0; |
253 | |
254 | public: |
255 | FunctionSpecializer( |
256 | SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM, |
257 | std::function<BlockFrequencyInfo &(Function &)> GetBFI, |
258 | std::function<const TargetLibraryInfo &(Function &)> GetTLI, |
259 | std::function<TargetTransformInfo &(Function &)> GetTTI, |
260 | std::function<AssumptionCache &(Function &)> GetAC) |
261 | : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI), |
262 | GetTTI(GetTTI), GetAC(GetAC) {} |
263 | |
264 | LLVM_ABI ~FunctionSpecializer(); |
265 | |
266 | LLVM_ABI bool run(); |
267 | |
268 | InstCostVisitor getInstCostVisitorFor(Function *F) { |
269 | auto &TTI = GetTTI(*F); |
270 | return InstCostVisitor(GetBFI, F, M.getDataLayout(), TTI, Solver); |
271 | } |
272 | |
273 | private: |
274 | Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); |
275 | |
276 | /// A constant stack value is an AllocaInst that has a single constant |
277 | /// value stored to it. Return this constant if such an alloca stack value |
278 | /// is a function argument. |
279 | Constant *getConstantStackValue(CallInst *Call, Value *Val); |
280 | |
281 | /// See if there are any new constant values for the callers of \p F via |
282 | /// stack variables and promote them to global variables. |
283 | void promoteConstantStackValues(Function *F); |
284 | |
285 | /// Clean up fully specialized functions. |
286 | void removeDeadFunctions(); |
287 | |
288 | /// Remove any ssa_copy intrinsics that may have been introduced. |
289 | void cleanUpSSA(); |
290 | |
291 | /// @brief Find potential specialization opportunities. |
292 | /// @param F Function to specialize |
293 | /// @param FuncSize Cost of specializing a function. |
294 | /// @param AllSpecs A vector to add potential specializations to. |
295 | /// @param SM A map for a function's specialisation range |
296 | /// @return True, if any potential specializations were found |
297 | bool findSpecializations(Function *F, unsigned FuncSize, |
298 | SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM); |
299 | |
300 | /// Compute the inlining bonus for replacing argument \p A with constant \p C. |
301 | unsigned getInliningBonus(Argument *A, Constant *C); |
302 | |
303 | bool isCandidateFunction(Function *F); |
304 | |
305 | /// @brief Create a specialization of \p F and prime the SCCPSolver |
306 | /// @param F Function to specialize |
307 | /// @param S Which specialization to create |
308 | /// @return The new, cloned function |
309 | Function *createSpecialization(Function *F, const SpecSig &S); |
310 | |
311 | /// Determine if it is possible to specialise the function for constant values |
312 | /// of the formal parameter \p A. |
313 | bool isArgumentInteresting(Argument *A); |
314 | |
315 | /// Check if the value \p V (an actual argument) is a constant or can only |
316 | /// have a constant value. Return that constant. |
317 | Constant *getCandidateConstant(Value *V); |
318 | |
319 | /// @brief Find and update calls to \p F, which match a specialization |
320 | /// @param F Orginal function |
321 | /// @param Begin Start of a range of possibly matching specialisations |
322 | /// @param End End of a range (exclusive) of possibly matching specialisations |
323 | void updateCallSites(Function *F, const Spec *Begin, const Spec *End); |
324 | }; |
325 | } // namespace llvm |
326 | |
327 | #endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H |
328 | |