//===- FunctionSpecialization.cpp - Function Specialization ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionSpecialization.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/SCCPSolver.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <cmath>

using namespace llvm;

#define DEBUG_TYPE "function-specialization"

STATISTIC(NumSpecsCreated, "Number of specializations created");
static cl::opt<bool> ForceSpecialization(
    "force-specialization", cl::init(false), cl::Hidden,
    cl::desc("Force function specialization for every call site with a "
             "constant argument"));

static cl::opt<unsigned> MaxClones(
    "funcspec-max-clones", cl::init(3), cl::Hidden,
    cl::desc("The maximum number of clones allowed for a single function "
             "specialization"));

static cl::opt<unsigned>
    MaxDiscoveryIterations("funcspec-max-discovery-iterations", cl::init(100),
                           cl::Hidden,
                           cl::desc("The maximum number of iterations allowed "
                                    "when searching for transitive "
                                    "phis"));

static cl::opt<unsigned> MaxIncomingPhiValues(
    "funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
    cl::desc("The maximum number of incoming values a PHI node can have to be "
             "considered during the specialization bonus estimation"));

static cl::opt<unsigned> MaxBlockPredecessors(
    "funcspec-max-block-predecessors", cl::init(2), cl::Hidden,
    cl::desc("The maximum number of predecessors a basic block can have to be "
             "considered during the estimation of dead code"));

static cl::opt<unsigned> MinFunctionSize(
    "funcspec-min-function-size", cl::init(300), cl::Hidden,
    cl::desc("Don't specialize functions that have fewer than this number of "
             "instructions"));

static cl::opt<unsigned> MaxCodeSizeGrowth(
    "funcspec-max-codesize-growth", cl::init(3), cl::Hidden,
    cl::desc("Maximum codesize growth allowed per function"));

static cl::opt<unsigned> MinCodeSizeSavings(
    "funcspec-min-codesize-savings", cl::init(20), cl::Hidden,
    cl::desc("Reject specializations whose codesize savings are less than "
             "this much percent of the original function size"));

static cl::opt<unsigned> MinLatencySavings(
    "funcspec-min-latency-savings", cl::init(40), cl::Hidden,
    cl::desc("Reject specializations whose latency savings are less than "
             "this much percent of the original function size"));

static cl::opt<unsigned> MinInliningBonus(
    "funcspec-min-inlining-bonus", cl::init(300), cl::Hidden,
    cl::desc("Reject specializations whose inlining bonus is less than "
             "this much percent of the original function size"));

static cl::opt<bool> SpecializeOnAddress(
    "funcspec-on-address", cl::init(false), cl::Hidden,
    cl::desc("Enable function specialization on the address of global values"));

// Disabled by default as it can significantly increase compilation times.
//
// https://llvm-compile-time-tracker.com
// https://github.com/nikic/llvm-compile-time-tracker
static cl::opt<bool> SpecializeLiteralConstant(
    "funcspec-for-literal-constant", cl::init(false), cl::Hidden,
    cl::desc("Enable specialization of functions that take a literal constant "
             "as an argument"));

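/// Check if \p Succ becomes unreachable once \p BB is eliminated: every
/// predecessor must be \p BB itself, \p Succ (a self-loop), or a block that
/// is already known to be dead, and the number of predecessors must not
/// exceed MaxBlockPredecessors.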
bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
                                            DenseSet<BasicBlock *> &DeadBlocks) {
  unsigned I = 0;
  return all_of(predecessors(Succ),
                [&I, BB, Succ, &DeadBlocks](BasicBlock *Pred) {
                  return I++ < MaxBlockPredecessors &&
                         (Pred == BB || Pred == Succ ||
                          DeadBlocks.contains(Pred));
                });
}

// Estimates the codesize savings due to dead code after constant propagation.
// \p WorkList represents the basic blocks of a specialization which will
// eventually become dead once we replace instructions that are known to be
// constants. The successors of such blocks are added to the list as long as
// the \p Solver found they were executable prior to specialization, and only
// if all their predecessors are dead.
Cost InstCostVisitor::estimateBasicBlocks(
    SmallVectorImpl<BasicBlock *> &WorkList) {
  Cost CodeSize = 0;
  // Accumulate the instruction cost of each basic block weighted by frequency.
  while (!WorkList.empty()) {
    BasicBlock *BB = WorkList.pop_back_val();

    // These blocks are considered dead as far as the InstCostVisitor
    // is concerned. They haven't been proven dead yet by the Solver,
    // but may become so if we propagate the specialization arguments.
    if (!DeadBlocks.insert(BB).second)
      continue;

    for (Instruction &I : *BB) {
      // Disregard SSA copies.
      if (auto *II = dyn_cast<IntrinsicInst>(&I))
        if (II->getIntrinsicID() == Intrinsic::ssa_copy)
          continue;
      // If it's a known constant we have already accounted for it.
      if (KnownConstants.contains(&I))
        continue;

      Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);

      LLVM_DEBUG(dbgs() << "FnSpecialization: CodeSize " << C
                        << " for user " << I << "\n");
      CodeSize += C;
    }

    // Keep adding dead successors to the list as long as they are
    // executable and only reachable from dead blocks.
    for (BasicBlock *SuccBB : successors(BB))
      if (isBlockExecutable(SuccBB) &&
          canEliminateSuccessor(BB, SuccBB, DeadBlocks))
        WorkList.push_back(SuccBB);
  }
  return CodeSize;
}

static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
  if (auto *C = dyn_cast<Constant>(V))
    return C;
  return KnownConstants.lookup(V);
}

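/// Revisit the PHI nodes deferred by visitPHINode because not all of their
/// incoming values were known when first seen. By now all constant arguments
/// have been propagated, so a second visit may succeed.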
Bonus InstCostVisitor::getBonusFromPendingPHIs() {
  Bonus B;
  while (!PendingPHIs.empty()) {
    Instruction *Phi = PendingPHIs.pop_back_val();
    // The pending PHIs could have been proven dead by now.
    if (isBlockExecutable(Phi->getParent()))
      B += getUserBonus(Phi);
  }
  return B;
}

/// Compute a bonus for replacing argument \p A with constant \p C.
Bonus InstCostVisitor::getSpecializationBonus(Argument *A, Constant *C) {
  LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                    << C->getNameOrAsOperand() << "\n");
  Bonus B;
  for (auto *U : A->users())
    if (auto *UI = dyn_cast<Instruction>(U))
      if (isBlockExecutable(UI->getParent()))
        B += getUserBonus(UI, A, C);

  LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
                    << B.CodeSize << ", Latency = " << B.Latency
                    << "} for argument " << *A << "\n");
  return B;
}

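/// Compute the codesize and latency savings that follow from knowing that
/// operand \p Use of \p User is the constant \p C, then recurse into the
/// users of \p User for any constant this visit produces.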
Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use,
                                    Constant *C) {
  // We have already propagated a constant for this user.
  if (KnownConstants.contains(User))
    return {0, 0};

  // Cache the iterator before visiting.
  LastVisited = Use ? KnownConstants.insert({Use, C}).first
                    : KnownConstants.end();

  Cost CodeSize = 0;
  if (auto *I = dyn_cast<SwitchInst>(User)) {
    CodeSize = estimateSwitchInst(*I);
  } else if (auto *I = dyn_cast<BranchInst>(User)) {
    CodeSize = estimateBranchInst(*I);
  } else {
    C = visit(*User);
    if (!C)
      return {0, 0};
  }

  // Even though it doesn't make sense to bind switch and branch instructions
  // to a constant, unlike any other instruction type, inserting them into the
  // map prevents us from estimating their bonus more than once.
  KnownConstants.insert({User, C});

  CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);

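  // Scale the latency bonus by how often the user's block executes relative
  // to the function entry, so hot blocks contribute more than cold ones.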
  uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
                    BFI.getEntryFreq().getFrequency();

  Cost Latency =
      Weight * TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);

  LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize
                    << ", Latency = " << Latency << "} for user "
                    << *User << "\n");

  Bonus B(CodeSize, Latency);
  for (auto *U : User->users())
    if (auto *UI = dyn_cast<Instruction>(U))
      if (UI != User && isBlockExecutable(UI->getParent()))
        B += getUserBonus(UI, User, C);

  return B;
}

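/// Estimate the blocks that die when the switch condition is known to be the
/// constant recorded in LastVisited: every case successor other than the one
/// taken by that constant becomes a dead-code candidate.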
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (I.getCondition() != LastVisited->first)
    return 0;

  auto *C = dyn_cast<ConstantInt>(LastVisited->second);
  if (!C)
    return 0;

  BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor();
  // Initialize the worklist with the dead basic blocks. These are the
  // destination labels which are different from the one corresponding
  // to \p C. They should be executable and have a unique predecessor.
  SmallVector<BasicBlock *> WorkList;
  for (const auto &Case : I.cases()) {
    BasicBlock *BB = Case.getCaseSuccessor();
    if (BB != Succ && isBlockExecutable(BB) &&
        canEliminateSuccessor(I.getParent(), BB, DeadBlocks))
      WorkList.push_back(BB);
  }

  return estimateBasicBlocks(WorkList);
}

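/// Estimate the blocks that die when the branch condition is known to be the
/// constant recorded in LastVisited: the successor on the untaken side
/// becomes a dead-code candidate.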
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (I.getCondition() != LastVisited->first)
    return 0;

  BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue());
  // Initialize the worklist with the dead successor as long as
  // it is executable and has a unique predecessor.
  SmallVector<BasicBlock *> WorkList;
  if (isBlockExecutable(Succ) &&
      canEliminateSuccessor(I.getParent(), Succ, DeadBlocks))
    WorkList.push_back(Succ);

  return estimateBasicBlocks(WorkList);
}

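/// Starting from \p Root, walk the graph of PHI nodes that transitively feed
/// each other and check that every live incoming value is either another PHI
/// in the graph, a self-reference, or the single constant \p Const. Gives up
/// once MaxDiscoveryIterations or MaxIncomingPhiValues is exceeded.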
bool InstCostVisitor::discoverTransitivelyIncomingValues(
    Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs) {

  SmallVector<PHINode *, 64> WorkList;
  WorkList.push_back(Root);
  unsigned Iter = 0;

  while (!WorkList.empty()) {
    PHINode *PN = WorkList.pop_back_val();

    if (++Iter > MaxDiscoveryIterations ||
        PN->getNumIncomingValues() > MaxIncomingPhiValues)
      return false;

    if (!TransitivePHIs.insert(PN).second)
      continue;

    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      Value *V = PN->getIncomingValue(I);

      // Disregard self-references and dead incoming values.
      if (auto *Inst = dyn_cast<Instruction>(V))
        if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
          continue;

      if (Constant *C = findConstantFor(V, KnownConstants)) {
        // Not all incoming values are the same constant. Bail immediately.
        if (C != Const)
          return false;
        continue;
      }

      if (auto *Phi = dyn_cast<PHINode>(V)) {
        WorkList.push_back(Phi);
        continue;
      }

      // We can't reason about anything else.
      return false;
    }
  }
  return true;
}

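/// Try to fold a PHI node to a constant: succeeds when all live incoming
/// values are the same constant, possibly routed through a web of transitive
/// PHIs. A PHI seen for the first time with unknown incoming values is
/// deferred to PendingPHIs for a later retry.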
Constant *InstCostVisitor::visitPHINode(PHINode &I) {
  if (I.getNumIncomingValues() > MaxIncomingPhiValues)
    return nullptr;

  bool Inserted = VisitedPHIs.insert(&I).second;
  Constant *Const = nullptr;
  bool HaveSeenIncomingPHI = false;

  for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
    Value *V = I.getIncomingValue(Idx);

    // Disregard self-references and dead incoming values.
    if (auto *Inst = dyn_cast<Instruction>(V))
      if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
        continue;

    if (Constant *C = findConstantFor(V, KnownConstants)) {
      if (!Const)
        Const = C;
      // Not all incoming values are the same constant. Bail immediately.
      if (C != Const)
        return nullptr;
      continue;
    }

    if (Inserted) {
      // First time we are seeing this phi. We will retry later, after
      // all the constant arguments have been propagated. Bail for now.
      PendingPHIs.push_back(&I);
      return nullptr;
    }

    if (isa<PHINode>(V)) {
      // Perhaps it is a Transitive Phi. We will confirm later.
      HaveSeenIncomingPHI = true;
      continue;
    }

    // We can't reason about anything else.
    return nullptr;
  }

  if (!Const)
    return nullptr;

  if (!HaveSeenIncomingPHI)
    return Const;

  DenseSet<PHINode *> TransitivePHIs;
  if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs))
    return nullptr;

  return Const;
}

Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second))
    return LastVisited->second;
  return nullptr;
}

Constant *InstCostVisitor::visitCallBase(CallBase &I) {
  Function *F = I.getCalledFunction();
  if (!F || !canConstantFoldCallTo(&I, F))
    return nullptr;

  SmallVector<Constant *, 8> Operands;
  Operands.reserve(I.getNumOperands());

  for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) {
    Value *V = I.getOperand(Idx);
    Constant *C = findConstantFor(V, KnownConstants);
    if (!C)
      return nullptr;
    Operands.push_back(C);
  }

  auto Ops = ArrayRef(Operands.begin(), Operands.end());
  return ConstantFoldCall(&I, F, Ops);
}

Constant *InstCostVisitor::visitLoadInst(LoadInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (isa<ConstantPointerNull>(LastVisited->second))
    return nullptr;
  return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL);
}

Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
  SmallVector<Constant *, 8> Operands;
  Operands.reserve(I.getNumOperands());

  for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {
    Value *V = I.getOperand(Idx);
    Constant *C = findConstantFor(V, KnownConstants);
    if (!C)
      return nullptr;
    Operands.push_back(C);
  }

  auto Ops = ArrayRef(Operands.begin(), Operands.end());
  return ConstantFoldInstOperands(&I, Ops, DL);
}

Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  if (I.getCondition() != LastVisited->first)
    return nullptr;

  Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()
                                                : I.getTrueValue();
  Constant *C = findConstantFor(V, KnownConstants);
  return C;
}

Constant *InstCostVisitor::visitCastInst(CastInst &I) {
  return ConstantFoldCastOperand(I.getOpcode(), LastVisited->second,
                                 I.getType(), DL);
}

Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  bool Swap = I.getOperand(1) == LastVisited->first;
  Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
  Constant *Other = findConstantFor(V, KnownConstants);
  if (!Other)
    return nullptr;

  Constant *Const = LastVisited->second;
  return Swap
      ? ConstantFoldCompareInstOperands(I.getPredicate(), Other, Const, DL)
      : ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
}

Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL);
}

Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
  assert(LastVisited != KnownConstants.end() && "Invalid iterator!");

  bool Swap = I.getOperand(1) == LastVisited->first;
  Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
  Constant *Other = findConstantFor(V, KnownConstants);
  if (!Other)
    return nullptr;

  Constant *Const = LastVisited->second;
  return dyn_cast_or_null<Constant>(
      Swap ? simplifyBinOp(I.getOpcode(), Other, Const, SimplifyQuery(DL))
           : simplifyBinOp(I.getOpcode(), Const, Other, SimplifyQuery(DL)));
}

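// Check whether \p Alloca would be promotable if it weren't for its use in
// \p Call: besides that use, only a single non-volatile store and bitcasts
// feeding the call are allowed. Return the candidate constant stored to it,
// if any.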
Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
                                                   CallInst *Call) {
  Value *StoreValue = nullptr;
  for (auto *User : Alloca->users()) {
    // We can't use llvm::isAllocaPromotable() as that would fail because of
    // the usage in the CallInst, which is what we check here.
    if (User == Call)
      continue;
    if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
      if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
        return nullptr;
      continue;
    }

    if (auto *Store = dyn_cast<StoreInst>(User)) {
      // This is a duplicate store, bail out.
      if (StoreValue || Store->isVolatile())
        return nullptr;
      StoreValue = Store->getValueOperand();
      continue;
    }
    // Bail if there is any other unknown usage.
    return nullptr;
  }

  if (!StoreValue)
    return nullptr;

  return getCandidateConstant(StoreValue);
}

// A constant stack value is an AllocaInst that has a single constant
// value stored to it. Return this constant if such an alloca stack value
// is a function argument.
Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call,
                                                     Value *Val) {
  if (!Val)
    return nullptr;
  Val = Val->stripPointerCasts();
  if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
    return ConstVal;
  auto *Alloca = dyn_cast<AllocaInst>(Val);
  if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
    return nullptr;
  return getPromotableAlloca(Alloca, Call);
}

// To support specializing recursive functions, it is important to propagate
// constant arguments because after a first iteration of specialisation, a
// reduced example may look like this:
//
//   define internal void @RecursiveFn(i32* arg1) {
//     %temp = alloca i32, align 4
//     store i32 2, i32* %temp, align 4
//     call void @RecursiveFn.1(i32* nonnull %temp)
//     ret void
//   }
//
// Before the next iteration, we need to propagate the constant as follows,
// which allows further specialization in subsequent iterations:
//
//   @funcspec.arg = internal constant i32 2
//
//   define internal void @RecursiveFn(i32* arg1) {
//     call void @RecursiveFn.1(i32* nonnull @funcspec.arg)
//     ret void
//   }
//
// See if there are any new constant values for the callers of \p F via
// stack variables and promote them to global variables.
void FunctionSpecializer::promoteConstantStackValues(Function *F) {
  for (User *U : F->users()) {
    auto *Call = dyn_cast<CallInst>(U);
    if (!Call)
      continue;

    if (!Solver.isBlockExecutable(Call->getParent()))
      continue;

    for (const Use &U : Call->args()) {
      unsigned Idx = Call->getArgOperandNo(&U);
      Value *ArgOp = Call->getArgOperand(Idx);
      Type *ArgOpType = ArgOp->getType();

      if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy())
        continue;

      auto *ConstVal = getConstantStackValue(Call, ArgOp);
      if (!ConstVal)
        continue;

      Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
                                     GlobalValue::InternalLinkage, ConstVal,
                                     "specialized.arg." + Twine(++NGlobals));
      Call->setArgOperand(Idx, GV);
    }
  }
}

// ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
// interfere with the promoteConstantStackValues() optimization.
static void removeSSACopy(Function &F) {
  for (BasicBlock &BB : F) {
    for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
      auto *II = dyn_cast<IntrinsicInst>(&Inst);
      if (!II)
        continue;
      if (II->getIntrinsicID() != Intrinsic::ssa_copy)
        continue;
      Inst.replaceAllUsesWith(II->getOperand(0));
      Inst.eraseFromParent();
    }
  }
}

/// Remove any ssa_copy intrinsics that may have been introduced.
void FunctionSpecializer::cleanUpSSA() {
  for (Function *F : Specializations)
    removeSSACopy(*F);
}

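// DenseMap traits for SpecSig, allowing specialization signatures to be used
// as keys of the UniqueSpecs map in findSpecializations().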
template <> struct llvm::DenseMapInfo<SpecSig> {
  static inline SpecSig getEmptyKey() { return {~0U, {}}; }

  static inline SpecSig getTombstoneKey() { return {~1U, {}}; }

  static unsigned getHashValue(const SpecSig &S) {
    return static_cast<unsigned>(hash_value(S));
  }

  static bool isEqual(const SpecSig &LHS, const SpecSig &RHS) {
    return LHS == RHS;
  }
};

FunctionSpecializer::~FunctionSpecializer() {
  LLVM_DEBUG(
    if (NumSpecsCreated > 0)
      dbgs() << "FnSpecialization: Created " << NumSpecsCreated
             << " specializations in module " << M.getName() << "\n");
  // Eliminate dead code.
  removeDeadFunctions();
  cleanUpSSA();
}

/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
/// \returns true if at least one function is specialized.
bool FunctionSpecializer::run() {
  // Find possible specializations for each function.
  SpecMap SM;
  SmallVector<Spec, 32> AllSpecs;
  unsigned NumCandidates = 0;
  for (Function &F : M) {
    if (!isCandidateFunction(&F))
      continue;

    auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
    CodeMetrics &Metrics = It->second;
    // Analyze the function.
    if (Inserted) {
      SmallPtrSet<const Value *, 32> EphValues;
      CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
      for (BasicBlock &BB : F)
        Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
    }

    // If the code metrics reveal that we shouldn't duplicate the function,
    // or if the code size implies that this function is easy to get inlined,
    // then we shouldn't specialize it.
    if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
        (!ForceSpecialization && !F.hasFnAttribute(Attribute::NoInline) &&
         Metrics.NumInsts < MinFunctionSize))
      continue;

    // TODO: For now only consider recursive functions when running multiple
    // times. This should change if specialization on literal constants gets
    // enabled.
    if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
      continue;

    int64_t Sz = *Metrics.NumInsts.getValue();
    assert(Sz > 0 && "CodeSize should be positive");
    // It is safe to down-cast from int64_t, as NumInsts is always positive.
    unsigned FuncSize = static_cast<unsigned>(Sz);

    LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
                      << F.getName() << " is " << FuncSize << "\n");

    if (Inserted && Metrics.isRecursive)
      promoteConstantStackValues(&F);

    if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
      LLVM_DEBUG(
          dbgs() << "FnSpecialization: No possible specializations found for "
                 << F.getName() << "\n");
      continue;
    }

    ++NumCandidates;
  }

  if (!NumCandidates) {
    LLVM_DEBUG(
        dbgs()
        << "FnSpecialization: No possible specializations found in module\n");
    return false;
  }

  // Choose the most profitable specialisations that fit within the module
  // specialization budget, which is derived from the maximum number of
  // specializations allowed per candidate function.
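  // For example, with the default funcspec-max-clones of 3, a module with
  // two candidate functions gets a budget of at most 6 specializations.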
  auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
    if (AllSpecs[I].Score != AllSpecs[J].Score)
      return AllSpecs[I].Score > AllSpecs[J].Score;
    return I > J;
  };
  const unsigned NSpecs =
      std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
  SmallVector<unsigned> BestSpecs(NSpecs + 1);
  std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
  if (AllSpecs.size() > NSpecs) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceeds "
                      << "the maximum number of clones threshold.\n"
                      << "FnSpecialization: Specializing the "
                      << NSpecs
                      << " most profitable candidates.\n");
    std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareScore);
    for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {
      BestSpecs[NSpecs] = I;
      std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
      std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
    }
  }

  LLVM_DEBUG(dbgs() << "FnSpecialization: List of specializations\n";
             for (unsigned I = 0; I < NSpecs; ++I) {
               const Spec &S = AllSpecs[BestSpecs[I]];
               dbgs() << "FnSpecialization: Function " << S.F->getName()
                      << ", score " << S.Score << "\n";
               for (const ArgInfo &Arg : S.Sig.Args)
                 dbgs() << "FnSpecialization: FormalArg = "
                        << Arg.Formal->getNameOrAsOperand()
                        << ", ActualArg = " << Arg.Actual->getNameOrAsOperand()
                        << "\n";
             });

  // Create the chosen specializations.
  SmallPtrSet<Function *, 8> OriginalFuncs;
  SmallVector<Function *> Clones;
  for (unsigned I = 0; I < NSpecs; ++I) {
    Spec &S = AllSpecs[BestSpecs[I]];
    S.Clone = createSpecialization(S.F, S.Sig);

    // Update the known call sites to call the clone.
    for (CallBase *Call : S.CallSites) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
                        << " to call " << S.Clone->getName() << "\n");
      Call->setCalledFunction(S.Clone);
    }

    Clones.push_back(S.Clone);
    OriginalFuncs.insert(S.F);
  }

  Solver.solveWhileResolvedUndefsIn(Clones);

  // Update the rest of the call sites - these are the recursive calls, calls
  // to discarded specialisations and calls that may match a specialisation
  // after the solver runs.
  for (Function *F : OriginalFuncs) {
    auto [Begin, End] = SM[F];
    updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
  }

  for (Function *F : Clones) {
    if (F->getReturnType()->isVoidTy())
      continue;
    if (F->getReturnType()->isStructTy()) {
      auto *STy = cast<StructType>(F->getReturnType());
      if (!Solver.isStructLatticeConstant(F, STy))
        continue;
    } else {
      auto It = Solver.getTrackedRetVals().find(F);
      assert(It != Solver.getTrackedRetVals().end() &&
             "Return value ought to be tracked");
      if (SCCPSolver::isOverdefined(It->second))
        continue;
    }
    for (User *U : F->users()) {
      if (auto *CS = dyn_cast<CallBase>(U)) {
        // The user instruction does not call our function.
        if (CS->getCalledFunction() != F)
          continue;
        Solver.resetLatticeValueFor(CS);
      }
    }
  }

  // Rerun the solver to notify the users of the modified callsites.
  Solver.solveWhileResolvedUndefs();

  for (Function *F : OriginalFuncs)
    if (FunctionMetrics[F].isRecursive)
      promoteConstantStackValues(F);

  return true;
}

void FunctionSpecializer::removeDeadFunctions() {
  for (Function *F : FullySpecialized) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "
                      << F->getName() << "\n");
    if (FAM)
      FAM->clear(*F, F->getName());
    F->eraseFromParent();
  }
  FullySpecialized.clear();
}

/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
  ValueToValueMapTy Mappings;
  Function *Clone = CloneFunction(F, Mappings);
  Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));
  removeSSACopy(*Clone);
  return Clone;
}

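/// Scan the call sites of \p F for constant arguments and build the set of
/// unique, profitable specialization signatures. Populates \p AllSpecs and
/// records the [begin, end) index range of this function's entries in \p SM.
/// Returns true if at least one specialization was found.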
bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                                              SmallVectorImpl<Spec> &AllSpecs,
                                              SpecMap &SM) {
  // A mapping from a specialisation signature to the index of the respective
  // entry in the all specialisation array. Used to ensure uniqueness of
  // specialisations.
  DenseMap<SpecSig, unsigned> UniqueSpecs;

  // Get a list of interesting arguments.
  SmallVector<Argument *> Args;
  for (Argument &Arg : F->args())
    if (isArgumentInteresting(&Arg))
      Args.push_back(&Arg);

  if (Args.empty())
    return false;

  for (User *U : F->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto &CS = *cast<CallBase>(U);

    // The user instruction does not call our function.
    if (CS.getCalledFunction() != F)
      continue;

    // If the call site has the minsize attribute set, it won't be
    // specialized.
    if (CS.hasFnAttr(Attribute::MinSize))
      continue;

    // If the parent of the call site will never be executed, we don't need
    // to worry about the passed value.
    if (!Solver.isBlockExecutable(CS.getParent()))
      continue;

    // Examine arguments and create a specialisation candidate from the
    // constant operands of this call site.
    SpecSig S;
    for (Argument *A : Args) {
      Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
      if (!C)
        continue;
      LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
                        << A->getName() << " : " << C->getNameOrAsOperand()
                        << "\n");
      S.Args.push_back({A, C});
    }

    if (S.Args.empty())
      continue;

    // Check if we have encountered the same specialisation already.
    if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
      // Existing specialisation. Add the call to the list to rewrite, unless
      // it's a recursive call. A specialisation generated because of a
      // recursive call may end up not being the best specialisation for all
      // the cloned instances of this call, which result from specialising
      // functions. Hence we don't rewrite the call directly, but match it
      // with the best specialisation once all specialisations are known.
      if (CS.getFunction() == F)
        continue;
      const unsigned Index = It->second;
      AllSpecs[Index].CallSites.push_back(&CS);
    } else {
      // Calculate the specialisation gain.
      Bonus B;
      unsigned Score = 0;
      InstCostVisitor Visitor = getInstCostVisitorFor(F);
      for (ArgInfo &A : S.Args) {
        B += Visitor.getSpecializationBonus(A.Formal, A.Actual);
        Score += getInliningBonus(A.Formal, A.Actual);
      }
      B += Visitor.getBonusFromPendingPHIs();

      LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
                        << B.CodeSize << ", Latency = " << B.Latency
                        << ", Inlining = " << Score << "}\n");

      FunctionGrowth[F] += FuncSize - B.CodeSize;

      auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
                             unsigned FuncGrowth) -> bool {
        // No check required.
        if (ForceSpecialization)
          return true;
        // Minimum inlining bonus.
        if (Score > MinInliningBonus * FuncSize / 100)
          return true;
        // Minimum codesize savings.
        if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
          return false;
        // Minimum latency savings.
        if (B.Latency < MinLatencySavings * FuncSize / 100)
          return false;
        // Maximum codesize growth.
        if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
          return false;
        return true;
      };
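
      // With the default thresholds and a function of exactly MinFunctionSize
      // (300 instructions), a specialization is kept when its inlining bonus
      // exceeds 900 (300%), or when it saves at least 60 codesize units (20%)
      // and 120 latency units (40%), provided total growth stays within 3x.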

      // Discard unprofitable specialisations.
      if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F]))
        continue;

      // Create a new specialisation entry.
      Score += std::max(B.CodeSize, B.Latency);
      auto &Spec = AllSpecs.emplace_back(F, S, Score);
      if (CS.getFunction() != F)
        Spec.CallSites.push_back(&CS);
      const unsigned Index = AllSpecs.size() - 1;
      UniqueSpecs[S] = Index;
      if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
        It->second.second = Index + 1;
    }
  }

  return !UniqueSpecs.empty();
}

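/// Return true if \p F is a viable candidate for specialization: it must be
/// a definition with arguments, not already a clone, not optimized for size,
/// reachable, and neither noduplicate nor alwaysinline.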
bool FunctionSpecializer::isCandidateFunction(Function *F) {
  if (F->isDeclaration() || F->arg_empty())
    return false;

  if (F->hasFnAttribute(Attribute::NoDuplicate))
    return false;

  // Do not specialize the cloned function again.
  if (Specializations.contains(F))
    return false;

  // If we're optimizing the function for size, we shouldn't specialize it.
  if (F->hasOptSize() ||
      shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass))
    return false;

  // Exit if the function is not executable. There's no point in specializing
  // a dead function.
  if (!Solver.isBlockExecutable(&F->getEntryBlock()))
    return false;

  // It wastes compile time to specialize a function that would eventually
  // get inlined anyway.
  if (F->hasFnAttribute(Attribute::AlwaysInline))
    return false;

  LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
                    << "\n");
  return true;
}

Function *FunctionSpecializer::createSpecialization(Function *F,
                                                    const SpecSig &S) {
  Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1);

  // The original function does not necessarily have internal linkage, but the
  // clone must.
  Clone->setLinkage(GlobalValue::InternalLinkage);

  // Initialize the lattice state of the arguments of the function clone,
  // marking the argument on which we specialized the function constant
  // with the given value.
  Solver.setLatticeValueForSpecializationArguments(Clone, S.Args);
  Solver.markBlockExecutable(&Clone->front());
  Solver.addArgumentTrackedFunction(Clone);
  Solver.addTrackedFunction(Clone);

  // Mark all the specialized functions.
  Specializations.insert(Clone);
  ++NumSpecsCreated;

  return Clone;
}

/// Compute the inlining bonus for replacing argument \p A with constant \p C.
/// The below heuristic is only concerned with exposing inlining
/// opportunities via indirect call promotion. If the argument is not a
/// (potentially casted) function pointer, give up.
unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) {
  Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
  if (!CalledFunction)
    return 0;

  // Get TTI for the called function (used for the inline cost).
  auto &CalleeTTI = (GetTTI)(*CalledFunction);

  // Look at all the call sites whose called value is the argument.
  // Specializing the function on the argument would allow these indirect
  // calls to be promoted to direct calls. If the indirect call promotion
  // would likely enable the called function to be inlined, specializing is a
  // good idea.
  int InliningBonus = 0;
  for (User *U : A->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto *CS = cast<CallBase>(U);
    if (CS->getCalledOperand() != A)
      continue;
    if (CS->getFunctionType() != CalledFunction->getFunctionType())
      continue;

    // Get the cost of inlining the called function at this call site. Note
    // that this is only an estimate. The called function may eventually
    // change in a way that leads to it not being inlined here, even though
    // inlining looks profitable now. For example, one of its called
    // functions may be inlined into it, making the called function too large
    // to be inlined into this call site.
    //
    // We apply a boost for performing indirect call promotion by increasing
    // the default threshold by the threshold for indirect calls.
    auto Params = getInlineParams();
    Params.DefaultThreshold += InlineConstants::IndirectCallThreshold;
    InlineCost IC =
        getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);

    // We clamp the bonus for this call to be between zero and the default
    // threshold.
    if (IC.isAlways())
      InliningBonus += Params.DefaultThreshold;
    else if (IC.isVariable() && IC.getCostDelta() > 0)
      InliningBonus += IC.getCostDelta();

    LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus
                      << " for user " << *U << "\n");
  }

  return InliningBonus > 0 ? static_cast<unsigned>(InliningBonus) : 0;
}

/// Determine if it is possible to specialise the function for constant values
/// of the formal parameter \p A.
bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
  // No point in specialization if the argument is unused.
  if (A->user_empty())
    return false;

  Type *Ty = A->getType();
  if (!Ty->isPointerTy() && (!SpecializeLiteralConstant ||
      (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
    return false;

  // The SCCP solver does not record an argument that will be constructed on
  // the stack.
  if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory())
    return false;

  // For non-argument-tracked functions every argument is overdefined.
  if (!Solver.isArgumentTrackedFunction(A->getParent()))
    return true;

  // Check the lattice value and decide if we should attempt to specialize,
  // based on this argument. No point in specialization, if the lattice value
  // is already a constant.
  bool IsOverdefined = Ty->isStructTy()
    ? any_of(Solver.getStructLatticeValueFor(A), SCCPSolver::isOverdefined)
    : SCCPSolver::isOverdefined(Solver.getLatticeValueFor(A));

  LLVM_DEBUG(
    if (IsOverdefined)
      dbgs() << "FnSpecialization: Found interesting parameter "
             << A->getNameOrAsOperand() << "\n";
    else
      dbgs() << "FnSpecialization: Nothing to do, parameter "
             << A->getNameOrAsOperand() << " is already constant\n";
  );
  return IsOverdefined;
}

/// Check if the value \p V (an actual argument) is a constant or can only
/// have a constant value. Return that constant.
Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
  if (isa<PoisonValue>(V))
    return nullptr;

  // Select for possible specialisation values that are constants or
  // are deduced to be constants or constant ranges with a single element.
  Constant *C = dyn_cast<Constant>(V);
  if (!C)
    C = Solver.getConstantOrNull(V);

  // Don't specialize on (anything derived from) the address of a non-constant
  // global variable, unless explicitly enabled.
  if (C && C->getType()->isPointerTy() && !C->isNullValue())
    if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
        GV && !(GV->isConstant() || SpecializeOnAddress))
      return nullptr;

  return C;
}

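/// Redirect the remaining call sites of \p F - recursive calls, calls to
/// discarded specialisations and calls matched only after the solver ran -
/// to the best-scoring specialization in [\p Begin, \p End) whose signature
/// they satisfy. If no calls to the original remain, mark it unreachable.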
void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
                                          const Spec *End) {
  // Collect the call sites that need updating.
  SmallVector<CallBase *> ToUpdate;
  for (User *U : F->users())
    if (auto *CS = dyn_cast<CallBase>(U);
        CS && CS->getCalledFunction() == F &&
        Solver.isBlockExecutable(CS->getParent()))
      ToUpdate.push_back(CS);

  unsigned NCallsLeft = ToUpdate.size();
  for (CallBase *CS : ToUpdate) {
    bool ShouldDecrementCount = CS->getFunction() == F;

    // Find the best matching specialisation.
    const Spec *BestSpec = nullptr;
    for (const Spec &S : make_range(Begin, End)) {
      if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score))
        continue;

      if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {
            unsigned ArgNo = Arg.Formal->getArgNo();
            return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual;
          }))
        continue;

      BestSpec = &S;
    }

    if (BestSpec) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *CS
                        << " to call " << BestSpec->Clone->getName() << "\n");
      CS->setCalledFunction(BestSpec->Clone);
      ShouldDecrementCount = true;
    }

    if (ShouldDecrementCount)
      --NCallsLeft;
  }

  // If the function has been completely specialized, the original function
  // is no longer needed. Mark it unreachable.
  if (NCallsLeft == 0 && Solver.isArgumentTrackedFunction(F)) {
    Solver.markFunctionUnreachable(F);
    FullySpecialized.insert(F);
  }
}