1 | //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the transformation that promotes indirect calls to |
10 | // conditional direct calls when the indirect-call value profile metadata is |
11 | // available. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/ADT/ArrayRef.h" |
16 | #include "llvm/ADT/DenseMap.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" |
20 | #include "llvm/Analysis/IndirectCallVisitor.h" |
21 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
22 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
23 | #include "llvm/Analysis/TypeMetadataUtils.h" |
24 | #include "llvm/IR/DiagnosticInfo.h" |
25 | #include "llvm/IR/Dominators.h" |
26 | #include "llvm/IR/Function.h" |
27 | #include "llvm/IR/InstrTypes.h" |
28 | #include "llvm/IR/Instructions.h" |
29 | #include "llvm/IR/LLVMContext.h" |
30 | #include "llvm/IR/MDBuilder.h" |
31 | #include "llvm/IR/PassManager.h" |
32 | #include "llvm/IR/ProfDataUtils.h" |
33 | #include "llvm/IR/Value.h" |
34 | #include "llvm/ProfileData/InstrProf.h" |
35 | #include "llvm/Support/Casting.h" |
36 | #include "llvm/Support/CommandLine.h" |
37 | #include "llvm/Support/Debug.h" |
38 | #include "llvm/Support/Error.h" |
39 | #include "llvm/Support/raw_ostream.h" |
40 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
41 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" |
42 | #include "llvm/Transforms/Utils/Instrumentation.h" |
43 | #include <cassert> |
44 | #include <cstdint> |
45 | #include <set> |
46 | #include <string> |
47 | #include <unordered_map> |
48 | #include <utility> |
49 | #include <vector> |
50 | |
51 | using namespace llvm; |
52 | |
53 | #define DEBUG_TYPE "pgo-icall-prom" |
54 | |
55 | STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions." ); |
56 | STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites." ); |
57 | |
58 | extern cl::opt<unsigned> MaxNumVTableAnnotations; |
59 | |
60 | namespace llvm { |
61 | extern cl::opt<bool> EnableVTableProfileUse; |
62 | } |
63 | |
64 | // Command line option to disable indirect-call promotion with the default as |
65 | // false. This is for debug purpose. |
66 | static cl::opt<bool> DisableICP("disable-icp" , cl::init(Val: false), cl::Hidden, |
67 | cl::desc("Disable indirect call promotion" )); |
68 | |
69 | // Set the cutoff value for the promotion. If the value is other than 0, we |
70 | // stop the transformation once the total number of promotions equals the cutoff |
71 | // value. |
72 | // For debug use only. |
73 | static cl::opt<unsigned> |
74 | ICPCutOff("icp-cutoff" , cl::init(Val: 0), cl::Hidden, |
75 | cl::desc("Max number of promotions for this compilation" )); |
76 | |
77 | // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. |
78 | // For debug use only. |
79 | static cl::opt<unsigned> |
80 | ICPCSSkip("icp-csskip" , cl::init(Val: 0), cl::Hidden, |
81 | cl::desc("Skip Callsite up to this number for this compilation" )); |
82 | |
83 | // Set if the pass is called in LTO optimization. The difference for LTO mode |
84 | // is the pass won't prefix the source module name to the internal linkage |
85 | // symbols. |
86 | static cl::opt<bool> ICPLTOMode("icp-lto" , cl::init(Val: false), cl::Hidden, |
87 | cl::desc("Run indirect-call promotion in LTO " |
88 | "mode" )); |
89 | |
90 | // Set if the pass is called in SamplePGO mode. The difference for SamplePGO |
91 | // mode is it will add prof metadatato the created direct call. |
92 | static cl::opt<bool> |
93 | ICPSamplePGOMode("icp-samplepgo" , cl::init(Val: false), cl::Hidden, |
94 | cl::desc("Run indirect-call promotion in SamplePGO mode" )); |
95 | |
96 | // If the option is set to true, only call instructions will be considered for |
97 | // transformation -- invoke instructions will be ignored. |
98 | static cl::opt<bool> |
99 | ICPCallOnly("icp-call-only" , cl::init(Val: false), cl::Hidden, |
100 | cl::desc("Run indirect-call promotion for call instructions " |
101 | "only" )); |
102 | |
103 | // If the option is set to true, only invoke instructions will be considered for |
104 | // transformation -- call instructions will be ignored. |
105 | static cl::opt<bool> ICPInvokeOnly("icp-invoke-only" , cl::init(Val: false), |
106 | cl::Hidden, |
107 | cl::desc("Run indirect-call promotion for " |
108 | "invoke instruction only" )); |
109 | |
110 | // Dump the function level IR if the transformation happened in this |
111 | // function. For debug use only. |
112 | static cl::opt<bool> |
113 | ICPDUMPAFTER("icp-dumpafter" , cl::init(Val: false), cl::Hidden, |
114 | cl::desc("Dump IR after transformation happens" )); |
115 | |
116 | // Indirect call promotion pass will fall back to function-based comparison if |
117 | // vtable-count / function-count is smaller than this threshold. |
118 | static cl::opt<float> ICPVTablePercentageThreshold( |
119 | "icp-vtable-percentage-threshold" , cl::init(Val: 0.995), cl::Hidden, |
120 | cl::desc("The percentage threshold of vtable-count / function-count for " |
121 | "cost-benefit analysis." )); |
122 | |
123 | // Although comparing vtables can save a vtable load, we may need to compare |
124 | // vtable pointer with multiple vtable address points due to class inheritance. |
125 | // Comparing with multiple vtables inserts additional instructions on hot code |
126 | // path, and doing so for an earlier candidate delays the comparisons for later |
127 | // candidates. For the last candidate, only the fallback path is affected. |
128 | // We allow multiple vtable comparison for the last function candidate and use |
129 | // the option below to cap the number of vtables. |
130 | static cl::opt<int> ICPMaxNumVTableLastCandidate( |
131 | "icp-max-num-vtable-last-candidate" , cl::init(Val: 1), cl::Hidden, |
132 | cl::desc("The maximum number of vtable for the last candidate." )); |
133 | |
134 | static cl::list<std::string> ICPIgnoredBaseTypes( |
135 | "icp-ignored-base-types" , cl::Hidden, |
136 | cl::desc( |
137 | "A list of mangled vtable type info names. Classes specified by the " |
138 | "type info names and their derived ones will not be vtable-ICP'ed. " |
139 | "Useful when the profiled types and actual types in the optimized " |
140 | "binary could be different due to profiling limitations. Type info " |
141 | "names are those string literals used in LLVM type metadata" )); |
142 | |
143 | namespace { |
144 | |
145 | // The key is a vtable global variable, and the value is a map. |
146 | // In the inner map, the key represents address point offsets and the value is a |
147 | // constant for this address point. |
148 | using VTableAddressPointOffsetValMap = |
149 | SmallDenseMap<const GlobalVariable *, std::unordered_map<int, Constant *>>; |
150 | |
151 | // A struct to collect type information for a virtual call site. |
152 | struct VirtualCallSiteInfo { |
153 | // The offset from the address point to virtual function in the vtable. |
154 | uint64_t FunctionOffset; |
155 | // The instruction that computes the address point of vtable. |
156 | Instruction *VPtr; |
157 | // The compatible type used in LLVM type intrinsics. |
158 | StringRef CompatibleTypeStr; |
159 | }; |
160 | |
161 | // The key is a virtual call, and value is its type information. |
162 | using VirtualCallSiteTypeInfoMap = |
163 | SmallDenseMap<const CallBase *, VirtualCallSiteInfo>; |
164 | |
165 | // The key is vtable GUID, and value is its value profile count. |
166 | using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>; |
167 | |
168 | // Return the address point offset of the given compatible type. |
169 | // |
170 | // Type metadata of a vtable specifies the types that can contain a pointer to |
171 | // this vtable, for example, `Base*` can be a pointer to an derived type |
172 | // but not vice versa. See also https://llvm.org/docs/TypeMetadata.html |
173 | static std::optional<uint64_t> |
174 | getAddressPointOffset(const GlobalVariable &VTableVar, |
175 | StringRef CompatibleType) { |
176 | SmallVector<MDNode *> Types; |
177 | VTableVar.getMetadata(KindID: LLVMContext::MD_type, MDs&: Types); |
178 | |
179 | for (MDNode *Type : Types) |
180 | if (auto *TypeId = dyn_cast<MDString>(Val: Type->getOperand(I: 1).get()); |
181 | TypeId && TypeId->getString() == CompatibleType) |
182 | return cast<ConstantInt>( |
183 | Val: cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0))->getValue()) |
184 | ->getZExtValue(); |
185 | |
186 | return std::nullopt; |
187 | } |
188 | |
189 | // Return a constant representing the vtable's address point specified by the |
190 | // offset. |
191 | static Constant *getVTableAddressPointOffset(GlobalVariable *VTable, |
192 | uint32_t AddressPointOffset) { |
193 | Module &M = *VTable->getParent(); |
194 | LLVMContext &Context = M.getContext(); |
195 | assert(AddressPointOffset < |
196 | M.getDataLayout().getTypeAllocSize(VTable->getValueType()) && |
197 | "Out-of-bound access" ); |
198 | |
199 | return ConstantExpr::getInBoundsGetElementPtr( |
200 | Ty: Type::getInt8Ty(C&: Context), C: VTable, |
201 | Idx: llvm::ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V: AddressPointOffset)); |
202 | } |
203 | |
204 | // Return the basic block in which Use `U` is used via its `UserInst`. |
205 | static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) { |
206 | if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) |
207 | return PN->getIncomingBlock(U); |
208 | |
209 | return UserInst->getParent(); |
210 | } |
211 | |
212 | // `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users |
213 | // and all users are in `DestBB`. The caller guarantees that `Inst->getParent()` |
214 | // is the sole predecessor of `DestBB` and `DestBB` is dominated by |
215 | // `Inst->getParent()`. |
216 | static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) { |
217 | // 'BB' is used only by assert. |
218 | [[maybe_unused]] BasicBlock *BB = Inst->getParent(); |
219 | |
220 | assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 && |
221 | DestBB->getUniquePredecessor() == BB && |
222 | "Guaranteed by ICP transformation" ); |
223 | |
224 | BasicBlock *UserBB = nullptr; |
225 | for (Use &Use : Inst->uses()) { |
226 | User *User = Use.getUser(); |
227 | // Do checked cast since IR verifier guarantees that the user of an |
228 | // instruction must be an instruction. See `Verifier::visitInstruction`. |
229 | Instruction *UserInst = cast<Instruction>(Val: User); |
230 | // We can sink debug or pseudo instructions together with Inst. |
231 | if (UserInst->isDebugOrPseudoInst()) |
232 | continue; |
233 | UserBB = getUserBasicBlock(U&: Use, UserInst); |
234 | // Do not sink if Inst is used in a basic block that is not DestBB. |
235 | // TODO: Sink to the common dominator of all user blocks. |
236 | if (UserBB != DestBB) |
237 | return false; |
238 | } |
239 | return UserBB != nullptr; |
240 | } |
241 | |
242 | // For the virtual call dispatch sequence, try to sink vtable load instructions |
243 | // to the cold indirect call fallback. |
244 | // FIXME: Move the sink eligibility check below to a utility function in |
245 | // Transforms/Utils/ directory. |
246 | static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { |
247 | if (!isDestBBSuitableForSink(Inst: I, DestBB: DestBlock)) |
248 | return false; |
249 | |
250 | // Do not move control-flow-involving, volatile loads, vaarg, alloca |
251 | // instructions, etc. |
252 | if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || |
253 | isa<AllocaInst>(Val: I)) |
254 | return false; |
255 | |
256 | // Do not sink convergent call instructions. |
257 | if (const auto *C = dyn_cast<CallBase>(Val: I)) |
258 | if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent()) |
259 | return false; |
260 | |
261 | // Do not move an instruction that may write to memory. |
262 | if (I->mayWriteToMemory()) |
263 | return false; |
264 | |
265 | // We can only sink load instructions if there is nothing between the load and |
266 | // the end of block that could change the value. |
267 | if (I->mayReadFromMemory()) { |
268 | // We already know that SrcBlock is the unique predecessor of DestBlock. |
269 | for (BasicBlock::iterator Scan = std::next(x: I->getIterator()), |
270 | E = I->getParent()->end(); |
271 | Scan != E; ++Scan) { |
272 | // Note analysis analysis can tell whether two pointers can point to the |
273 | // same object in memory or not thereby find further opportunities to |
274 | // sink. |
275 | if (Scan->mayWriteToMemory()) |
276 | return false; |
277 | } |
278 | } |
279 | |
280 | BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); |
281 | I->moveBefore(BB&: *DestBlock, I: InsertPos); |
282 | |
283 | // TODO: Sink debug intrinsic users of I to 'DestBlock'. |
284 | // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and |
285 | // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have |
286 | // the core logic to do this. |
287 | return true; |
288 | } |
289 | |
290 | // Try to sink instructions after VPtr to the indirect call fallback. |
291 | // Return the number of sunk IR instructions. |
292 | static int tryToSinkInstructions(BasicBlock *OriginalBB, |
293 | BasicBlock *IndirectCallBB) { |
294 | int SinkCount = 0; |
295 | // Do not sink across a critical edge for simplicity. |
296 | if (IndirectCallBB->getUniquePredecessor() != OriginalBB) |
297 | return SinkCount; |
298 | // Sink all eligible instructions in OriginalBB in reverse order. |
299 | for (Instruction &I : |
300 | llvm::make_early_inc_range(Range: llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: *OriginalBB)))) |
301 | if (tryToSinkInstruction(I: &I, DestBlock: IndirectCallBB)) |
302 | SinkCount++; |
303 | |
304 | return SinkCount; |
305 | } |
306 | |
307 | // Promote indirect calls to conditional direct calls, keeping track of |
308 | // thresholds. |
309 | class IndirectCallPromoter { |
310 | private: |
311 | Function &F; |
312 | Module &M; |
313 | |
314 | // Symtab that maps indirect call profile values to function names and |
315 | // defines. |
316 | InstrProfSymtab *const Symtab; |
317 | |
318 | const bool SamplePGO; |
319 | |
320 | // A map from a virtual call to its type information. |
321 | const VirtualCallSiteTypeInfoMap &VirtualCSInfo; |
322 | |
323 | VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal; |
324 | |
325 | OptimizationRemarkEmitter &ORE; |
326 | |
327 | const DenseSet<StringRef> &IgnoredBaseTypes; |
328 | |
329 | // A struct that records the direct target and it's call count. |
330 | struct PromotionCandidate { |
331 | Function *const TargetFunction; |
332 | const uint64_t Count; |
333 | |
334 | // The following fields only exists for promotion candidates with vtable |
335 | // information. |
336 | // |
337 | // Due to class inheritance, one virtual call candidate can come from |
338 | // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and |
339 | // counts for 'TargetFunction'. `AddressPoints` stores the vtable address |
340 | // points for comparison. |
341 | VTableGUIDCountsMap VTableGUIDAndCounts; |
342 | SmallVector<Constant *> AddressPoints; |
343 | |
344 | PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} |
345 | }; |
346 | |
347 | // Check if the indirect-call call site should be promoted. Return the number |
348 | // of promotions. Inst is the candidate indirect call, ValueDataRef |
349 | // contains the array of value profile data for profiled targets, |
350 | // TotalCount is the total profiled count of call executions, and |
351 | // NumCandidates is the number of candidate entries in ValueDataRef. |
352 | std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( |
353 | const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef, |
354 | uint64_t TotalCount, uint32_t NumCandidates); |
355 | |
356 | // Promote a list of targets for one indirect-call callsite by comparing |
357 | // indirect callee with functions. Return true if there are IR |
358 | // transformations and false otherwise. |
359 | bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr, |
360 | ArrayRef<PromotionCandidate> Candidates, |
361 | uint64_t TotalCount, |
362 | ArrayRef<InstrProfValueData> ICallProfDataRef, |
363 | uint32_t NumCandidates, |
364 | VTableGUIDCountsMap &VTableGUIDCounts); |
365 | |
366 | // Promote a list of targets for one indirect call by comparing vtables with |
367 | // functions. Return true if there are IR transformations and false |
368 | // otherwise. |
369 | bool tryToPromoteWithVTableCmp( |
370 | CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, |
371 | uint64_t TotalFuncCount, uint32_t NumCandidates, |
372 | MutableArrayRef<InstrProfValueData> ICallProfDataRef, |
373 | VTableGUIDCountsMap &VTableGUIDCounts); |
374 | |
375 | // Return true if it's profitable to compare vtables for the callsite. |
376 | bool isProfitableToCompareVTables(const CallBase &CB, |
377 | ArrayRef<PromotionCandidate> Candidates); |
378 | |
379 | // Return true if the vtable corresponding to VTableGUID should be skipped |
380 | // for vtable-based comparison. |
381 | bool shouldSkipVTable(uint64_t VTableGUID); |
382 | |
383 | // Given an indirect callsite and the list of function candidates, compute |
384 | // the following vtable information in output parameters and return vtable |
385 | // pointer if type profiles exist. |
386 | // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof |
387 | // metadata attached on the vtable pointer. |
388 | // - For each function candidate, finds out the vtables from which it gets |
389 | // called and stores the <vtable-guid, count> in promotion candidate. |
390 | Instruction *computeVTableInfos(const CallBase *CB, |
391 | VTableGUIDCountsMap &VTableGUIDCounts, |
392 | std::vector<PromotionCandidate> &Candidates); |
393 | |
394 | Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV, |
395 | uint64_t AddressPointOffset); |
396 | |
397 | void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs, |
398 | uint64_t Sum, uint32_t MaxMDCount); |
399 | |
400 | void updateVPtrValueProfiles(Instruction *VPtr, |
401 | VTableGUIDCountsMap &VTableGUIDCounts); |
402 | |
403 | public: |
404 | IndirectCallPromoter( |
405 | Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO, |
406 | const VirtualCallSiteTypeInfoMap &VirtualCSInfo, |
407 | VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal, |
408 | const DenseSet<StringRef> &IgnoredBaseTypes, |
409 | OptimizationRemarkEmitter &ORE) |
410 | : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO), |
411 | VirtualCSInfo(VirtualCSInfo), |
412 | VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE), |
413 | IgnoredBaseTypes(IgnoredBaseTypes) {} |
414 | IndirectCallPromoter(const IndirectCallPromoter &) = delete; |
415 | IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete; |
416 | |
417 | bool processFunction(ProfileSummaryInfo *PSI); |
418 | }; |
419 | |
420 | } // end anonymous namespace |
421 | |
422 | // Indirect-call promotion heuristic. The direct targets are sorted based on |
423 | // the count. Stop at the first target that is not promoted. |
424 | std::vector<IndirectCallPromoter::PromotionCandidate> |
425 | IndirectCallPromoter::getPromotionCandidatesForCallSite( |
426 | const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef, |
427 | uint64_t TotalCount, uint32_t NumCandidates) { |
428 | std::vector<PromotionCandidate> Ret; |
429 | |
430 | LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB |
431 | << " Num_targets: " << ValueDataRef.size() |
432 | << " Num_candidates: " << NumCandidates << "\n" ); |
433 | NumOfPGOICallsites++; |
434 | if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { |
435 | LLVM_DEBUG(dbgs() << " Skip: User options.\n" ); |
436 | return Ret; |
437 | } |
438 | |
439 | for (uint32_t I = 0; I < NumCandidates; I++) { |
440 | uint64_t Count = ValueDataRef[I].Count; |
441 | assert(Count <= TotalCount); |
442 | (void)TotalCount; |
443 | uint64_t Target = ValueDataRef[I].Value; |
444 | LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count |
445 | << " Target_func: " << Target << "\n" ); |
446 | |
447 | if (ICPInvokeOnly && isa<CallInst>(Val: CB)) { |
448 | LLVM_DEBUG(dbgs() << " Not promote: User options.\n" ); |
449 | ORE.emit(RemarkBuilder: [&]() { |
450 | return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions" , &CB) |
451 | << " Not promote: User options" ; |
452 | }); |
453 | break; |
454 | } |
455 | if (ICPCallOnly && isa<InvokeInst>(Val: CB)) { |
456 | LLVM_DEBUG(dbgs() << " Not promote: User option.\n" ); |
457 | ORE.emit(RemarkBuilder: [&]() { |
458 | return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions" , &CB) |
459 | << " Not promote: User options" ; |
460 | }); |
461 | break; |
462 | } |
463 | if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { |
464 | LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n" ); |
465 | ORE.emit(RemarkBuilder: [&]() { |
466 | return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached" , &CB) |
467 | << " Not promote: Cutoff reached" ; |
468 | }); |
469 | break; |
470 | } |
471 | |
472 | // Don't promote if the symbol is not defined in the module. This avoids |
473 | // creating a reference to a symbol that doesn't exist in the module |
474 | // This can happen when we compile with a sample profile collected from |
475 | // one binary but used for another, which may have profiled targets that |
476 | // aren't used in the new binary. We might have a declaration initially in |
477 | // the case where the symbol is globally dead in the binary and removed by |
478 | // ThinLTO. |
479 | Function *TargetFunction = Symtab->getFunction(FuncMD5Hash: Target); |
480 | if (TargetFunction == nullptr || TargetFunction->isDeclaration()) { |
481 | LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n" ); |
482 | ORE.emit(RemarkBuilder: [&]() { |
483 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget" , &CB) |
484 | << "Cannot promote indirect call: target with md5sum " |
485 | << ore::NV("target md5sum" , Target) << " not found" ; |
486 | }); |
487 | break; |
488 | } |
489 | |
490 | const char *Reason = nullptr; |
491 | if (!isLegalToPromote(CB, Callee: TargetFunction, FailureReason: &Reason)) { |
492 | using namespace ore; |
493 | |
494 | ORE.emit(RemarkBuilder: [&]() { |
495 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote" , &CB) |
496 | << "Cannot promote indirect call to " |
497 | << NV("TargetFunction" , TargetFunction) << " with count of " |
498 | << NV("Count" , Count) << ": " << Reason; |
499 | }); |
500 | break; |
501 | } |
502 | |
503 | Ret.push_back(x: PromotionCandidate(TargetFunction, Count)); |
504 | TotalCount -= Count; |
505 | } |
506 | return Ret; |
507 | } |
508 | |
509 | Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar( |
510 | GlobalVariable *GV, uint64_t AddressPointOffset) { |
511 | auto [Iter, Inserted] = |
512 | VTableAddressPointOffsetVal[GV].try_emplace(k: AddressPointOffset, args: nullptr); |
513 | if (Inserted) |
514 | Iter->second = getVTableAddressPointOffset(VTable: GV, AddressPointOffset); |
515 | return Iter->second; |
516 | } |
517 | |
518 | Instruction *IndirectCallPromoter::computeVTableInfos( |
519 | const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap, |
520 | std::vector<PromotionCandidate> &Candidates) { |
521 | if (!EnableVTableProfileUse) |
522 | return nullptr; |
523 | |
524 | // Take the following code sequence as an example, here is how the code works |
525 | // @vtable1 = {[n x ptr] [... ptr @func1]} |
526 | // @vtable2 = {[m x ptr] [... ptr @func2]} |
527 | // |
528 | // %vptr = load ptr, ptr %d, !prof !0 |
529 | // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1") |
530 | // tail call void @llvm.assume(i1 %0) |
531 | // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1 |
532 | // %1 = load ptr, ptr %vfn |
533 | // call void %1(ptr %d), !prof !1 |
534 | // |
535 | // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50} |
536 | // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50} |
537 | // |
538 | // Step 1. Find out the %vptr instruction for indirect call and use its !prof |
539 | // to populate `GUIDCountsMap`. |
540 | // Step 2. For each vtable-guid, look up its definition from symtab. LTO can |
541 | // make vtable definitions visible across modules. |
542 | // Step 3. Compute the byte offset of the virtual call, by adding vtable |
543 | // address point offset and function's offset relative to vtable address |
544 | // point. For each function candidate, this step tells us the vtable from |
545 | // which it comes from, and the vtable address point to compare %vptr with. |
546 | |
547 | // Only virtual calls have virtual call site info. |
548 | auto Iter = VirtualCSInfo.find(Val: CB); |
549 | if (Iter == VirtualCSInfo.end()) |
550 | return nullptr; |
551 | |
552 | LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #" |
553 | << NumOfPGOICallsites << "\n" ); |
554 | |
555 | const auto &VirtualCallInfo = Iter->second; |
556 | Instruction *VPtr = VirtualCallInfo.VPtr; |
557 | |
558 | SmallDenseMap<Function *, int, 4> CalleeIndexMap; |
559 | for (size_t I = 0; I < Candidates.size(); I++) |
560 | CalleeIndexMap[Candidates[I].TargetFunction] = I; |
561 | |
562 | uint64_t TotalVTableCount = 0; |
563 | auto VTableValueDataArray = |
564 | getValueProfDataFromInst(Inst: *VirtualCallInfo.VPtr, ValueKind: IPVK_VTableTarget, |
565 | MaxNumValueData: MaxNumVTableAnnotations, TotalC&: TotalVTableCount); |
566 | if (VTableValueDataArray.empty()) |
567 | return VPtr; |
568 | |
569 | // Compute the functions and counts from by each vtable. |
570 | for (const auto &V : VTableValueDataArray) { |
571 | uint64_t VTableVal = V.Value; |
572 | GUIDCountsMap[VTableVal] = V.Count; |
573 | GlobalVariable *VTableVar = Symtab->getGlobalVariable(MD5Hash: VTableVal); |
574 | if (!VTableVar) { |
575 | LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal |
576 | << "; maybe the vtable isn't imported\n" ); |
577 | continue; |
578 | } |
579 | |
580 | std::optional<uint64_t> MaybeAddressPointOffset = |
581 | getAddressPointOffset(VTableVar: *VTableVar, CompatibleType: VirtualCallInfo.CompatibleTypeStr); |
582 | if (!MaybeAddressPointOffset) |
583 | continue; |
584 | |
585 | const uint64_t AddressPointOffset = *MaybeAddressPointOffset; |
586 | |
587 | Function *Callee = nullptr; |
588 | std::tie(args&: Callee, args: std::ignore) = getFunctionAtVTableOffset( |
589 | GV: VTableVar, Offset: AddressPointOffset + VirtualCallInfo.FunctionOffset, M); |
590 | if (!Callee) |
591 | continue; |
592 | auto CalleeIndexIter = CalleeIndexMap.find(Val: Callee); |
593 | if (CalleeIndexIter == CalleeIndexMap.end()) |
594 | continue; |
595 | |
596 | auto &Candidate = Candidates[CalleeIndexIter->second]; |
597 | // There shouldn't be duplicate GUIDs in one !prof metadata (except |
598 | // duplicated zeros), so assign counters directly won't cause overwrite or |
599 | // counter loss. |
600 | Candidate.VTableGUIDAndCounts[VTableVal] = V.Count; |
601 | Candidate.AddressPoints.push_back( |
602 | Elt: getOrCreateVTableAddressPointVar(GV: VTableVar, AddressPointOffset)); |
603 | } |
604 | |
605 | return VPtr; |
606 | } |
607 | |
608 | // Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight. |
609 | // Scales uint64_t counters down to uint32_t if necessary to prevent overflow. |
610 | static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, |
611 | uint64_t FalseWeight) { |
612 | MDBuilder MDB(Context); |
613 | uint64_t Scale = calculateCountScale(MaxCount: std::max(a: TrueWeight, b: FalseWeight)); |
614 | return MDB.createBranchWeights(TrueWeight: scaleBranchCount(Count: TrueWeight, Scale), |
615 | FalseWeight: scaleBranchCount(Count: FalseWeight, Scale)); |
616 | } |
617 | |
618 | CallBase &llvm::pgo::(CallBase &CB, Function *DirectCallee, |
619 | uint64_t Count, uint64_t TotalCount, |
620 | bool AttachProfToDirectCall, |
621 | OptimizationRemarkEmitter *ORE) { |
622 | CallBase &NewInst = promoteCallWithIfThenElse( |
623 | CB, Callee: DirectCallee, |
624 | BranchWeights: createBranchWeights(Context&: CB.getContext(), TrueWeight: Count, FalseWeight: TotalCount - Count)); |
625 | |
626 | if (AttachProfToDirectCall) |
627 | setBranchWeights(I&: NewInst, Weights: {static_cast<uint32_t>(Count)}, |
628 | /*IsExpected=*/false); |
629 | |
630 | using namespace ore; |
631 | |
632 | if (ORE) |
633 | ORE->emit(RemarkBuilder: [&]() { |
634 | return OptimizationRemark(DEBUG_TYPE, "Promoted" , &CB) |
635 | << "Promote indirect call to " << NV("DirectCallee" , DirectCallee) |
636 | << " with count " << NV("Count" , Count) << " out of " |
637 | << NV("TotalCount" , TotalCount); |
638 | }); |
639 | return NewInst; |
640 | } |
641 | |
642 | // Promote indirect-call to conditional direct-call for one callsite. |
643 | bool IndirectCallPromoter::tryToPromoteWithFuncCmp( |
644 | CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, |
645 | uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef, |
646 | uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) { |
647 | uint32_t NumPromoted = 0; |
648 | |
649 | for (const auto &C : Candidates) { |
650 | uint64_t FuncCount = C.Count; |
651 | pgo::promoteIndirectCall(CB, DirectCallee: C.TargetFunction, Count: FuncCount, TotalCount, |
652 | AttachProfToDirectCall: SamplePGO, ORE: &ORE); |
653 | assert(TotalCount >= FuncCount); |
654 | TotalCount -= FuncCount; |
655 | NumOfPGOICallPromotion++; |
656 | NumPromoted++; |
657 | |
658 | if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty()) |
659 | continue; |
660 | |
661 | // After a virtual call candidate gets promoted, update the vtable's counts |
662 | // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents |
663 | // a vtable from which the virtual call is loaded. Compute the sum and use |
664 | // 128-bit APInt to improve accuracy. |
665 | uint64_t SumVTableCount = 0; |
666 | for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) |
667 | SumVTableCount += VTableCount; |
668 | |
669 | for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) { |
670 | APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/); |
671 | APFuncCount *= VTableCount; |
672 | VTableGUIDCounts[GUID] -= APFuncCount.udiv(RHS: SumVTableCount).getZExtValue(); |
673 | } |
674 | } |
675 | if (NumPromoted == 0) |
676 | return false; |
677 | |
678 | assert(NumPromoted <= ICallProfDataRef.size() && |
679 | "Number of promoted functions should not be greater than the number " |
680 | "of values in profile metadata" ); |
681 | |
682 | // Update value profiles on the indirect call. |
683 | updateFuncValueProfiles(CB, VDs: ICallProfDataRef.slice(N: NumPromoted), Sum: TotalCount, |
684 | MaxMDCount: NumCandidates); |
685 | updateVPtrValueProfiles(VPtr, VTableGUIDCounts); |
686 | return true; |
687 | } |
688 | |
689 | void IndirectCallPromoter::updateFuncValueProfiles( |
690 | CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount, |
691 | uint32_t MaxMDCount) { |
692 | // First clear the existing !prof. |
693 | CB.setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr); |
694 | // Annotate the remaining value profiles if counter is not zero. |
695 | if (TotalCount != 0) |
696 | annotateValueSite(M, Inst&: CB, VDs: CallVDs, Sum: TotalCount, ValueKind: IPVK_IndirectCallTarget, |
697 | MaxMDCount); |
698 | } |
699 | |
700 | void IndirectCallPromoter::updateVPtrValueProfiles( |
701 | Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) { |
702 | if (!EnableVTableProfileUse || VPtr == nullptr || |
703 | !VPtr->getMetadata(KindID: LLVMContext::MD_prof)) |
704 | return; |
705 | VPtr->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr); |
706 | std::vector<InstrProfValueData> VTableValueProfiles; |
707 | uint64_t TotalVTableCount = 0; |
708 | for (auto [GUID, Count] : VTableGUIDCounts) { |
709 | if (Count == 0) |
710 | continue; |
711 | |
712 | VTableValueProfiles.push_back(x: {.Value: GUID, .Count: Count}); |
713 | TotalVTableCount += Count; |
714 | } |
715 | llvm::sort(C&: VTableValueProfiles, |
716 | Comp: [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) { |
717 | return LHS.Count > RHS.Count; |
718 | }); |
719 | |
720 | annotateValueSite(M, Inst&: *VPtr, VDs: VTableValueProfiles, Sum: TotalVTableCount, |
721 | ValueKind: IPVK_VTableTarget, MaxMDCount: VTableValueProfiles.size()); |
722 | } |
723 | |
724 | bool IndirectCallPromoter::tryToPromoteWithVTableCmp( |
725 | CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, |
726 | uint64_t TotalFuncCount, uint32_t NumCandidates, |
727 | MutableArrayRef<InstrProfValueData> ICallProfDataRef, |
728 | VTableGUIDCountsMap &VTableGUIDCounts) { |
729 | SmallVector<uint64_t, 4> PromotedFuncCount; |
730 | |
731 | for (const auto &Candidate : Candidates) { |
732 | for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) |
733 | VTableGUIDCounts[GUID] -= Count; |
734 | |
735 | // 'OriginalBB' is the basic block of indirect call. After each candidate |
736 | // is promoted, a new basic block is created for the indirect fallback basic |
737 | // block and indirect call `CB` is moved into this new BB. |
738 | BasicBlock *OriginalBB = CB.getParent(); |
739 | promoteCallWithVTableCmp( |
740 | CB, VPtr, Callee: Candidate.TargetFunction, AddressPoints: Candidate.AddressPoints, |
741 | BranchWeights: createBranchWeights(Context&: CB.getContext(), TrueWeight: Candidate.Count, |
742 | FalseWeight: TotalFuncCount - Candidate.Count)); |
743 | |
744 | int SinkCount = tryToSinkInstructions(OriginalBB, IndirectCallBB: CB.getParent()); |
745 | |
746 | ORE.emit(RemarkBuilder: [&]() { |
747 | OptimizationRemark (DEBUG_TYPE, "Promoted" , &CB); |
748 | |
749 | const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; |
750 | Remark << "Promote indirect call to " |
751 | << ore::NV("DirectCallee" , Candidate.TargetFunction) |
752 | << " with count " << ore::NV("Count" , Candidate.Count) |
753 | << " out of " << ore::NV("TotalCount" , TotalFuncCount) << ", sink " |
754 | << ore::NV("SinkCount" , SinkCount) |
755 | << " instruction(s) and compare " |
756 | << ore::NV("VTable" , VTableGUIDAndCounts.size()) |
757 | << " vtable(s): {" ; |
758 | |
759 | // Sort GUIDs so remark message is deterministic. |
760 | std::set<uint64_t> GUIDSet; |
761 | for (auto [GUID, Count] : VTableGUIDAndCounts) |
762 | GUIDSet.insert(x: GUID); |
763 | for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) { |
764 | if (Iter != GUIDSet.begin()) |
765 | Remark << ", " ; |
766 | Remark << ore::NV("VTable" , Symtab->getGlobalVariable(MD5Hash: *Iter)); |
767 | } |
768 | |
769 | Remark << "}" ; |
770 | |
771 | return Remark; |
772 | }); |
773 | |
774 | PromotedFuncCount.push_back(Elt: Candidate.Count); |
775 | |
776 | assert(TotalFuncCount >= Candidate.Count && |
777 | "Within one prof metadata, total count is the sum of counts from " |
778 | "individual <target, count> pairs" ); |
779 | // Use std::min since 'TotalFuncCount' is the saturated sum of individual |
780 | // counts, see |
781 | // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288 |
782 | TotalFuncCount -= std::min(a: TotalFuncCount, b: Candidate.Count); |
783 | NumOfPGOICallPromotion++; |
784 | } |
785 | |
786 | if (PromotedFuncCount.empty()) |
787 | return false; |
788 | |
789 | // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a |
790 | // a distinct 'VPtr'. |
791 | // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be |
792 | // used to load multiple virtual functions. The vtable profiles needs to be |
793 | // updated properly in that case (e.g, for each indirect call annotate both |
794 | // type profiles and function profiles in one !prof). |
795 | for (size_t I = 0; I < PromotedFuncCount.size(); I++) |
796 | ICallProfDataRef[I].Count -= |
797 | std::max(a: PromotedFuncCount[I], b: ICallProfDataRef[I].Count); |
798 | // Sort value profiles by count in descending order. |
799 | llvm::stable_sort(Range&: ICallProfDataRef, C: [](const InstrProfValueData &LHS, |
800 | const InstrProfValueData &RHS) { |
801 | return LHS.Count > RHS.Count; |
802 | }); |
803 | // Drop the <target-value, count> pair if count is zero. |
804 | ArrayRef<InstrProfValueData> VDs( |
805 | ICallProfDataRef.begin(), |
806 | llvm::upper_bound(Range&: ICallProfDataRef, Value: 0U, |
807 | C: [](uint64_t Count, const InstrProfValueData &ProfData) { |
808 | return ProfData.Count <= Count; |
809 | })); |
810 | updateFuncValueProfiles(CB, CallVDs: VDs, TotalCount: TotalFuncCount, MaxMDCount: NumCandidates); |
811 | updateVPtrValueProfiles(VPtr, VTableGUIDCounts); |
812 | return true; |
813 | } |
814 | |
815 | // Traverse all the indirect-call callsite and get the value profile |
816 | // annotation to perform indirect-call promotion. |
817 | bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { |
818 | bool Changed = false; |
819 | ICallPromotionAnalysis ICallAnalysis; |
820 | for (auto *CB : findIndirectCalls(F)) { |
821 | uint32_t NumCandidates; |
822 | uint64_t TotalCount; |
823 | auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( |
824 | I: CB, TotalCount, NumCandidates); |
825 | if (!NumCandidates || |
826 | (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(C: TotalCount))) |
827 | continue; |
828 | |
829 | auto PromotionCandidates = getPromotionCandidatesForCallSite( |
830 | CB: *CB, ValueDataRef: ICallProfDataRef, TotalCount, NumCandidates); |
831 | |
832 | VTableGUIDCountsMap VTableGUIDCounts; |
833 | Instruction *VPtr = |
834 | computeVTableInfos(CB, GUIDCountsMap&: VTableGUIDCounts, Candidates&: PromotionCandidates); |
835 | |
836 | if (isProfitableToCompareVTables(CB: *CB, Candidates: PromotionCandidates)) |
837 | Changed |= tryToPromoteWithVTableCmp(CB&: *CB, VPtr, Candidates: PromotionCandidates, |
838 | TotalFuncCount: TotalCount, NumCandidates, |
839 | ICallProfDataRef, VTableGUIDCounts); |
840 | else |
841 | Changed |= tryToPromoteWithFuncCmp(CB&: *CB, VPtr, Candidates: PromotionCandidates, |
842 | TotalCount, ICallProfDataRef, |
843 | NumCandidates, VTableGUIDCounts); |
844 | } |
845 | return Changed; |
846 | } |
847 | |
848 | // TODO: Return false if the function addressing and vtable load instructions |
849 | // cannot sink to indirect fallback. |
850 | bool IndirectCallPromoter::isProfitableToCompareVTables( |
851 | const CallBase &CB, ArrayRef<PromotionCandidate> Candidates) { |
852 | if (!EnableVTableProfileUse || Candidates.empty()) |
853 | return false; |
854 | LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #" |
855 | << NumOfPGOICallsites << CB << "\n" ); |
856 | const size_t CandidateSize = Candidates.size(); |
857 | for (size_t I = 0; I < CandidateSize; I++) { |
858 | auto &Candidate = Candidates[I]; |
859 | auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; |
860 | |
861 | LLVM_DEBUG({ |
862 | dbgs() << " Candidate " << I << " FunctionCount: " << Candidate.Count |
863 | << ", VTableCounts:" ; |
864 | for (const auto &[GUID, Count] : VTableGUIDAndCounts) |
865 | dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() << ", " |
866 | << Count << "}" ; |
867 | dbgs() << "\n" ; |
868 | }); |
869 | |
870 | uint64_t CandidateVTableCount = 0; |
871 | |
872 | for (auto &[GUID, Count] : VTableGUIDAndCounts) { |
873 | CandidateVTableCount += Count; |
874 | |
875 | if (shouldSkipVTable(VTableGUID: GUID)) |
876 | return false; |
877 | } |
878 | |
879 | if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) { |
880 | LLVM_DEBUG( |
881 | dbgs() << " function count " << Candidate.Count |
882 | << " and its vtable sum count " << CandidateVTableCount |
883 | << " have discrepancies. Bail out vtable comparison.\n" ); |
884 | return false; |
885 | } |
886 | |
887 | // 'MaxNumVTable' limits the number of vtables to make vtable comparison |
888 | // profitable. Comparing multiple vtables for one function candidate will |
889 | // insert additional instructions on the hot path, and allowing more than |
890 | // one vtable for non last candidates may or may not elongate the dependency |
891 | // chain for the subsequent candidates. Set its value to 1 for non-last |
892 | // candidate and allow option to override it for the last candidate. |
893 | int MaxNumVTable = 1; |
894 | if (I == CandidateSize - 1) |
895 | MaxNumVTable = ICPMaxNumVTableLastCandidate; |
896 | |
897 | if ((int)Candidate.AddressPoints.size() > MaxNumVTable) { |
898 | LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got " |
899 | << Candidate.AddressPoints.size() |
900 | << " vtables. Bail out for vtable comparison.\n" ); |
901 | return false; |
902 | } |
903 | } |
904 | |
905 | return true; |
906 | } |
907 | |
908 | bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) { |
909 | if (IgnoredBaseTypes.empty()) |
910 | return false; |
911 | |
912 | auto *VTableVar = Symtab->getGlobalVariable(MD5Hash: VTableGUID); |
913 | |
914 | assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts" ); |
915 | |
916 | SmallVector<MDNode *, 2> Types; |
917 | VTableVar->getMetadata(KindID: LLVMContext::MD_type, MDs&: Types); |
918 | |
919 | for (auto *Type : Types) |
920 | if (auto *TypeId = dyn_cast<MDString>(Val: Type->getOperand(I: 1).get())) |
921 | if (IgnoredBaseTypes.contains(V: TypeId->getString())) { |
922 | LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail " |
923 | "out of vtable comparison." ); |
924 | return true; |
925 | } |
926 | return false; |
927 | } |
928 | |
929 | // For virtual calls in the module, collect per-callsite information which will |
930 | // be used to associate an ICP candidate with a vtable and a specific function |
931 | // in the vtable. With type intrinsics (llvm.type.test), we can find virtual |
932 | // calls in a compile-time efficient manner (by iterating its users) and more |
933 | // importantly use the compatible type later to figure out the function byte |
934 | // offset relative to the start of vtables. |
935 | static void |
936 | computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, |
937 | VirtualCallSiteTypeInfoMap &VirtualCSInfo) { |
938 | // Right now only llvm.type.test is used to find out virtual call sites. |
939 | // With ThinLTO and whole-program-devirtualization, llvm.type.test and |
940 | // llvm.public.type.test are emitted, and llvm.public.type.test is either |
941 | // refined to llvm.type.test or dropped before indirect-call-promotion pass. |
942 | // |
943 | // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted. |
944 | // Find out virtual calls by looking at users of llvm.type.checked.load in |
945 | // that case. |
946 | Function *TypeTestFunc = |
947 | Intrinsic::getDeclarationIfExists(M: &M, id: Intrinsic::type_test); |
948 | if (!TypeTestFunc || TypeTestFunc->use_empty()) |
949 | return; |
950 | |
951 | auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
952 | auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & { |
953 | return FAM.getResult<DominatorTreeAnalysis>(IR&: F); |
954 | }; |
955 | // Iterate all type.test calls to find all indirect calls. |
956 | for (Use &U : llvm::make_early_inc_range(Range: TypeTestFunc->uses())) { |
957 | auto *CI = dyn_cast<CallInst>(Val: U.getUser()); |
958 | if (!CI) |
959 | continue; |
960 | auto *TypeMDVal = cast<MetadataAsValue>(Val: CI->getArgOperand(i: 1)); |
961 | if (!TypeMDVal) |
962 | continue; |
963 | auto *CompatibleTypeId = dyn_cast<MDString>(Val: TypeMDVal->getMetadata()); |
964 | if (!CompatibleTypeId) |
965 | continue; |
966 | |
967 | // Find out all devirtualizable call sites given a llvm.type.test |
968 | // intrinsic call. |
969 | SmallVector<DevirtCallSite, 1> DevirtCalls; |
970 | SmallVector<CallInst *, 1> Assumes; |
971 | auto &DT = LookupDomTree(*CI->getFunction()); |
972 | findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); |
973 | |
974 | for (auto &DevirtCall : DevirtCalls) { |
975 | CallBase &CB = DevirtCall.CB; |
976 | // Given an indirect call, try find the instruction which loads a |
977 | // pointer to virtual table. |
978 | Instruction *VTablePtr = |
979 | PGOIndirectCallVisitor::tryGetVTableInstruction(CB: &CB); |
980 | if (!VTablePtr) |
981 | continue; |
982 | VirtualCSInfo[&CB] = {.FunctionOffset: DevirtCall.Offset, .VPtr: VTablePtr, |
983 | .CompatibleTypeStr: CompatibleTypeId->getString()}; |
984 | } |
985 | } |
986 | } |
987 | |
988 | // A wrapper function that does the actual work. |
989 | static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, |
990 | bool SamplePGO, ModuleAnalysisManager &MAM) { |
991 | if (DisableICP) |
992 | return false; |
993 | InstrProfSymtab Symtab; |
994 | if (Error E = Symtab.create(M, InLTO)) { |
995 | std::string SymtabFailure = toString(E: std::move(E)); |
996 | M.getContext().emitError(ErrorStr: "Failed to create symtab: " + SymtabFailure); |
997 | return false; |
998 | } |
999 | bool Changed = false; |
1000 | VirtualCallSiteTypeInfoMap VirtualCSInfo; |
1001 | |
1002 | DenseSet<StringRef> IgnoredBaseTypes; |
1003 | |
1004 | if (EnableVTableProfileUse) { |
1005 | computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo); |
1006 | |
1007 | IgnoredBaseTypes.insert_range(R&: ICPIgnoredBaseTypes); |
1008 | } |
1009 | |
1010 | // VTableAddressPointOffsetVal stores the vtable address points. The vtable |
1011 | // address point of a given <vtable, address point offset> is static (doesn't |
1012 | // change after being computed once). |
1013 | // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map |
1014 | // entry the first time a <vtable, offset> pair is seen, as |
1015 | // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter |
1016 | // repeatedly on each function. |
1017 | VTableAddressPointOffsetValMap VTableAddressPointOffsetVal; |
1018 | |
1019 | for (auto &F : M) { |
1020 | if (F.isDeclaration() || F.hasOptNone()) |
1021 | continue; |
1022 | |
1023 | auto &FAM = |
1024 | MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
1025 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
1026 | |
1027 | IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo, |
1028 | VTableAddressPointOffsetVal, |
1029 | IgnoredBaseTypes, ORE); |
1030 | bool FuncChanged = CallPromoter.processFunction(PSI); |
1031 | if (ICPDUMPAFTER && FuncChanged) { |
1032 | LLVM_DEBUG(dbgs() << "\n== IR Dump After ==" ; F.print(dbgs())); |
1033 | LLVM_DEBUG(dbgs() << "\n" ); |
1034 | } |
1035 | Changed |= FuncChanged; |
1036 | if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { |
1037 | LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n" ); |
1038 | break; |
1039 | } |
1040 | } |
1041 | return Changed; |
1042 | } |
1043 | |
1044 | PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, |
1045 | ModuleAnalysisManager &MAM) { |
1046 | ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M); |
1047 | |
1048 | if (!promoteIndirectCalls(M, PSI, InLTO: InLTO | ICPLTOMode, |
1049 | SamplePGO: SamplePGO | ICPSamplePGOMode, MAM)) |
1050 | return PreservedAnalyses::all(); |
1051 | |
1052 | return PreservedAnalyses::none(); |
1053 | } |
1054 | |