1 | //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the transformation that promotes indirect calls to |
10 | // conditional direct calls when the indirect-call value profile metadata is |
11 | // available. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "llvm/ADT/ArrayRef.h" |
16 | #include "llvm/ADT/DenseMap.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" |
20 | #include "llvm/Analysis/IndirectCallVisitor.h" |
21 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
22 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
23 | #include "llvm/Analysis/TypeMetadataUtils.h" |
24 | #include "llvm/IR/DiagnosticInfo.h" |
25 | #include "llvm/IR/Dominators.h" |
26 | #include "llvm/IR/Function.h" |
27 | #include "llvm/IR/InstrTypes.h" |
28 | #include "llvm/IR/Instructions.h" |
29 | #include "llvm/IR/LLVMContext.h" |
30 | #include "llvm/IR/MDBuilder.h" |
31 | #include "llvm/IR/PassManager.h" |
32 | #include "llvm/IR/ProfDataUtils.h" |
33 | #include "llvm/IR/Value.h" |
34 | #include "llvm/ProfileData/InstrProf.h" |
35 | #include "llvm/Support/Casting.h" |
36 | #include "llvm/Support/CommandLine.h" |
37 | #include "llvm/Support/Debug.h" |
38 | #include "llvm/Support/Error.h" |
39 | #include "llvm/Support/raw_ostream.h" |
40 | #include "llvm/Transforms/Instrumentation.h" |
41 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
42 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" |
43 | #include <cassert> |
44 | #include <cstdint> |
45 | #include <memory> |
46 | #include <set> |
47 | #include <string> |
48 | #include <unordered_map> |
49 | #include <utility> |
50 | #include <vector> |
51 | |
52 | using namespace llvm; |
53 | |
54 | #define DEBUG_TYPE "pgo-icall-prom" |
55 | |
56 | STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions." ); |
57 | STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites." ); |
58 | |
59 | extern cl::opt<unsigned> MaxNumVTableAnnotations; |
60 | |
61 | namespace llvm { |
62 | extern cl::opt<bool> EnableVTableProfileUse; |
63 | } |
64 | |
65 | // Command line option to disable indirect-call promotion with the default as |
66 | // false. This is for debug purpose. |
67 | static cl::opt<bool> DisableICP("disable-icp" , cl::init(Val: false), cl::Hidden, |
68 | cl::desc("Disable indirect call promotion" )); |
69 | |
70 | // Set the cutoff value for the promotion. If the value is other than 0, we |
71 | // stop the transformation once the total number of promotions equals the cutoff |
72 | // value. |
73 | // For debug use only. |
74 | static cl::opt<unsigned> |
75 | ICPCutOff("icp-cutoff" , cl::init(Val: 0), cl::Hidden, |
76 | cl::desc("Max number of promotions for this compilation" )); |
77 | |
78 | // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. |
79 | // For debug use only. |
80 | static cl::opt<unsigned> |
81 | ICPCSSkip("icp-csskip" , cl::init(Val: 0), cl::Hidden, |
82 | cl::desc("Skip Callsite up to this number for this compilation" )); |
83 | |
84 | // Set if the pass is called in LTO optimization. The difference for LTO mode |
85 | // is the pass won't prefix the source module name to the internal linkage |
86 | // symbols. |
87 | static cl::opt<bool> ICPLTOMode("icp-lto" , cl::init(Val: false), cl::Hidden, |
88 | cl::desc("Run indirect-call promotion in LTO " |
89 | "mode" )); |
90 | |
91 | // Set if the pass is called in SamplePGO mode. The difference for SamplePGO |
92 | // mode is it will add prof metadatato the created direct call. |
93 | static cl::opt<bool> |
94 | ICPSamplePGOMode("icp-samplepgo" , cl::init(Val: false), cl::Hidden, |
95 | cl::desc("Run indirect-call promotion in SamplePGO mode" )); |
96 | |
97 | // If the option is set to true, only call instructions will be considered for |
98 | // transformation -- invoke instructions will be ignored. |
99 | static cl::opt<bool> |
100 | ICPCallOnly("icp-call-only" , cl::init(Val: false), cl::Hidden, |
101 | cl::desc("Run indirect-call promotion for call instructions " |
102 | "only" )); |
103 | |
104 | // If the option is set to true, only invoke instructions will be considered for |
105 | // transformation -- call instructions will be ignored. |
106 | static cl::opt<bool> ICPInvokeOnly("icp-invoke-only" , cl::init(Val: false), |
107 | cl::Hidden, |
108 | cl::desc("Run indirect-call promotion for " |
109 | "invoke instruction only" )); |
110 | |
111 | // Dump the function level IR if the transformation happened in this |
112 | // function. For debug use only. |
113 | static cl::opt<bool> |
114 | ICPDUMPAFTER("icp-dumpafter" , cl::init(Val: false), cl::Hidden, |
115 | cl::desc("Dump IR after transformation happens" )); |
116 | |
117 | // Indirect call promotion pass will fall back to function-based comparison if |
118 | // vtable-count / function-count is smaller than this threshold. |
119 | static cl::opt<float> ICPVTablePercentageThreshold( |
120 | "icp-vtable-percentage-threshold" , cl::init(Val: 0.99), cl::Hidden, |
121 | cl::desc("The percentage threshold of vtable-count / function-count for " |
122 | "cost-benefit analysis." )); |
123 | |
124 | // Although comparing vtables can save a vtable load, we may need to compare |
125 | // vtable pointer with multiple vtable address points due to class inheritance. |
126 | // Comparing with multiple vtables inserts additional instructions on hot code |
127 | // path, and doing so for an earlier candidate delays the comparisons for later |
128 | // candidates. For the last candidate, only the fallback path is affected. |
129 | // We allow multiple vtable comparison for the last function candidate and use |
130 | // the option below to cap the number of vtables. |
131 | static cl::opt<int> ICPMaxNumVTableLastCandidate( |
132 | "icp-max-num-vtable-last-candidate" , cl::init(Val: 1), cl::Hidden, |
133 | cl::desc("The maximum number of vtable for the last candidate." )); |
134 | |
135 | namespace { |
136 | |
137 | // The key is a vtable global variable, and the value is a map. |
138 | // In the inner map, the key represents address point offsets and the value is a |
139 | // constant for this address point. |
140 | using VTableAddressPointOffsetValMap = |
141 | SmallDenseMap<const GlobalVariable *, std::unordered_map<int, Constant *>>; |
142 | |
143 | // A struct to collect type information for a virtual call site. |
144 | struct VirtualCallSiteInfo { |
145 | // The offset from the address point to virtual function in the vtable. |
146 | uint64_t FunctionOffset; |
147 | // The instruction that computes the address point of vtable. |
148 | Instruction *VPtr; |
149 | // The compatible type used in LLVM type intrinsics. |
150 | StringRef CompatibleTypeStr; |
151 | }; |
152 | |
153 | // The key is a virtual call, and value is its type information. |
154 | using VirtualCallSiteTypeInfoMap = |
155 | SmallDenseMap<const CallBase *, VirtualCallSiteInfo>; |
156 | |
157 | // The key is vtable GUID, and value is its value profile count. |
158 | using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>; |
159 | |
160 | // Return the address point offset of the given compatible type. |
161 | // |
162 | // Type metadata of a vtable specifies the types that can contain a pointer to |
163 | // this vtable, for example, `Base*` can be a pointer to an derived type |
164 | // but not vice versa. See also https://llvm.org/docs/TypeMetadata.html |
165 | static std::optional<uint64_t> |
166 | getAddressPointOffset(const GlobalVariable &VTableVar, |
167 | StringRef CompatibleType) { |
168 | SmallVector<MDNode *> Types; |
169 | VTableVar.getMetadata(KindID: LLVMContext::MD_type, MDs&: Types); |
170 | |
171 | for (MDNode *Type : Types) |
172 | if (auto *TypeId = dyn_cast<MDString>(Val: Type->getOperand(I: 1).get()); |
173 | TypeId && TypeId->getString() == CompatibleType) |
174 | return cast<ConstantInt>( |
175 | Val: cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0))->getValue()) |
176 | ->getZExtValue(); |
177 | |
178 | return std::nullopt; |
179 | } |
180 | |
181 | // Return a constant representing the vtable's address point specified by the |
182 | // offset. |
183 | static Constant *getVTableAddressPointOffset(GlobalVariable *VTable, |
184 | uint32_t AddressPointOffset) { |
185 | Module &M = *VTable->getParent(); |
186 | LLVMContext &Context = M.getContext(); |
187 | assert(AddressPointOffset < |
188 | M.getDataLayout().getTypeAllocSize(VTable->getValueType()) && |
189 | "Out-of-bound access" ); |
190 | |
191 | return ConstantExpr::getInBoundsGetElementPtr( |
192 | Ty: Type::getInt8Ty(C&: Context), C: VTable, |
193 | Idx: llvm::ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V: AddressPointOffset)); |
194 | } |
195 | |
196 | // Return the basic block in which Use `U` is used via its `UserInst`. |
197 | static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) { |
198 | if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) |
199 | return PN->getIncomingBlock(U); |
200 | |
201 | return UserInst->getParent(); |
202 | } |
203 | |
204 | // `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users |
205 | // and all users are in `DestBB`. The caller guarantees that `Inst->getParent()` |
206 | // is the sole predecessor of `DestBB` and `DestBB` is dominated by |
207 | // `Inst->getParent()`. |
208 | static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) { |
209 | // 'BB' is used only by assert. |
210 | [[maybe_unused]] BasicBlock *BB = Inst->getParent(); |
211 | |
212 | assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 && |
213 | DestBB->getUniquePredecessor() == BB && |
214 | "Guaranteed by ICP transformation" ); |
215 | |
216 | BasicBlock *UserBB = nullptr; |
217 | for (Use &Use : Inst->uses()) { |
218 | User *User = Use.getUser(); |
219 | // Do checked cast since IR verifier guarantees that the user of an |
220 | // instruction must be an instruction. See `Verifier::visitInstruction`. |
221 | Instruction *UserInst = cast<Instruction>(Val: User); |
222 | // We can sink debug or pseudo instructions together with Inst. |
223 | if (UserInst->isDebugOrPseudoInst()) |
224 | continue; |
225 | UserBB = getUserBasicBlock(U&: Use, UserInst); |
226 | // Do not sink if Inst is used in a basic block that is not DestBB. |
227 | // TODO: Sink to the common dominator of all user blocks. |
228 | if (UserBB != DestBB) |
229 | return false; |
230 | } |
231 | return UserBB != nullptr; |
232 | } |
233 | |
234 | // For the virtual call dispatch sequence, try to sink vtable load instructions |
235 | // to the cold indirect call fallback. |
236 | // FIXME: Move the sink eligibility check below to a utility function in |
237 | // Transforms/Utils/ directory. |
238 | static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { |
239 | if (!isDestBBSuitableForSink(Inst: I, DestBB: DestBlock)) |
240 | return false; |
241 | |
242 | // Do not move control-flow-involving, volatile loads, vaarg, alloca |
243 | // instructions, etc. |
244 | if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || |
245 | isa<AllocaInst>(Val: I)) |
246 | return false; |
247 | |
248 | // Do not sink convergent call instructions. |
249 | if (const auto *C = dyn_cast<CallBase>(Val: I)) |
250 | if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent()) |
251 | return false; |
252 | |
253 | // Do not move an instruction that may write to memory. |
254 | if (I->mayWriteToMemory()) |
255 | return false; |
256 | |
257 | // We can only sink load instructions if there is nothing between the load and |
258 | // the end of block that could change the value. |
259 | if (I->mayReadFromMemory()) { |
260 | // We already know that SrcBlock is the unique predecessor of DestBlock. |
261 | for (BasicBlock::iterator Scan = std::next(x: I->getIterator()), |
262 | E = I->getParent()->end(); |
263 | Scan != E; ++Scan) { |
264 | // Note analysis analysis can tell whether two pointers can point to the |
265 | // same object in memory or not thereby find further opportunities to |
266 | // sink. |
267 | if (Scan->mayWriteToMemory()) |
268 | return false; |
269 | } |
270 | } |
271 | |
272 | BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); |
273 | I->moveBefore(BB&: *DestBlock, I: InsertPos); |
274 | |
275 | // TODO: Sink debug intrinsic users of I to 'DestBlock'. |
276 | // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and |
277 | // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have |
278 | // the core logic to do this. |
279 | return true; |
280 | } |
281 | |
282 | // Try to sink instructions after VPtr to the indirect call fallback. |
283 | // Return the number of sunk IR instructions. |
284 | static int tryToSinkInstructions(BasicBlock *OriginalBB, |
285 | BasicBlock *IndirectCallBB) { |
286 | int SinkCount = 0; |
287 | // Do not sink across a critical edge for simplicity. |
288 | if (IndirectCallBB->getUniquePredecessor() != OriginalBB) |
289 | return SinkCount; |
290 | // Sink all eligible instructions in OriginalBB in reverse order. |
291 | for (Instruction &I : |
292 | llvm::make_early_inc_range(Range: llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: *OriginalBB)))) |
293 | if (tryToSinkInstruction(I: &I, DestBlock: IndirectCallBB)) |
294 | SinkCount++; |
295 | |
296 | return SinkCount; |
297 | } |
298 | |
299 | // Promote indirect calls to conditional direct calls, keeping track of |
300 | // thresholds. |
301 | class IndirectCallPromoter { |
302 | private: |
303 | Function &F; |
304 | Module &M; |
305 | |
306 | ProfileSummaryInfo *PSI = nullptr; |
307 | |
308 | // Symtab that maps indirect call profile values to function names and |
309 | // defines. |
310 | InstrProfSymtab *const Symtab; |
311 | |
312 | const bool SamplePGO; |
313 | |
314 | // A map from a virtual call to its type information. |
315 | const VirtualCallSiteTypeInfoMap &VirtualCSInfo; |
316 | |
317 | VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal; |
318 | |
319 | OptimizationRemarkEmitter &ORE; |
320 | |
321 | // A struct that records the direct target and it's call count. |
322 | struct PromotionCandidate { |
323 | Function *const TargetFunction; |
324 | const uint64_t Count; |
325 | |
326 | // The following fields only exists for promotion candidates with vtable |
327 | // information. |
328 | // |
329 | // Due to class inheritance, one virtual call candidate can come from |
330 | // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and |
331 | // counts for 'TargetFunction'. `AddressPoints` stores the vtable address |
332 | // points for comparison. |
333 | VTableGUIDCountsMap VTableGUIDAndCounts; |
334 | SmallVector<Constant *> AddressPoints; |
335 | |
336 | PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} |
337 | }; |
338 | |
339 | // Check if the indirect-call call site should be promoted. Return the number |
340 | // of promotions. Inst is the candidate indirect call, ValueDataRef |
341 | // contains the array of value profile data for profiled targets, |
342 | // TotalCount is the total profiled count of call executions, and |
343 | // NumCandidates is the number of candidate entries in ValueDataRef. |
344 | std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( |
345 | const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef, |
346 | uint64_t TotalCount, uint32_t NumCandidates); |
347 | |
348 | // Promote a list of targets for one indirect-call callsite by comparing |
349 | // indirect callee with functions. Return true if there are IR |
350 | // transformations and false otherwise. |
351 | bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr, |
352 | ArrayRef<PromotionCandidate> Candidates, |
353 | uint64_t TotalCount, |
354 | ArrayRef<InstrProfValueData> ICallProfDataRef, |
355 | uint32_t NumCandidates, |
356 | VTableGUIDCountsMap &VTableGUIDCounts); |
357 | |
358 | // Promote a list of targets for one indirect call by comparing vtables with |
359 | // functions. Return true if there are IR transformations and false |
360 | // otherwise. |
361 | bool tryToPromoteWithVTableCmp( |
362 | CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, |
363 | uint64_t TotalFuncCount, uint32_t NumCandidates, |
364 | MutableArrayRef<InstrProfValueData> ICallProfDataRef, |
365 | VTableGUIDCountsMap &VTableGUIDCounts); |
366 | |
367 | // Return true if it's profitable to compare vtables for the callsite. |
368 | bool isProfitableToCompareVTables(const CallBase &CB, |
369 | ArrayRef<PromotionCandidate> Candidates, |
370 | uint64_t TotalCount); |
371 | |
372 | // Given an indirect callsite and the list of function candidates, compute |
373 | // the following vtable information in output parameters and return vtable |
374 | // pointer if type profiles exist. |
375 | // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof |
376 | // metadata attached on the vtable pointer. |
377 | // - For each function candidate, finds out the vtables from which it gets |
378 | // called and stores the <vtable-guid, count> in promotion candidate. |
379 | Instruction *computeVTableInfos(const CallBase *CB, |
380 | VTableGUIDCountsMap &VTableGUIDCounts, |
381 | std::vector<PromotionCandidate> &Candidates); |
382 | |
383 | Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV, |
384 | uint64_t AddressPointOffset); |
385 | |
386 | void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs, |
387 | uint64_t Sum, uint32_t MaxMDCount); |
388 | |
389 | void updateVPtrValueProfiles(Instruction *VPtr, |
390 | VTableGUIDCountsMap &VTableGUIDCounts); |
391 | |
392 | public: |
393 | IndirectCallPromoter( |
394 | Function &Func, Module &M, ProfileSummaryInfo *PSI, |
395 | InstrProfSymtab *Symtab, bool SamplePGO, |
396 | const VirtualCallSiteTypeInfoMap &VirtualCSInfo, |
397 | VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal, |
398 | OptimizationRemarkEmitter &ORE) |
399 | : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO), |
400 | VirtualCSInfo(VirtualCSInfo), |
401 | VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {} |
402 | IndirectCallPromoter(const IndirectCallPromoter &) = delete; |
403 | IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete; |
404 | |
405 | bool processFunction(ProfileSummaryInfo *PSI); |
406 | }; |
407 | |
408 | } // end anonymous namespace |
409 | |
410 | // Indirect-call promotion heuristic. The direct targets are sorted based on |
411 | // the count. Stop at the first target that is not promoted. |
412 | std::vector<IndirectCallPromoter::PromotionCandidate> |
413 | IndirectCallPromoter::getPromotionCandidatesForCallSite( |
414 | const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef, |
415 | uint64_t TotalCount, uint32_t NumCandidates) { |
416 | std::vector<PromotionCandidate> Ret; |
417 | |
418 | LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB |
419 | << " Num_targets: " << ValueDataRef.size() |
420 | << " Num_candidates: " << NumCandidates << "\n" ); |
421 | NumOfPGOICallsites++; |
422 | if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { |
423 | LLVM_DEBUG(dbgs() << " Skip: User options.\n" ); |
424 | return Ret; |
425 | } |
426 | |
427 | for (uint32_t I = 0; I < NumCandidates; I++) { |
428 | uint64_t Count = ValueDataRef[I].Count; |
429 | assert(Count <= TotalCount); |
430 | (void)TotalCount; |
431 | uint64_t Target = ValueDataRef[I].Value; |
432 | LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count |
433 | << " Target_func: " << Target << "\n" ); |
434 | |
435 | if (ICPInvokeOnly && isa<CallInst>(Val: CB)) { |
436 | LLVM_DEBUG(dbgs() << " Not promote: User options.\n" ); |
437 | ORE.emit(RemarkBuilder: [&]() { |
438 | return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions" , &CB) |
439 | << " Not promote: User options" ; |
440 | }); |
441 | break; |
442 | } |
443 | if (ICPCallOnly && isa<InvokeInst>(Val: CB)) { |
444 | LLVM_DEBUG(dbgs() << " Not promote: User option.\n" ); |
445 | ORE.emit(RemarkBuilder: [&]() { |
446 | return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions" , &CB) |
447 | << " Not promote: User options" ; |
448 | }); |
449 | break; |
450 | } |
451 | if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { |
452 | LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n" ); |
453 | ORE.emit(RemarkBuilder: [&]() { |
454 | return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached" , &CB) |
455 | << " Not promote: Cutoff reached" ; |
456 | }); |
457 | break; |
458 | } |
459 | |
460 | // Don't promote if the symbol is not defined in the module. This avoids |
461 | // creating a reference to a symbol that doesn't exist in the module |
462 | // This can happen when we compile with a sample profile collected from |
463 | // one binary but used for another, which may have profiled targets that |
464 | // aren't used in the new binary. We might have a declaration initially in |
465 | // the case where the symbol is globally dead in the binary and removed by |
466 | // ThinLTO. |
467 | Function *TargetFunction = Symtab->getFunction(FuncMD5Hash: Target); |
468 | if (TargetFunction == nullptr || TargetFunction->isDeclaration()) { |
469 | LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n" ); |
470 | ORE.emit(RemarkBuilder: [&]() { |
471 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget" , &CB) |
472 | << "Cannot promote indirect call: target with md5sum " |
473 | << ore::NV("target md5sum" , Target) << " not found" ; |
474 | }); |
475 | break; |
476 | } |
477 | |
478 | const char *Reason = nullptr; |
479 | if (!isLegalToPromote(CB, Callee: TargetFunction, FailureReason: &Reason)) { |
480 | using namespace ore; |
481 | |
482 | ORE.emit(RemarkBuilder: [&]() { |
483 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote" , &CB) |
484 | << "Cannot promote indirect call to " |
485 | << NV("TargetFunction" , TargetFunction) << " with count of " |
486 | << NV("Count" , Count) << ": " << Reason; |
487 | }); |
488 | break; |
489 | } |
490 | |
491 | Ret.push_back(x: PromotionCandidate(TargetFunction, Count)); |
492 | TotalCount -= Count; |
493 | } |
494 | return Ret; |
495 | } |
496 | |
497 | Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar( |
498 | GlobalVariable *GV, uint64_t AddressPointOffset) { |
499 | auto [Iter, Inserted] = |
500 | VTableAddressPointOffsetVal[GV].try_emplace(k: AddressPointOffset, args: nullptr); |
501 | if (Inserted) |
502 | Iter->second = getVTableAddressPointOffset(VTable: GV, AddressPointOffset); |
503 | return Iter->second; |
504 | } |
505 | |
506 | Instruction *IndirectCallPromoter::computeVTableInfos( |
507 | const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap, |
508 | std::vector<PromotionCandidate> &Candidates) { |
509 | if (!EnableVTableProfileUse) |
510 | return nullptr; |
511 | |
512 | // Take the following code sequence as an example, here is how the code works |
513 | // @vtable1 = {[n x ptr] [... ptr @func1]} |
514 | // @vtable2 = {[m x ptr] [... ptr @func2]} |
515 | // |
516 | // %vptr = load ptr, ptr %d, !prof !0 |
517 | // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1") |
518 | // tail call void @llvm.assume(i1 %0) |
519 | // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1 |
520 | // %1 = load ptr, ptr %vfn |
521 | // call void %1(ptr %d), !prof !1 |
522 | // |
523 | // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50} |
524 | // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50} |
525 | // |
526 | // Step 1. Find out the %vptr instruction for indirect call and use its !prof |
527 | // to populate `GUIDCountsMap`. |
528 | // Step 2. For each vtable-guid, look up its definition from symtab. LTO can |
529 | // make vtable definitions visible across modules. |
530 | // Step 3. Compute the byte offset of the virtual call, by adding vtable |
531 | // address point offset and function's offset relative to vtable address |
532 | // point. For each function candidate, this step tells us the vtable from |
533 | // which it comes from, and the vtable address point to compare %vptr with. |
534 | |
535 | // Only virtual calls have virtual call site info. |
536 | auto Iter = VirtualCSInfo.find(Val: CB); |
537 | if (Iter == VirtualCSInfo.end()) |
538 | return nullptr; |
539 | |
540 | LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #" |
541 | << NumOfPGOICallsites << "\n" ); |
542 | |
543 | const auto &VirtualCallInfo = Iter->second; |
544 | Instruction *VPtr = VirtualCallInfo.VPtr; |
545 | |
546 | SmallDenseMap<Function *, int, 4> CalleeIndexMap; |
547 | for (size_t I = 0; I < Candidates.size(); I++) |
548 | CalleeIndexMap[Candidates[I].TargetFunction] = I; |
549 | |
550 | uint64_t TotalVTableCount = 0; |
551 | auto VTableValueDataArray = |
552 | getValueProfDataFromInst(Inst: *VirtualCallInfo.VPtr, ValueKind: IPVK_VTableTarget, |
553 | MaxNumValueData: MaxNumVTableAnnotations, TotalC&: TotalVTableCount); |
554 | if (VTableValueDataArray.empty()) |
555 | return VPtr; |
556 | |
557 | // Compute the functions and counts from by each vtable. |
558 | for (const auto &V : VTableValueDataArray) { |
559 | uint64_t VTableVal = V.Value; |
560 | GUIDCountsMap[VTableVal] = V.Count; |
561 | GlobalVariable *VTableVar = Symtab->getGlobalVariable(MD5Hash: VTableVal); |
562 | if (!VTableVar) { |
563 | LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal |
564 | << "; maybe the vtable isn't imported\n" ); |
565 | continue; |
566 | } |
567 | |
568 | std::optional<uint64_t> MaybeAddressPointOffset = |
569 | getAddressPointOffset(VTableVar: *VTableVar, CompatibleType: VirtualCallInfo.CompatibleTypeStr); |
570 | if (!MaybeAddressPointOffset) |
571 | continue; |
572 | |
573 | const uint64_t AddressPointOffset = *MaybeAddressPointOffset; |
574 | |
575 | Function *Callee = nullptr; |
576 | std::tie(args&: Callee, args: std::ignore) = getFunctionAtVTableOffset( |
577 | GV: VTableVar, Offset: AddressPointOffset + VirtualCallInfo.FunctionOffset, M); |
578 | if (!Callee) |
579 | continue; |
580 | auto CalleeIndexIter = CalleeIndexMap.find(Val: Callee); |
581 | if (CalleeIndexIter == CalleeIndexMap.end()) |
582 | continue; |
583 | |
584 | auto &Candidate = Candidates[CalleeIndexIter->second]; |
585 | // There shouldn't be duplicate GUIDs in one !prof metadata (except |
586 | // duplicated zeros), so assign counters directly won't cause overwrite or |
587 | // counter loss. |
588 | Candidate.VTableGUIDAndCounts[VTableVal] = V.Count; |
589 | Candidate.AddressPoints.push_back( |
590 | Elt: getOrCreateVTableAddressPointVar(GV: VTableVar, AddressPointOffset)); |
591 | } |
592 | |
593 | return VPtr; |
594 | } |
595 | |
596 | // Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight. |
597 | // Scales uint64_t counters down to uint32_t if necessary to prevent overflow. |
598 | static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, |
599 | uint64_t FalseWeight) { |
600 | MDBuilder MDB(Context); |
601 | uint64_t Scale = calculateCountScale(MaxCount: std::max(a: TrueWeight, b: FalseWeight)); |
602 | return MDB.createBranchWeights(TrueWeight: scaleBranchCount(Count: TrueWeight, Scale), |
603 | FalseWeight: scaleBranchCount(Count: FalseWeight, Scale)); |
604 | } |
605 | |
606 | CallBase &llvm::pgo::(CallBase &CB, Function *DirectCallee, |
607 | uint64_t Count, uint64_t TotalCount, |
608 | bool AttachProfToDirectCall, |
609 | OptimizationRemarkEmitter *ORE) { |
610 | CallBase &NewInst = promoteCallWithIfThenElse( |
611 | CB, Callee: DirectCallee, |
612 | BranchWeights: createBranchWeights(Context&: CB.getContext(), TrueWeight: Count, FalseWeight: TotalCount - Count)); |
613 | |
614 | if (AttachProfToDirectCall) |
615 | setBranchWeights(I&: NewInst, Weights: {static_cast<uint32_t>(Count)}, |
616 | /*IsExpected=*/false); |
617 | |
618 | using namespace ore; |
619 | |
620 | if (ORE) |
621 | ORE->emit(RemarkBuilder: [&]() { |
622 | return OptimizationRemark(DEBUG_TYPE, "Promoted" , &CB) |
623 | << "Promote indirect call to " << NV("DirectCallee" , DirectCallee) |
624 | << " with count " << NV("Count" , Count) << " out of " |
625 | << NV("TotalCount" , TotalCount); |
626 | }); |
627 | return NewInst; |
628 | } |
629 | |
630 | // Promote indirect-call to conditional direct-call for one callsite. |
631 | bool IndirectCallPromoter::tryToPromoteWithFuncCmp( |
632 | CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, |
633 | uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef, |
634 | uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) { |
635 | uint32_t NumPromoted = 0; |
636 | |
637 | for (const auto &C : Candidates) { |
638 | uint64_t FuncCount = C.Count; |
639 | pgo::promoteIndirectCall(CB, DirectCallee: C.TargetFunction, Count: FuncCount, TotalCount, |
640 | AttachProfToDirectCall: SamplePGO, ORE: &ORE); |
641 | assert(TotalCount >= FuncCount); |
642 | TotalCount -= FuncCount; |
643 | NumOfPGOICallPromotion++; |
644 | NumPromoted++; |
645 | |
646 | if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty()) |
647 | continue; |
648 | |
649 | // After a virtual call candidate gets promoted, update the vtable's counts |
650 | // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents |
651 | // a vtable from which the virtual call is loaded. Compute the sum and use |
652 | // 128-bit APInt to improve accuracy. |
653 | uint64_t SumVTableCount = 0; |
654 | for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) |
655 | SumVTableCount += VTableCount; |
656 | |
657 | for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) { |
658 | APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/); |
659 | APFuncCount *= VTableCount; |
660 | VTableGUIDCounts[GUID] -= APFuncCount.udiv(RHS: SumVTableCount).getZExtValue(); |
661 | } |
662 | } |
663 | if (NumPromoted == 0) |
664 | return false; |
665 | |
666 | assert(NumPromoted <= ICallProfDataRef.size() && |
667 | "Number of promoted functions should not be greater than the number " |
668 | "of values in profile metadata" ); |
669 | |
670 | // Update value profiles on the indirect call. |
671 | updateFuncValueProfiles(CB, VDs: ICallProfDataRef.slice(N: NumPromoted), Sum: TotalCount, |
672 | MaxMDCount: NumCandidates); |
673 | updateVPtrValueProfiles(VPtr, VTableGUIDCounts); |
674 | return true; |
675 | } |
676 | |
677 | void IndirectCallPromoter::updateFuncValueProfiles( |
678 | CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount, |
679 | uint32_t MaxMDCount) { |
680 | // First clear the existing !prof. |
681 | CB.setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr); |
682 | // Annotate the remaining value profiles if counter is not zero. |
683 | if (TotalCount != 0) |
684 | annotateValueSite(M, Inst&: CB, VDs: CallVDs, Sum: TotalCount, ValueKind: IPVK_IndirectCallTarget, |
685 | MaxMDCount); |
686 | } |
687 | |
688 | void IndirectCallPromoter::updateVPtrValueProfiles( |
689 | Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) { |
690 | if (!EnableVTableProfileUse || VPtr == nullptr || |
691 | !VPtr->getMetadata(KindID: LLVMContext::MD_prof)) |
692 | return; |
693 | VPtr->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr); |
694 | std::vector<InstrProfValueData> VTableValueProfiles; |
695 | uint64_t TotalVTableCount = 0; |
696 | for (auto [GUID, Count] : VTableGUIDCounts) { |
697 | if (Count == 0) |
698 | continue; |
699 | |
700 | VTableValueProfiles.push_back(x: {.Value: GUID, .Count: Count}); |
701 | TotalVTableCount += Count; |
702 | } |
703 | llvm::sort(C&: VTableValueProfiles, |
704 | Comp: [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) { |
705 | return LHS.Count > RHS.Count; |
706 | }); |
707 | |
708 | annotateValueSite(M, Inst&: *VPtr, VDs: VTableValueProfiles, Sum: TotalVTableCount, |
709 | ValueKind: IPVK_VTableTarget, MaxMDCount: VTableValueProfiles.size()); |
710 | } |
711 | |
712 | bool IndirectCallPromoter::tryToPromoteWithVTableCmp( |
713 | CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates, |
714 | uint64_t TotalFuncCount, uint32_t NumCandidates, |
715 | MutableArrayRef<InstrProfValueData> ICallProfDataRef, |
716 | VTableGUIDCountsMap &VTableGUIDCounts) { |
717 | SmallVector<uint64_t, 4> PromotedFuncCount; |
718 | |
719 | for (const auto &Candidate : Candidates) { |
720 | for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) |
721 | VTableGUIDCounts[GUID] -= Count; |
722 | |
723 | // 'OriginalBB' is the basic block of indirect call. After each candidate |
724 | // is promoted, a new basic block is created for the indirect fallback basic |
725 | // block and indirect call `CB` is moved into this new BB. |
726 | BasicBlock *OriginalBB = CB.getParent(); |
727 | promoteCallWithVTableCmp( |
728 | CB, VPtr, Callee: Candidate.TargetFunction, AddressPoints: Candidate.AddressPoints, |
729 | BranchWeights: createBranchWeights(Context&: CB.getContext(), TrueWeight: Candidate.Count, |
730 | FalseWeight: TotalFuncCount - Candidate.Count)); |
731 | |
732 | int SinkCount = tryToSinkInstructions(OriginalBB, IndirectCallBB: CB.getParent()); |
733 | |
734 | ORE.emit(RemarkBuilder: [&]() { |
735 | OptimizationRemark (DEBUG_TYPE, "Promoted" , &CB); |
736 | |
737 | const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; |
738 | Remark << "Promote indirect call to " |
739 | << ore::NV("DirectCallee" , Candidate.TargetFunction) |
740 | << " with count " << ore::NV("Count" , Candidate.Count) |
741 | << " out of " << ore::NV("TotalCount" , TotalFuncCount) << ", sink " |
742 | << ore::NV("SinkCount" , SinkCount) |
743 | << " instruction(s) and compare " |
744 | << ore::NV("VTable" , VTableGUIDAndCounts.size()) |
745 | << " vtable(s): {" ; |
746 | |
747 | // Sort GUIDs so remark message is deterministic. |
748 | std::set<uint64_t> GUIDSet; |
749 | for (auto [GUID, Count] : VTableGUIDAndCounts) |
750 | GUIDSet.insert(x: GUID); |
751 | for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) { |
752 | if (Iter != GUIDSet.begin()) |
753 | Remark << ", " ; |
754 | Remark << ore::NV("VTable" , Symtab->getGlobalVariable(MD5Hash: *Iter)); |
755 | } |
756 | |
757 | Remark << "}" ; |
758 | |
759 | return Remark; |
760 | }); |
761 | |
762 | PromotedFuncCount.push_back(Elt: Candidate.Count); |
763 | |
764 | assert(TotalFuncCount >= Candidate.Count && |
765 | "Within one prof metadata, total count is the sum of counts from " |
766 | "individual <target, count> pairs" ); |
767 | // Use std::min since 'TotalFuncCount' is the saturated sum of individual |
768 | // counts, see |
769 | // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288 |
770 | TotalFuncCount -= std::min(a: TotalFuncCount, b: Candidate.Count); |
771 | NumOfPGOICallPromotion++; |
772 | } |
773 | |
774 | if (PromotedFuncCount.empty()) |
775 | return false; |
776 | |
777 | // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a |
778 | // a distinct 'VPtr'. |
779 | // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be |
780 | // used to load multiple virtual functions. The vtable profiles needs to be |
781 | // updated properly in that case (e.g, for each indirect call annotate both |
782 | // type profiles and function profiles in one !prof). |
783 | for (size_t I = 0; I < PromotedFuncCount.size(); I++) |
784 | ICallProfDataRef[I].Count -= |
785 | std::max(a: PromotedFuncCount[I], b: ICallProfDataRef[I].Count); |
786 | // Sort value profiles by count in descending order. |
787 | llvm::stable_sort(Range&: ICallProfDataRef, C: [](const InstrProfValueData &LHS, |
788 | const InstrProfValueData &RHS) { |
789 | return LHS.Count > RHS.Count; |
790 | }); |
791 | // Drop the <target-value, count> pair if count is zero. |
792 | ArrayRef<InstrProfValueData> VDs( |
793 | ICallProfDataRef.begin(), |
794 | llvm::upper_bound(Range&: ICallProfDataRef, Value: 0U, |
795 | C: [](uint64_t Count, const InstrProfValueData &ProfData) { |
796 | return ProfData.Count <= Count; |
797 | })); |
798 | updateFuncValueProfiles(CB, CallVDs: VDs, TotalCount: TotalFuncCount, MaxMDCount: NumCandidates); |
799 | updateVPtrValueProfiles(VPtr, VTableGUIDCounts); |
800 | return true; |
801 | } |
802 | |
803 | // Traverse all the indirect-call callsite and get the value profile |
804 | // annotation to perform indirect-call promotion. |
805 | bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { |
806 | bool Changed = false; |
807 | ICallPromotionAnalysis ICallAnalysis; |
808 | for (auto *CB : findIndirectCalls(F)) { |
809 | uint32_t NumCandidates; |
810 | uint64_t TotalCount; |
811 | auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( |
812 | I: CB, TotalCount, NumCandidates); |
813 | if (!NumCandidates || |
814 | (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(C: TotalCount))) |
815 | continue; |
816 | |
817 | auto PromotionCandidates = getPromotionCandidatesForCallSite( |
818 | CB: *CB, ValueDataRef: ICallProfDataRef, TotalCount, NumCandidates); |
819 | |
820 | VTableGUIDCountsMap VTableGUIDCounts; |
821 | Instruction *VPtr = |
822 | computeVTableInfos(CB, GUIDCountsMap&: VTableGUIDCounts, Candidates&: PromotionCandidates); |
823 | |
824 | if (isProfitableToCompareVTables(CB: *CB, Candidates: PromotionCandidates, TotalCount)) |
825 | Changed |= tryToPromoteWithVTableCmp(CB&: *CB, VPtr, Candidates: PromotionCandidates, |
826 | TotalFuncCount: TotalCount, NumCandidates, |
827 | ICallProfDataRef, VTableGUIDCounts); |
828 | else |
829 | Changed |= tryToPromoteWithFuncCmp(CB&: *CB, VPtr, Candidates: PromotionCandidates, |
830 | TotalCount, ICallProfDataRef, |
831 | NumCandidates, VTableGUIDCounts); |
832 | } |
833 | return Changed; |
834 | } |
835 | |
836 | // TODO: Return false if the function addressing and vtable load instructions |
837 | // cannot sink to indirect fallback. |
838 | bool IndirectCallPromoter::isProfitableToCompareVTables( |
839 | const CallBase &CB, ArrayRef<PromotionCandidate> Candidates, |
840 | uint64_t TotalCount) { |
841 | if (!EnableVTableProfileUse || Candidates.empty()) |
842 | return false; |
843 | LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #" |
844 | << NumOfPGOICallsites << CB << "\n" ); |
845 | uint64_t RemainingVTableCount = TotalCount; |
846 | const size_t CandidateSize = Candidates.size(); |
847 | for (size_t I = 0; I < CandidateSize; I++) { |
848 | auto &Candidate = Candidates[I]; |
849 | auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts; |
850 | |
851 | LLVM_DEBUG(dbgs() << " Candidate " << I << " FunctionCount: " |
852 | << Candidate.Count << ", VTableCounts:" ); |
853 | // Add [[maybe_unused]] since <GUID, Count> are only used by LLVM_DEBUG. |
854 | for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts) |
855 | LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() |
856 | << ", " << Count << "}" ); |
857 | LLVM_DEBUG(dbgs() << "\n" ); |
858 | |
859 | uint64_t CandidateVTableCount = 0; |
860 | for (auto &[GUID, Count] : VTableGUIDAndCounts) |
861 | CandidateVTableCount += Count; |
862 | |
863 | if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) { |
864 | LLVM_DEBUG( |
865 | dbgs() << " function count " << Candidate.Count |
866 | << " and its vtable sum count " << CandidateVTableCount |
867 | << " have discrepancies. Bail out vtable comparison.\n" ); |
868 | return false; |
869 | } |
870 | |
871 | RemainingVTableCount -= Candidate.Count; |
872 | |
873 | // 'MaxNumVTable' limits the number of vtables to make vtable comparison |
874 | // profitable. Comparing multiple vtables for one function candidate will |
875 | // insert additional instructions on the hot path, and allowing more than |
876 | // one vtable for non last candidates may or may not elongate the dependency |
877 | // chain for the subsequent candidates. Set its value to 1 for non-last |
878 | // candidate and allow option to override it for the last candidate. |
879 | int MaxNumVTable = 1; |
880 | if (I == CandidateSize - 1) |
881 | MaxNumVTable = ICPMaxNumVTableLastCandidate; |
882 | |
883 | if ((int)Candidate.AddressPoints.size() > MaxNumVTable) { |
884 | LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got " |
885 | << Candidate.AddressPoints.size() |
886 | << " vtables. Bail out for vtable comparison.\n" ); |
887 | return false; |
888 | } |
889 | } |
890 | |
891 | // If the indirect fallback is not cold, don't compare vtables. |
892 | if (PSI && PSI->hasProfileSummary() && |
893 | !PSI->isColdCount(C: RemainingVTableCount)) { |
894 | LLVM_DEBUG(dbgs() << " Indirect fallback basic block is not cold. Bail " |
895 | "out for vtable comparison.\n" ); |
896 | return false; |
897 | } |
898 | |
899 | return true; |
900 | } |
901 | |
902 | // For virtual calls in the module, collect per-callsite information which will |
903 | // be used to associate an ICP candidate with a vtable and a specific function |
904 | // in the vtable. With type intrinsics (llvm.type.test), we can find virtual |
905 | // calls in a compile-time efficient manner (by iterating its users) and more |
906 | // importantly use the compatible type later to figure out the function byte |
907 | // offset relative to the start of vtables. |
908 | static void |
909 | computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, |
910 | VirtualCallSiteTypeInfoMap &VirtualCSInfo) { |
911 | // Right now only llvm.type.test is used to find out virtual call sites. |
912 | // With ThinLTO and whole-program-devirtualization, llvm.type.test and |
913 | // llvm.public.type.test are emitted, and llvm.public.type.test is either |
914 | // refined to llvm.type.test or dropped before indirect-call-promotion pass. |
915 | // |
916 | // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted. |
917 | // Find out virtual calls by looking at users of llvm.type.checked.load in |
918 | // that case. |
919 | Function *TypeTestFunc = |
920 | M.getFunction(Name: Intrinsic::getName(id: Intrinsic::type_test)); |
921 | if (!TypeTestFunc || TypeTestFunc->use_empty()) |
922 | return; |
923 | |
924 | auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
925 | auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & { |
926 | return FAM.getResult<DominatorTreeAnalysis>(IR&: F); |
927 | }; |
928 | // Iterate all type.test calls to find all indirect calls. |
929 | for (Use &U : llvm::make_early_inc_range(Range: TypeTestFunc->uses())) { |
930 | auto *CI = dyn_cast<CallInst>(Val: U.getUser()); |
931 | if (!CI) |
932 | continue; |
933 | auto *TypeMDVal = cast<MetadataAsValue>(Val: CI->getArgOperand(i: 1)); |
934 | if (!TypeMDVal) |
935 | continue; |
936 | auto *CompatibleTypeId = dyn_cast<MDString>(Val: TypeMDVal->getMetadata()); |
937 | if (!CompatibleTypeId) |
938 | continue; |
939 | |
940 | // Find out all devirtualizable call sites given a llvm.type.test |
941 | // intrinsic call. |
942 | SmallVector<DevirtCallSite, 1> DevirtCalls; |
943 | SmallVector<CallInst *, 1> Assumes; |
944 | auto &DT = LookupDomTree(*CI->getFunction()); |
945 | findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); |
946 | |
947 | for (auto &DevirtCall : DevirtCalls) { |
948 | CallBase &CB = DevirtCall.CB; |
949 | // Given an indirect call, try find the instruction which loads a |
950 | // pointer to virtual table. |
951 | Instruction *VTablePtr = |
952 | PGOIndirectCallVisitor::tryGetVTableInstruction(CB: &CB); |
953 | if (!VTablePtr) |
954 | continue; |
955 | VirtualCSInfo[&CB] = {.FunctionOffset: DevirtCall.Offset, .VPtr: VTablePtr, |
956 | .CompatibleTypeStr: CompatibleTypeId->getString()}; |
957 | } |
958 | } |
959 | } |
960 | |
961 | // A wrapper function that does the actual work. |
962 | static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, |
963 | bool SamplePGO, ModuleAnalysisManager &MAM) { |
964 | if (DisableICP) |
965 | return false; |
966 | InstrProfSymtab Symtab; |
967 | if (Error E = Symtab.create(M, InLTO)) { |
968 | std::string SymtabFailure = toString(E: std::move(E)); |
969 | M.getContext().emitError(ErrorStr: "Failed to create symtab: " + SymtabFailure); |
970 | return false; |
971 | } |
972 | bool Changed = false; |
973 | VirtualCallSiteTypeInfoMap VirtualCSInfo; |
974 | |
975 | if (EnableVTableProfileUse) |
976 | computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo); |
977 | |
978 | // VTableAddressPointOffsetVal stores the vtable address points. The vtable |
979 | // address point of a given <vtable, address point offset> is static (doesn't |
980 | // change after being computed once). |
981 | // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map |
982 | // entry the first time a <vtable, offset> pair is seen, as |
983 | // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter |
984 | // repeatedly on each function. |
985 | VTableAddressPointOffsetValMap VTableAddressPointOffsetVal; |
986 | |
987 | for (auto &F : M) { |
988 | if (F.isDeclaration() || F.hasOptNone()) |
989 | continue; |
990 | |
991 | auto &FAM = |
992 | MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
993 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
994 | |
995 | IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO, |
996 | VirtualCSInfo, |
997 | VTableAddressPointOffsetVal, ORE); |
998 | bool FuncChanged = CallPromoter.processFunction(PSI); |
999 | if (ICPDUMPAFTER && FuncChanged) { |
1000 | LLVM_DEBUG(dbgs() << "\n== IR Dump After ==" ; F.print(dbgs())); |
1001 | LLVM_DEBUG(dbgs() << "\n" ); |
1002 | } |
1003 | Changed |= FuncChanged; |
1004 | if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { |
1005 | LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n" ); |
1006 | break; |
1007 | } |
1008 | } |
1009 | return Changed; |
1010 | } |
1011 | |
1012 | PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, |
1013 | ModuleAnalysisManager &MAM) { |
1014 | ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M); |
1015 | |
1016 | if (!promoteIndirectCalls(M, PSI, InLTO: InLTO | ICPLTOMode, |
1017 | SamplePGO: SamplePGO | ICPSamplePGOMode, MAM)) |
1018 | return PreservedAnalyses::all(); |
1019 | |
1020 | return PreservedAnalyses::none(); |
1021 | } |
1022 | |