1//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the transformation that promotes indirect calls to
10// conditional direct calls when the indirect-call value profile metadata is
11// available.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
20#include "llvm/Analysis/IndirectCallVisitor.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/Analysis/ProfileSummaryInfo.h"
23#include "llvm/Analysis/TypeMetadataUtils.h"
24#include "llvm/IR/DiagnosticInfo.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/InstrTypes.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/PassManager.h"
32#include "llvm/IR/ProfDataUtils.h"
33#include "llvm/IR/Value.h"
34#include "llvm/ProfileData/InstrProf.h"
35#include "llvm/Support/Casting.h"
36#include "llvm/Support/CommandLine.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/Error.h"
39#include "llvm/Support/raw_ostream.h"
40#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
41#include "llvm/Transforms/Utils/CallPromotionUtils.h"
42#include "llvm/Transforms/Utils/Instrumentation.h"
43#include <cassert>
44#include <cstdint>
45#include <set>
46#include <string>
47#include <unordered_map>
48#include <utility>
49#include <vector>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "pgo-icall-prom"
54
55STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
56STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
57
58namespace llvm {
59extern cl::opt<unsigned> MaxNumVTableAnnotations;
60
61extern cl::opt<bool> EnableVTableProfileUse;
62} // namespace llvm
63
64// Command line option to disable indirect-call promotion with the default as
65// false. This is for debug purpose.
66static cl::opt<bool> DisableICP("disable-icp", cl::init(Val: false), cl::Hidden,
67 cl::desc("Disable indirect call promotion"));
68
69// Set the cutoff value for the promotion. If the value is other than 0, we
70// stop the transformation once the total number of promotions equals the cutoff
71// value.
72// For debug use only.
73static cl::opt<unsigned>
74 ICPCutOff("icp-cutoff", cl::init(Val: 0), cl::Hidden,
75 cl::desc("Max number of promotions for this compilation"));
76
77// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
78// For debug use only.
79static cl::opt<unsigned>
80 ICPCSSkip("icp-csskip", cl::init(Val: 0), cl::Hidden,
81 cl::desc("Skip Callsite up to this number for this compilation"));
82
83// ICP the candidate function even when only a declaration is present.
84static cl::opt<bool> ICPAllowDecls(
85 "icp-allow-decls", cl::init(Val: false), cl::Hidden,
86 cl::desc("Promote the target candidate even when the definition "
87 " is not available"));
88
89// ICP hot candidate functions only. When setting to false, non-cold functions
90// (warm functions) can also be promoted.
91static cl::opt<bool>
92 ICPAllowHotOnly("icp-allow-hot-only", cl::init(Val: true), cl::Hidden,
93 cl::desc("Promote the target candidate only if it is a "
94 "hot function. Otherwise, warm functions can "
95 "also be promoted"));
96
97// If one target cannot be ICP'd, proceed with the remaining targets instead
98// of exiting the callsite.
99static cl::opt<bool> ICPAllowCandidateSkip(
100 "icp-allow-candidate-skip", cl::init(Val: false), cl::Hidden,
101 cl::desc("Continue with the remaining targets instead of exiting "
102 "when failing in a candidate"));
103
104// Set if the pass is called in LTO optimization. The difference for LTO mode
105// is the pass won't prefix the source module name to the internal linkage
106// symbols.
107static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(Val: false), cl::Hidden,
108 cl::desc("Run indirect-call promotion in LTO "
109 "mode"));
110
111// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
112// mode is it will add prof metadatato the created direct call.
113static cl::opt<bool>
114 ICPSamplePGOMode("icp-samplepgo", cl::init(Val: false), cl::Hidden,
115 cl::desc("Run indirect-call promotion in SamplePGO mode"));
116
117// If the option is set to true, only call instructions will be considered for
118// transformation -- invoke instructions will be ignored.
119static cl::opt<bool>
120 ICPCallOnly("icp-call-only", cl::init(Val: false), cl::Hidden,
121 cl::desc("Run indirect-call promotion for call instructions "
122 "only"));
123
124// If the option is set to true, only invoke instructions will be considered for
125// transformation -- call instructions will be ignored.
126static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(Val: false),
127 cl::Hidden,
128 cl::desc("Run indirect-call promotion for "
129 "invoke instruction only"));
130
131// Dump the function level IR if the transformation happened in this
132// function. For debug use only.
133static cl::opt<bool>
134 ICPDUMPAFTER("icp-dumpafter", cl::init(Val: false), cl::Hidden,
135 cl::desc("Dump IR after transformation happens"));
136
137// Indirect call promotion pass will fall back to function-based comparison if
138// vtable-count / function-count is smaller than this threshold.
139static cl::opt<float> ICPVTablePercentageThreshold(
140 "icp-vtable-percentage-threshold", cl::init(Val: 0.995), cl::Hidden,
141 cl::desc("The percentage threshold of vtable-count / function-count for "
142 "cost-benefit analysis."));
143
144// Although comparing vtables can save a vtable load, we may need to compare
145// vtable pointer with multiple vtable address points due to class inheritance.
146// Comparing with multiple vtables inserts additional instructions on hot code
147// path, and doing so for an earlier candidate delays the comparisons for later
148// candidates. For the last candidate, only the fallback path is affected.
149// We allow multiple vtable comparison for the last function candidate and use
150// the option below to cap the number of vtables.
151static cl::opt<int> ICPMaxNumVTableLastCandidate(
152 "icp-max-num-vtable-last-candidate", cl::init(Val: 1), cl::Hidden,
153 cl::desc("The maximum number of vtable for the last candidate."));
154
155static cl::list<std::string> ICPIgnoredBaseTypes(
156 "icp-ignored-base-types", cl::Hidden,
157 cl::desc(
158 "A list of mangled vtable type info names. Classes specified by the "
159 "type info names and their derived ones will not be vtable-ICP'ed. "
160 "Useful when the profiled types and actual types in the optimized "
161 "binary could be different due to profiling limitations. Type info "
162 "names are those string literals used in LLVM type metadata"));
163
164namespace {
165
166// The key is a vtable global variable, and the value is a map.
167// In the inner map, the key represents address point offsets and the value is a
168// constant for this address point.
169using VTableAddressPointOffsetValMap =
170 SmallDenseMap<const GlobalVariable *, std::unordered_map<int, Constant *>>;
171
172// A struct to collect type information for a virtual call site.
173struct VirtualCallSiteInfo {
174 // The offset from the address point to virtual function in the vtable.
175 uint64_t FunctionOffset;
176 // The instruction that computes the address point of vtable.
177 Instruction *VPtr;
178 // The compatible type used in LLVM type intrinsics.
179 StringRef CompatibleTypeStr;
180};
181
182// The key is a virtual call, and value is its type information.
183using VirtualCallSiteTypeInfoMap =
184 SmallDenseMap<const CallBase *, VirtualCallSiteInfo>;
185
186// The key is vtable GUID, and value is its value profile count.
187using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
188
189// Return the address point offset of the given compatible type.
190//
191// Type metadata of a vtable specifies the types that can contain a pointer to
192// this vtable, for example, `Base*` can be a pointer to an derived type
193// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
194static std::optional<uint64_t>
195getAddressPointOffset(const GlobalVariable &VTableVar,
196 StringRef CompatibleType) {
197 SmallVector<MDNode *> Types;
198 VTableVar.getMetadata(KindID: LLVMContext::MD_type, MDs&: Types);
199
200 for (MDNode *Type : Types)
201 if (auto *TypeId = dyn_cast<MDString>(Val: Type->getOperand(I: 1).get());
202 TypeId && TypeId->getString() == CompatibleType)
203 return cast<ConstantInt>(
204 Val: cast<ConstantAsMetadata>(Val: Type->getOperand(I: 0))->getValue())
205 ->getZExtValue();
206
207 return std::nullopt;
208}
209
210// Return a constant representing the vtable's address point specified by the
211// offset.
212static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
213 uint32_t AddressPointOffset) {
214 Module &M = *VTable->getParent();
215 LLVMContext &Context = M.getContext();
216 assert(AddressPointOffset < VTable->getGlobalSize(M.getDataLayout()) &&
217 "Out-of-bound access");
218
219 return ConstantExpr::getInBoundsGetElementPtr(
220 Ty: Type::getInt8Ty(C&: Context), C: VTable,
221 Idx: llvm::ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V: AddressPointOffset));
222}
223
224// Return the basic block in which Use `U` is used via its `UserInst`.
225static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
226 if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst))
227 return PN->getIncomingBlock(U);
228
229 return UserInst->getParent();
230}
231
232// `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users
233// and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
234// is the sole predecessor of `DestBB` and `DestBB` is dominated by
235// `Inst->getParent()`.
236static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
237 // 'BB' is used only by assert.
238 [[maybe_unused]] BasicBlock *BB = Inst->getParent();
239
240 assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
241 DestBB->getUniquePredecessor() == BB &&
242 "Guaranteed by ICP transformation");
243
244 BasicBlock *UserBB = nullptr;
245 for (Use &Use : Inst->uses()) {
246 User *User = Use.getUser();
247 // Do checked cast since IR verifier guarantees that the user of an
248 // instruction must be an instruction. See `Verifier::visitInstruction`.
249 Instruction *UserInst = cast<Instruction>(Val: User);
250 // We can sink debug or pseudo instructions together with Inst.
251 if (UserInst->isDebugOrPseudoInst())
252 continue;
253 UserBB = getUserBasicBlock(U&: Use, UserInst);
254 // Do not sink if Inst is used in a basic block that is not DestBB.
255 // TODO: Sink to the common dominator of all user blocks.
256 if (UserBB != DestBB)
257 return false;
258 }
259 return UserBB != nullptr;
260}
261
262// For the virtual call dispatch sequence, try to sink vtable load instructions
263// to the cold indirect call fallback.
264// FIXME: Move the sink eligibility check below to a utility function in
265// Transforms/Utils/ directory.
266static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
267 if (!isDestBBSuitableForSink(Inst: I, DestBB: DestBlock))
268 return false;
269
270 // Do not move control-flow-involving, volatile loads, vaarg, alloca
271 // instructions, etc.
272 if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
273 isa<AllocaInst>(Val: I))
274 return false;
275
276 // Do not sink convergent call instructions.
277 if (const auto *C = dyn_cast<CallBase>(Val: I))
278 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
279 return false;
280
281 // Do not move an instruction that may write to memory.
282 if (I->mayWriteToMemory())
283 return false;
284
285 // We can only sink load instructions if there is nothing between the load and
286 // the end of block that could change the value.
287 if (I->mayReadFromMemory()) {
288 // We already know that SrcBlock is the unique predecessor of DestBlock.
289 for (BasicBlock::iterator Scan = std::next(x: I->getIterator()),
290 E = I->getParent()->end();
291 Scan != E; ++Scan) {
292 // Note analysis analysis can tell whether two pointers can point to the
293 // same object in memory or not thereby find further opportunities to
294 // sink.
295 if (Scan->mayWriteToMemory())
296 return false;
297 }
298 }
299
300 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
301 I->moveBefore(BB&: *DestBlock, I: InsertPos);
302
303 // TODO: Sink debug intrinsic users of I to 'DestBlock'.
304 // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
305 // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
306 // the core logic to do this.
307 return true;
308}
309
310// Try to sink instructions after VPtr to the indirect call fallback.
311// Return the number of sunk IR instructions.
312static int tryToSinkInstructions(BasicBlock *OriginalBB,
313 BasicBlock *IndirectCallBB) {
314 int SinkCount = 0;
315 // Do not sink across a critical edge for simplicity.
316 if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
317 return SinkCount;
318 // Sink all eligible instructions in OriginalBB in reverse order.
319 for (Instruction &I :
320 llvm::make_early_inc_range(Range: llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: *OriginalBB))))
321 if (tryToSinkInstruction(I: &I, DestBlock: IndirectCallBB))
322 SinkCount++;
323
324 return SinkCount;
325}
326
327// Promote indirect calls to conditional direct calls, keeping track of
328// thresholds.
329class IndirectCallPromoter {
330private:
331 Function &F;
332 Module &M;
333
334 // Symtab that maps indirect call profile values to function names and
335 // defines.
336 InstrProfSymtab *const Symtab;
337
338 const bool SamplePGO;
339
340 // A map from a virtual call to its type information.
341 const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
342
343 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
344
345 OptimizationRemarkEmitter &ORE;
346
347 const DenseSet<StringRef> &IgnoredBaseTypes;
348
349 // A struct that records the direct target and it's call count.
350 struct PromotionCandidate {
351 Function *const TargetFunction;
352 const uint64_t Count;
353 const uint32_t Index;
354
355 // The following fields only exists for promotion candidates with vtable
356 // information.
357 //
358 // Due to class inheritance, one virtual call candidate can come from
359 // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
360 // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
361 // points for comparison.
362 VTableGUIDCountsMap VTableGUIDAndCounts;
363 SmallVector<Constant *> AddressPoints;
364
365 PromotionCandidate(Function *F, uint64_t C, uint32_t I)
366 : TargetFunction(F), Count(C), Index(I) {}
367 };
368
369 // Check if the indirect-call call site should be promoted. Return the number
370 // of promotions. Inst is the candidate indirect call, ValueDataRef
371 // contains the array of value profile data for profiled targets,
372 // TotalCount is the total profiled count of call executions, and
373 // NumCandidates is the number of candidate entries in ValueDataRef.
374 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
375 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
376 uint64_t TotalCount, uint32_t NumCandidates);
377
378 // Promote a list of targets for one indirect-call callsite by comparing
379 // indirect callee with functions. Return true if there are IR
380 // transformations and false otherwise.
381 bool tryToPromoteWithFuncCmp(
382 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
383 uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
384 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
385
386 // Promote a list of targets for one indirect call by comparing vtables with
387 // functions. Return true if there are IR transformations and false
388 // otherwise.
389 bool tryToPromoteWithVTableCmp(
390 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
391 uint64_t TotalFuncCount, uint32_t NumCandidates,
392 MutableArrayRef<InstrProfValueData> ICallProfDataRef,
393 VTableGUIDCountsMap &VTableGUIDCounts);
394
395 // Return true if it's profitable to compare vtables for the callsite.
396 bool isProfitableToCompareVTables(const CallBase &CB,
397 ArrayRef<PromotionCandidate> Candidates);
398
399 // Return true if the vtable corresponding to VTableGUID should be skipped
400 // for vtable-based comparison.
401 bool shouldSkipVTable(uint64_t VTableGUID);
402
403 // Given an indirect callsite and the list of function candidates, compute
404 // the following vtable information in output parameters and return vtable
405 // pointer if type profiles exist.
406 // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
407 // metadata attached on the vtable pointer.
408 // - For each function candidate, finds out the vtables from which it gets
409 // called and stores the <vtable-guid, count> in promotion candidate.
410 Instruction *computeVTableInfos(const CallBase *CB,
411 VTableGUIDCountsMap &VTableGUIDCounts,
412 std::vector<PromotionCandidate> &Candidates);
413
414 Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
415 uint64_t AddressPointOffset);
416
417 void updateFuncValueProfiles(CallBase &CB,
418 MutableArrayRef<InstrProfValueData> VDs,
419 uint64_t Sum, uint32_t MaxMDCount);
420
421 void updateVPtrValueProfiles(Instruction *VPtr,
422 VTableGUIDCountsMap &VTableGUIDCounts);
423
424 bool isValidTarget(uint64_t, Function *, const CallBase &, uint64_t);
425
426public:
427 IndirectCallPromoter(
428 Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
429 const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
430 VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
431 const DenseSet<StringRef> &IgnoredBaseTypes,
432 OptimizationRemarkEmitter &ORE)
433 : F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
434 VirtualCSInfo(VirtualCSInfo),
435 VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
436 IgnoredBaseTypes(IgnoredBaseTypes) {}
437 IndirectCallPromoter(const IndirectCallPromoter &) = delete;
438 IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
439
440 bool processFunction(ProfileSummaryInfo *PSI);
441};
442
443} // end anonymous namespace
444
445bool IndirectCallPromoter::isValidTarget(uint64_t Target,
446 Function *TargetFunction,
447 const CallBase &CB, uint64_t Count) {
448 // Don't promote if the symbol is not defined in the module. This avoids
449 // creating a reference to a symbol that doesn't exist in the module
450 // This can happen when we compile with a sample profile collected from
451 // one binary but used for another, which may have profiled targets that
452 // aren't used in the new binary. We might have a declaration initially in
453 // the case where the symbol is globally dead in the binary and removed by
454 // ThinLTO.
455 using namespace ore;
456 if (TargetFunction == nullptr) {
457 LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
458 ORE.emit(RemarkBuilder: [&]() {
459 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
460 << "Cannot promote indirect call: target with md5sum "
461 << NV("target md5sum", Target)
462 << " not found (count=" << NV("Count", Count) << ")";
463 });
464 return false;
465 }
466 if (!ICPAllowDecls && TargetFunction->isDeclaration()) {
467 LLVM_DEBUG(dbgs() << " Not promote: target definition is not available\n");
468 ORE.emit(RemarkBuilder: [&]() {
469 return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
470 << "Do not promote indirect call: target with md5sum "
471 << NV("target md5sum", Target)
472 << " definition not available (count=" << ore::NV("Count", Count)
473 << ")";
474 });
475 return false;
476 }
477
478 const char *Reason = nullptr;
479 if (!isLegalToPromote(CB, Callee: TargetFunction, FailureReason: &Reason)) {
480
481 ORE.emit(RemarkBuilder: [&]() {
482 return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
483 << "Cannot promote indirect call to "
484 << NV("TargetFunction", TargetFunction)
485 << " (count=" << NV("Count", Count) << "): " << Reason;
486 });
487 return false;
488 }
489 return true;
490}
491
492// Indirect-call promotion heuristic. The direct targets are sorted based on
493// the count. Stop at the first target that is not promoted.
494std::vector<IndirectCallPromoter::PromotionCandidate>
495IndirectCallPromoter::getPromotionCandidatesForCallSite(
496 const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
497 uint64_t TotalCount, uint32_t NumCandidates) {
498 std::vector<PromotionCandidate> Ret;
499
500 LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
501 << " Num_targets: " << ValueDataRef.size()
502 << " Num_candidates: " << NumCandidates << "\n");
503 NumOfPGOICallsites++;
504 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
505 LLVM_DEBUG(dbgs() << " Skip: User options.\n");
506 return Ret;
507 }
508
509 for (uint32_t I = 0; I < NumCandidates; I++) {
510 uint64_t Count = ValueDataRef[I].Count;
511 assert(Count <= TotalCount);
512 (void)TotalCount;
513 uint64_t Target = ValueDataRef[I].Value;
514 LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
515 << " Target_func: " << Target << "\n");
516
517 if (ICPInvokeOnly && isa<CallInst>(Val: CB)) {
518 LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
519 ORE.emit(RemarkBuilder: [&]() {
520 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
521 << " Not promote: User options";
522 });
523 break;
524 }
525 if (ICPCallOnly && isa<InvokeInst>(Val: CB)) {
526 LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
527 ORE.emit(RemarkBuilder: [&]() {
528 return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
529 << " Not promote: User options";
530 });
531 break;
532 }
533 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
534 LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
535 ORE.emit(RemarkBuilder: [&]() {
536 return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
537 << " Not promote: Cutoff reached";
538 });
539 break;
540 }
541
542 Function *TargetFunction = Symtab->getFunction(FuncMD5Hash: Target);
543 if (!isValidTarget(Target, TargetFunction, CB, Count)) {
544 if (ICPAllowCandidateSkip)
545 continue;
546 else
547 break;
548 }
549
550 Ret.push_back(x: PromotionCandidate(TargetFunction, Count, I));
551 TotalCount -= Count;
552 }
553 return Ret;
554}
555
556Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
557 GlobalVariable *GV, uint64_t AddressPointOffset) {
558 auto [Iter, Inserted] =
559 VTableAddressPointOffsetVal[GV].try_emplace(k: AddressPointOffset, args: nullptr);
560 if (Inserted)
561 Iter->second = getVTableAddressPointOffset(VTable: GV, AddressPointOffset);
562 return Iter->second;
563}
564
565Instruction *IndirectCallPromoter::computeVTableInfos(
566 const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
567 std::vector<PromotionCandidate> &Candidates) {
568 if (!EnableVTableProfileUse)
569 return nullptr;
570
571 // Take the following code sequence as an example, here is how the code works
572 // @vtable1 = {[n x ptr] [... ptr @func1]}
573 // @vtable2 = {[m x ptr] [... ptr @func2]}
574 //
575 // %vptr = load ptr, ptr %d, !prof !0
576 // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
577 // tail call void @llvm.assume(i1 %0)
578 // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
579 // %1 = load ptr, ptr %vfn
580 // call void %1(ptr %d), !prof !1
581 //
582 // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
583 // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
584 //
585 // Step 1. Find out the %vptr instruction for indirect call and use its !prof
586 // to populate `GUIDCountsMap`.
587 // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
588 // make vtable definitions visible across modules.
589 // Step 3. Compute the byte offset of the virtual call, by adding vtable
590 // address point offset and function's offset relative to vtable address
591 // point. For each function candidate, this step tells us the vtable from
592 // which it comes from, and the vtable address point to compare %vptr with.
593
594 // Only virtual calls have virtual call site info.
595 auto Iter = VirtualCSInfo.find(Val: CB);
596 if (Iter == VirtualCSInfo.end())
597 return nullptr;
598
599 LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
600 << NumOfPGOICallsites << "\n");
601
602 const auto &VirtualCallInfo = Iter->second;
603 Instruction *VPtr = VirtualCallInfo.VPtr;
604
605 SmallDenseMap<Function *, int, 4> CalleeIndexMap;
606 for (size_t I = 0; I < Candidates.size(); I++)
607 CalleeIndexMap[Candidates[I].TargetFunction] = I;
608
609 uint64_t TotalVTableCount = 0;
610 auto VTableValueDataArray =
611 getValueProfDataFromInst(Inst: *VirtualCallInfo.VPtr, ValueKind: IPVK_VTableTarget,
612 MaxNumValueData: MaxNumVTableAnnotations, TotalC&: TotalVTableCount);
613 if (VTableValueDataArray.empty())
614 return VPtr;
615
616 // Compute the functions and counts from by each vtable.
617 for (const auto &V : VTableValueDataArray) {
618 uint64_t VTableVal = V.Value;
619 GUIDCountsMap[VTableVal] = V.Count;
620 GlobalVariable *VTableVar = Symtab->getGlobalVariable(MD5Hash: VTableVal);
621 if (!VTableVar) {
622 LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal
623 << "; maybe the vtable isn't imported\n");
624 continue;
625 }
626
627 std::optional<uint64_t> MaybeAddressPointOffset =
628 getAddressPointOffset(VTableVar: *VTableVar, CompatibleType: VirtualCallInfo.CompatibleTypeStr);
629 if (!MaybeAddressPointOffset)
630 continue;
631
632 const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
633
634 Function *Callee = nullptr;
635 std::tie(args&: Callee, args: std::ignore) = getFunctionAtVTableOffset(
636 GV: VTableVar, Offset: AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
637 if (!Callee)
638 continue;
639 auto CalleeIndexIter = CalleeIndexMap.find(Val: Callee);
640 if (CalleeIndexIter == CalleeIndexMap.end())
641 continue;
642
643 auto &Candidate = Candidates[CalleeIndexIter->second];
644 // There shouldn't be duplicate GUIDs in one !prof metadata (except
645 // duplicated zeros), so assign counters directly won't cause overwrite or
646 // counter loss.
647 Candidate.VTableGUIDAndCounts[VTableVal] = V.Count;
648 Candidate.AddressPoints.push_back(
649 Elt: getOrCreateVTableAddressPointVar(GV: VTableVar, AddressPointOffset));
650 }
651
652 return VPtr;
653}
654
655// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
656// Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
657static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
658 uint64_t FalseWeight) {
659 MDBuilder MDB(Context);
660 uint64_t Scale = calculateCountScale(MaxCount: std::max(a: TrueWeight, b: FalseWeight));
661 return MDB.createBranchWeights(TrueWeight: scaleBranchCount(Count: TrueWeight, Scale),
662 FalseWeight: scaleBranchCount(Count: FalseWeight, Scale));
663}
664
665CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
666 uint64_t Count, uint64_t TotalCount,
667 bool AttachProfToDirectCall,
668 OptimizationRemarkEmitter *ORE) {
669 CallBase &NewInst = promoteCallWithIfThenElse(
670 CB, Callee: DirectCallee,
671 BranchWeights: createBranchWeights(Context&: CB.getContext(), TrueWeight: Count, FalseWeight: TotalCount - Count));
672
673 if (AttachProfToDirectCall)
674 setFittedBranchWeights(I&: NewInst, Weights: {Count},
675 /*IsExpected=*/false);
676
677 using namespace ore;
678
679 if (ORE)
680 ORE->emit(RemarkBuilder: [&]() {
681 return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
682 << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
683 << " with count " << NV("Count", Count) << " out of "
684 << NV("TotalCount", TotalCount);
685 });
686 return NewInst;
687}
688
689// Promote indirect-call to conditional direct-call for one callsite.
690bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
691 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
692 uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
693 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
694 uint32_t NumPromoted = 0;
695
696 for (const auto &C : Candidates) {
697 uint64_t FuncCount = C.Count;
698 pgo::promoteIndirectCall(CB, DirectCallee: C.TargetFunction, Count: FuncCount, TotalCount,
699 AttachProfToDirectCall: SamplePGO, ORE: &ORE);
700 assert(TotalCount >= FuncCount);
701 TotalCount -= FuncCount;
702 NumOfPGOICallPromotion++;
703 NumPromoted++;
704
705 // Update the count and this entry will be erased later.
706 ICallProfDataRef[C.Index].Count = 0;
707 if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
708 continue;
709
710 // After a virtual call candidate gets promoted, update the vtable's counts
711 // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
712 // a vtable from which the virtual call is loaded. Compute the sum and use
713 // 128-bit APInt to improve accuracy.
714 uint64_t SumVTableCount = 0;
715 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
716 SumVTableCount += VTableCount;
717
718 for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
719 APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
720 APFuncCount *= VTableCount;
721 VTableGUIDCounts[GUID] -= APFuncCount.udiv(RHS: SumVTableCount).getZExtValue();
722 }
723 }
724 if (NumPromoted == 0)
725 return false;
726
727 assert(NumPromoted <= ICallProfDataRef.size() &&
728 "Number of promoted functions should not be greater than the number "
729 "of values in profile metadata");
730
731 updateFuncValueProfiles(CB, VDs: ICallProfDataRef, Sum: TotalCount, MaxMDCount: NumCandidates);
732 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
733 return true;
734}
735
736void IndirectCallPromoter::updateFuncValueProfiles(
737 CallBase &CB, MutableArrayRef<InstrProfValueData> CallVDs,
738 uint64_t TotalCount, uint32_t MaxMDCount) {
739 // First clear the existing !prof.
740 CB.setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);
741
742 // Sort value profiles by count in descending order.
743 llvm::stable_sort(Range&: CallVDs, C: [](const InstrProfValueData &LHS,
744 const InstrProfValueData &RHS) {
745 return LHS.Count > RHS.Count;
746 });
747 // Drop the <target-value, count> pair if count is zero.
748 ArrayRef<InstrProfValueData> VDs(
749 CallVDs.begin(),
750 llvm::upper_bound(Range&: CallVDs, Value: 0U,
751 C: [](uint64_t Count, const InstrProfValueData &ProfData) {
752 return ProfData.Count <= Count;
753 }));
754
755 // Annotate the remaining value profiles if counter is not zero.
756 if (TotalCount != 0)
757 annotateValueSite(M, Inst&: CB, VDs, Sum: TotalCount, ValueKind: IPVK_IndirectCallTarget,
758 MaxMDCount);
759}
760
761void IndirectCallPromoter::updateVPtrValueProfiles(
762 Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
763 if (!EnableVTableProfileUse || VPtr == nullptr ||
764 !VPtr->getMetadata(KindID: LLVMContext::MD_prof))
765 return;
766 VPtr->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);
767 std::vector<InstrProfValueData> VTableValueProfiles;
768 uint64_t TotalVTableCount = 0;
769 for (auto [GUID, Count] : VTableGUIDCounts) {
770 if (Count == 0)
771 continue;
772
773 VTableValueProfiles.push_back(x: {.Value: GUID, .Count: Count});
774 TotalVTableCount += Count;
775 }
776 llvm::sort(C&: VTableValueProfiles,
777 Comp: [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
778 return LHS.Count > RHS.Count;
779 });
780
781 annotateValueSite(M, Inst&: *VPtr, VDs: VTableValueProfiles, Sum: TotalVTableCount,
782 ValueKind: IPVK_VTableTarget, MaxMDCount: VTableValueProfiles.size());
783}
784
785bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
786 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
787 uint64_t TotalFuncCount, uint32_t NumCandidates,
788 MutableArrayRef<InstrProfValueData> ICallProfDataRef,
789 VTableGUIDCountsMap &VTableGUIDCounts) {
790 SmallVector<std::pair<uint32_t, uint64_t>, 4> PromotedFuncCount;
791
792 for (const auto &Candidate : Candidates) {
793 for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
794 VTableGUIDCounts[GUID] -= Count;
795
796 // 'OriginalBB' is the basic block of indirect call. After each candidate
797 // is promoted, a new basic block is created for the indirect fallback basic
798 // block and indirect call `CB` is moved into this new BB.
799 BasicBlock *OriginalBB = CB.getParent();
800 promoteCallWithVTableCmp(
801 CB, VPtr, Callee: Candidate.TargetFunction, AddressPoints: Candidate.AddressPoints,
802 BranchWeights: createBranchWeights(Context&: CB.getContext(), TrueWeight: Candidate.Count,
803 FalseWeight: TotalFuncCount - Candidate.Count));
804
805 int SinkCount = tryToSinkInstructions(OriginalBB, IndirectCallBB: CB.getParent());
806
807 ORE.emit(RemarkBuilder: [&]() {
808 OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
809
810 const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
811 Remark << "Promote indirect call to "
812 << ore::NV("DirectCallee", Candidate.TargetFunction)
813 << " with count " << ore::NV("Count", Candidate.Count)
814 << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
815 << ore::NV("SinkCount", SinkCount)
816 << " instruction(s) and compare "
817 << ore::NV("VTable", VTableGUIDAndCounts.size())
818 << " vtable(s): {";
819
820 // Sort GUIDs so remark message is deterministic.
821 std::set<uint64_t> GUIDSet;
822 for (auto [GUID, Count] : VTableGUIDAndCounts)
823 GUIDSet.insert(x: GUID);
824 for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
825 if (Iter != GUIDSet.begin())
826 Remark << ", ";
827 Remark << ore::NV("VTable", Symtab->getGlobalVariable(MD5Hash: *Iter));
828 }
829
830 Remark << "}";
831
832 return Remark;
833 });
834
835 PromotedFuncCount.push_back(Elt: {Candidate.Index, Candidate.Count});
836
837 assert(TotalFuncCount >= Candidate.Count &&
838 "Within one prof metadata, total count is the sum of counts from "
839 "individual <target, count> pairs");
840 // Use std::min since 'TotalFuncCount' is the saturated sum of individual
841 // counts, see
842 // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
843 TotalFuncCount -= std::min(a: TotalFuncCount, b: Candidate.Count);
844 NumOfPGOICallPromotion++;
845 }
846
847 if (PromotedFuncCount.empty())
848 return false;
849
850 // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
851 // a distinct 'VPtr'.
852 // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
853 // used to load multiple virtual functions. The vtable profiles needs to be
854 // updated properly in that case (e.g, for each indirect call annotate both
855 // type profiles and function profiles in one !prof).
856 for (size_t I = 0; I < PromotedFuncCount.size(); I++) {
857 uint32_t Index = PromotedFuncCount[I].first;
858 ICallProfDataRef[Index].Count -=
859 std::max(a: PromotedFuncCount[I].second, b: ICallProfDataRef[Index].Count);
860 }
861 updateFuncValueProfiles(CB, CallVDs: ICallProfDataRef, TotalCount: TotalFuncCount, MaxMDCount: NumCandidates);
862 updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
863 return true;
864}
865
866// Traverse all the indirect-call callsite and get the value profile
867// annotation to perform indirect-call promotion.
868bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
869 bool Changed = false;
870 ICallPromotionAnalysis ICallAnalysis;
871 for (auto *CB : findIndirectCalls(F)) {
872 uint32_t NumCandidates;
873 uint64_t TotalCount;
874 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
875 I: CB, TotalCount, NumCandidates);
876 if (!NumCandidates)
877 continue;
878 if (PSI && PSI->hasProfileSummary()) {
879 // Don't promote cold candidates.
880 if (PSI->isColdCount(C: TotalCount)) {
881 LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
882 << TotalCount << "\n");
883 continue;
884 }
885 // Only pormote hot if ICPAllowHotOnly is true.
886 if (ICPAllowHotOnly && !PSI->isHotCount(C: TotalCount)) {
887 LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
888 << TotalCount << "\n");
889 continue;
890 }
891 }
892
893 auto PromotionCandidates = getPromotionCandidatesForCallSite(
894 CB: *CB, ValueDataRef: ICallProfDataRef, TotalCount, NumCandidates);
895
896 VTableGUIDCountsMap VTableGUIDCounts;
897 Instruction *VPtr =
898 computeVTableInfos(CB, GUIDCountsMap&: VTableGUIDCounts, Candidates&: PromotionCandidates);
899
900 if (isProfitableToCompareVTables(CB: *CB, Candidates: PromotionCandidates))
901 Changed |= tryToPromoteWithVTableCmp(CB&: *CB, VPtr, Candidates: PromotionCandidates,
902 TotalFuncCount: TotalCount, NumCandidates,
903 ICallProfDataRef, VTableGUIDCounts);
904 else
905 Changed |= tryToPromoteWithFuncCmp(CB&: *CB, VPtr, Candidates: PromotionCandidates,
906 TotalCount, ICallProfDataRef,
907 NumCandidates, VTableGUIDCounts);
908 }
909 return Changed;
910}
911
912// TODO: Return false if the function addressing and vtable load instructions
913// cannot sink to indirect fallback.
914bool IndirectCallPromoter::isProfitableToCompareVTables(
915 const CallBase &CB, ArrayRef<PromotionCandidate> Candidates) {
916 if (!EnableVTableProfileUse || Candidates.empty())
917 return false;
918 LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
919 << NumOfPGOICallsites << CB << "\n");
920 const size_t CandidateSize = Candidates.size();
921 for (size_t I = 0; I < CandidateSize; I++) {
922 auto &Candidate = Candidates[I];
923 auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
924
925 LLVM_DEBUG({
926 dbgs() << " Candidate " << I << " FunctionCount: " << Candidate.Count
927 << ", VTableCounts:";
928 for (const auto &[GUID, Count] : VTableGUIDAndCounts)
929 dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName() << ", "
930 << Count << "}";
931 dbgs() << "\n";
932 });
933
934 uint64_t CandidateVTableCount = 0;
935
936 for (auto &[GUID, Count] : VTableGUIDAndCounts) {
937 CandidateVTableCount += Count;
938
939 if (shouldSkipVTable(VTableGUID: GUID))
940 return false;
941 }
942
943 if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
944 LLVM_DEBUG(
945 dbgs() << " function count " << Candidate.Count
946 << " and its vtable sum count " << CandidateVTableCount
947 << " have discrepancies. Bail out vtable comparison.\n");
948 return false;
949 }
950
951 // 'MaxNumVTable' limits the number of vtables to make vtable comparison
952 // profitable. Comparing multiple vtables for one function candidate will
953 // insert additional instructions on the hot path, and allowing more than
954 // one vtable for non last candidates may or may not elongate the dependency
955 // chain for the subsequent candidates. Set its value to 1 for non-last
956 // candidate and allow option to override it for the last candidate.
957 int MaxNumVTable = 1;
958 if (I == CandidateSize - 1)
959 MaxNumVTable = ICPMaxNumVTableLastCandidate;
960
961 if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
962 LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
963 << Candidate.AddressPoints.size()
964 << " vtables. Bail out for vtable comparison.\n");
965 return false;
966 }
967 }
968
969 return true;
970}
971
972bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
973 if (IgnoredBaseTypes.empty())
974 return false;
975
976 auto *VTableVar = Symtab->getGlobalVariable(MD5Hash: VTableGUID);
977
978 assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
979
980 SmallVector<MDNode *, 2> Types;
981 VTableVar->getMetadata(KindID: LLVMContext::MD_type, MDs&: Types);
982
983 for (auto *Type : Types)
984 if (auto *TypeId = dyn_cast<MDString>(Val: Type->getOperand(I: 1).get()))
985 if (IgnoredBaseTypes.contains(V: TypeId->getString())) {
986 LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
987 "out of vtable comparison.");
988 return true;
989 }
990 return false;
991}
992
993// For virtual calls in the module, collect per-callsite information which will
994// be used to associate an ICP candidate with a vtable and a specific function
995// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
996// calls in a compile-time efficient manner (by iterating its users) and more
997// importantly use the compatible type later to figure out the function byte
998// offset relative to the start of vtables.
999static void
1000computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
1001 VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
1002 // Right now only llvm.type.test is used to find out virtual call sites.
1003 // With ThinLTO and whole-program-devirtualization, llvm.type.test and
1004 // llvm.public.type.test are emitted, and llvm.public.type.test is either
1005 // refined to llvm.type.test or dropped before indirect-call-promotion pass.
1006 //
1007 // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
1008 // Find out virtual calls by looking at users of llvm.type.checked.load in
1009 // that case.
1010 Function *TypeTestFunc =
1011 Intrinsic::getDeclarationIfExists(M: &M, id: Intrinsic::type_test);
1012 if (!TypeTestFunc || TypeTestFunc->use_empty())
1013 return;
1014
1015 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
1016 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
1017 return FAM.getResult<DominatorTreeAnalysis>(IR&: F);
1018 };
1019 // Iterate all type.test calls to find all indirect calls.
1020 for (Use &U : llvm::make_early_inc_range(Range: TypeTestFunc->uses())) {
1021 auto *CI = dyn_cast<CallInst>(Val: U.getUser());
1022 if (!CI)
1023 continue;
1024 auto *TypeMDVal = cast<MetadataAsValue>(Val: CI->getArgOperand(i: 1));
1025 if (!TypeMDVal)
1026 continue;
1027 auto *CompatibleTypeId = dyn_cast<MDString>(Val: TypeMDVal->getMetadata());
1028 if (!CompatibleTypeId)
1029 continue;
1030
1031 // Find out all devirtualizable call sites given a llvm.type.test
1032 // intrinsic call.
1033 SmallVector<DevirtCallSite, 1> DevirtCalls;
1034 SmallVector<CallInst *, 1> Assumes;
1035 auto &DT = LookupDomTree(*CI->getFunction());
1036 findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
1037
1038 for (auto &DevirtCall : DevirtCalls) {
1039 CallBase &CB = DevirtCall.CB;
1040 // Given an indirect call, try find the instruction which loads a
1041 // pointer to virtual table.
1042 Instruction *VTablePtr =
1043 PGOIndirectCallVisitor::tryGetVTableInstruction(CB: &CB);
1044 if (!VTablePtr)
1045 continue;
1046 VirtualCSInfo[&CB] = {.FunctionOffset: DevirtCall.Offset, .VPtr: VTablePtr,
1047 .CompatibleTypeStr: CompatibleTypeId->getString()};
1048 }
1049 }
1050}
1051
1052// A wrapper function that does the actual work.
1053static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
1054 bool SamplePGO, ModuleAnalysisManager &MAM) {
1055 if (DisableICP)
1056 return false;
1057 InstrProfSymtab Symtab;
1058 if (Error E = Symtab.create(M, InLTO)) {
1059 std::string SymtabFailure = toString(E: std::move(E));
1060 M.getContext().emitError(ErrorStr: "Failed to create symtab: " + SymtabFailure);
1061 return false;
1062 }
1063 bool Changed = false;
1064 VirtualCallSiteTypeInfoMap VirtualCSInfo;
1065
1066 DenseSet<StringRef> IgnoredBaseTypes;
1067
1068 if (EnableVTableProfileUse) {
1069 computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
1070
1071 IgnoredBaseTypes.insert_range(R&: ICPIgnoredBaseTypes);
1072 }
1073
1074 // VTableAddressPointOffsetVal stores the vtable address points. The vtable
1075 // address point of a given <vtable, address point offset> is static (doesn't
1076 // change after being computed once).
1077 // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
1078 // entry the first time a <vtable, offset> pair is seen, as
1079 // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
1080 // repeatedly on each function.
1081 VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
1082
1083 for (auto &F : M) {
1084 if (F.isDeclaration() || F.hasOptNone())
1085 continue;
1086
1087 auto &FAM =
1088 MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
1089 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
1090
1091 IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
1092 VTableAddressPointOffsetVal,
1093 IgnoredBaseTypes, ORE);
1094 bool FuncChanged = CallPromoter.processFunction(PSI);
1095 if (ICPDUMPAFTER && FuncChanged) {
1096 LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
1097 LLVM_DEBUG(dbgs() << "\n");
1098 }
1099 Changed |= FuncChanged;
1100 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
1101 LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
1102 break;
1103 }
1104 }
1105 return Changed;
1106}
1107
1108PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
1109 ModuleAnalysisManager &MAM) {
1110 ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M);
1111
1112 if (!promoteIndirectCalls(M, PSI, InLTO: InLTO | ICPLTOMode,
1113 SamplePGO: SamplePGO | ICPSamplePGOMode, MAM))
1114 return PreservedAnalyses::all();
1115
1116 return PreservedAnalyses::none();
1117}
1118