1//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10// It also builds the data structures and initialization code needed for
11// updating execution counts and emitting the profile at runtime.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
21#include "llvm/Analysis/BlockFrequencyInfo.h"
22#include "llvm/Analysis/BranchProbabilityInfo.h"
23#include "llvm/Analysis/CFG.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetLibraryInfo.h"
26#include "llvm/IR/Attributes.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/CFG.h"
29#include "llvm/IR/Constant.h"
30#include "llvm/IR/Constants.h"
31#include "llvm/IR/DIBuilder.h"
32#include "llvm/IR/DerivedTypes.h"
33#include "llvm/IR/DiagnosticInfo.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/GlobalValue.h"
37#include "llvm/IR/GlobalVariable.h"
38#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Module.h"
44#include "llvm/IR/Type.h"
45#include "llvm/Pass.h"
46#include "llvm/ProfileData/InstrProf.h"
47#include "llvm/ProfileData/InstrProfCorrelator.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Compiler.h"
51#include "llvm/Support/Error.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/TargetParser/Triple.h"
54#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
55#include "llvm/Transforms/Utils/BasicBlockUtils.h"
56#include "llvm/Transforms/Utils/Instrumentation.h"
57#include "llvm/Transforms/Utils/ModuleUtils.h"
58#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>
#include <string>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "instrprof"
67
68namespace llvm {
69// Command line option to enable vtable value profiling. Defined in
70// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
71extern cl::opt<bool> EnableVTableValueProfiling;
72LLVM_ABI cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
73 "profile-correlate",
74 cl::desc("Use debug info or binary file to correlate profiles."),
75 cl::init(Val: InstrProfCorrelator::NONE),
76 cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
77 "No profile correlation"),
78 clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
79 "Use debug info to correlate"),
80 clEnumValN(InstrProfCorrelator::BINARY, "binary",
81 "Use binary to correlate")));
82} // namespace llvm
83
84namespace {
85
86cl::opt<bool> DoHashBasedCounterSplit(
87 "hash-based-counter-split",
88 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
89 cl::init(Val: true));
90
91cl::opt<bool>
92 RuntimeCounterRelocation("runtime-counter-relocation",
93 cl::desc("Enable relocating counters at runtime."),
94 cl::init(Val: false));
95
96cl::opt<bool> ValueProfileStaticAlloc(
97 "vp-static-alloc",
98 cl::desc("Do static counter allocation for value profiler"),
99 cl::init(Val: true));
100
101cl::opt<double> NumCountersPerValueSite(
102 "vp-counters-per-site",
103 cl::desc("The average number of profile counters allocated "
104 "per value profiling site."),
105 // This is set to a very small value because in real programs, only
106 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
107 // For those sites with non-zero profile, the average number of targets
108 // is usually smaller than 2.
109 cl::init(Val: 1.0));
110
111cl::opt<bool> AtomicCounterUpdateAll(
112 "instrprof-atomic-counter-update-all",
113 cl::desc("Make all profile counter updates atomic (for testing only)"),
114 cl::init(Val: false));
115
116cl::opt<bool> AtomicCounterUpdatePromoted(
117 "atomic-counter-update-promoted",
118 cl::desc("Do counter update using atomic fetch add "
119 " for promoted counters only"),
120 cl::init(Val: false));
121
122cl::opt<bool> AtomicFirstCounter(
123 "atomic-first-counter",
124 cl::desc("Use atomic fetch add for first counter in a function (usually "
125 "the entry counter)"),
126 cl::init(Val: false));
127
128cl::opt<bool> ConditionalCounterUpdate(
129 "conditional-counter-update",
130 cl::desc("Do conditional counter updates in single byte counters mode)"),
131 cl::init(Val: false));
132
133// If the option is not specified, the default behavior about whether
134// counter promotion is done depends on how instrumentation lowering
135// pipeline is setup, i.e., the default value of true of this option
136// does not mean the promotion will be done by default. Explicitly
137// setting this option can override the default behavior.
138cl::opt<bool> DoCounterPromotion("do-counter-promotion",
139 cl::desc("Do counter register promotion"),
140 cl::init(Val: false));
141cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
142 "max-counter-promotions-per-loop", cl::init(Val: 20),
143 cl::desc("Max number counter promotions per loop to avoid"
144 " increasing register pressure too much"));
145
146// A debug option
147cl::opt<int>
148 MaxNumOfPromotions("max-counter-promotions", cl::init(Val: -1),
149 cl::desc("Max number of allowed counter promotions"));
150
151cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
152 "speculative-counter-promotion-max-exiting", cl::init(Val: 3),
153 cl::desc("The max number of exiting blocks of a loop to allow "
154 " speculative counter promotion"));
155
156cl::opt<bool> SpeculativeCounterPromotionToLoop(
157 "speculative-counter-promotion-to-loop",
158 cl::desc("When the option is false, if the target block is in a loop, "
159 "the promotion will be disallowed unless the promoted counter "
160 " update can be further/iteratively promoted into an acyclic "
161 " region."));
162
163cl::opt<bool> IterativeCounterPromotion(
164 "iterative-counter-promotion", cl::init(Val: true),
165 cl::desc("Allow counter promotion across the whole loop nest."));
166
167cl::opt<bool> SkipRetExitBlock(
168 "skip-ret-exit-block", cl::init(Val: true),
169 cl::desc("Suppress counter promotion if exit blocks contain ret."));
170
171static cl::opt<bool> SampledInstr("sampled-instrumentation",
172 cl::desc("Do PGO instrumentation sampling"));
173
174static cl::opt<unsigned> SampledInstrPeriod(
175 "sampled-instr-period",
176 cl::desc("Set the profile instrumentation sample period. A sample period "
177 "of 0 is invalid. For each sample period, a fixed number of "
178 "consecutive samples will be recorded. The number is controlled "
179 "by 'sampled-instr-burst-duration' flag. The default sample "
180 "period of 65536 is optimized for generating efficient code that "
181 "leverages unsigned short integer wrapping in overflow, but this "
182 "is disabled under simple sampling (burst duration = 1)."),
183 cl::init(USHRT_MAX + 1));
184
185static cl::opt<unsigned> SampledInstrBurstDuration(
186 "sampled-instr-burst-duration",
187 cl::desc("Set the profile instrumentation burst duration, which can range "
188 "from 1 to the value of 'sampled-instr-period' (0 is invalid). "
189 "This number of samples will be recorded for each "
190 "'sampled-instr-period' count update. Setting to 1 enables simple "
191 "sampling, in which case it is recommended to set "
192 "'sampled-instr-period' to a prime number."),
193 cl::init(Val: 200));
194
// Resolved sampling parameters derived from the command-line flags above.
struct SampledInstrumentationConfig {
  // Number of consecutive counter updates recorded per sample period.
  unsigned BurstDuration;
  // Length of one sampling period, measured in counter-update events.
  unsigned Period;
  // True when the sampling variable fits in 16 bits (i16); otherwise i32.
  bool UseShort;
  // BurstDuration == 1: plain "1 out of Period" sampling without burst check.
  bool IsSimpleSampling;
  // Burst sampling with Period == 65536: the period check is elided and the
  // unsigned-short sampling counter wraps naturally instead.
  bool IsFastSampling;
};
202
203static SampledInstrumentationConfig getSampledInstrumentationConfig() {
204 SampledInstrumentationConfig config;
205 config.BurstDuration = SampledInstrBurstDuration.getValue();
206 config.Period = SampledInstrPeriod.getValue();
207 if (config.BurstDuration > config.Period)
208 report_fatal_error(
209 reason: "SampledBurstDuration must be less than or equal to SampledPeriod");
210 if (config.Period == 0 || config.BurstDuration == 0)
211 report_fatal_error(
212 reason: "SampledPeriod and SampledBurstDuration must be greater than 0");
213 config.IsSimpleSampling = (config.BurstDuration == 1);
214 // If (BurstDuration == 1 && Period == 65536), generate the simple sampling
215 // style code.
216 config.IsFastSampling =
217 (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1);
218 config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling;
219 return config;
220}
221
222using LoadStorePair = std::pair<Instruction *, Instruction *>;
223
224static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
225 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(Val: M.getModuleFlag(Key: Flag));
226 if (!MD)
227 return 0;
228
229 // If the flag is a ConstantAsMetadata, it should be an integer representable
230 // in 64-bits.
231 return cast<ConstantInt>(Val: MD->getValue())->getZExtValue();
232}
233
234static bool enablesValueProfiling(const Module &M) {
235 return isIRPGOFlagSet(M: &M) ||
236 getIntModuleFlagOrZero(M, Flag: "EnableValueProfiling") != 0;
237}
238
// Conservatively returns true if value profiling is enabled.
// Used to decide whether profile data variables may be referenced directly
// from generated code (see InstrLowerer::DataReferencedByCode).
static bool profDataReferencedByCode(const Module &M) {
  return enablesValueProfiling(M);
}
243
/// Lowers instrprof_* intrinsics in a module: replaces them with counter and
/// bitmap updates or runtime calls, and emits the supporting profile data
/// structures (counter/name/data variables, registration and initialization).
class InstrLowerer final {
public:
  InstrLowerer(Module &M, const InstrProfOptions &Options,
               std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
               bool IsCS)
      : M(M), Options(Options), TT(M.getTargetTriple()), IsCS(IsCS),
        GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}

  /// Run the lowering over the whole module. Returns true if the module was
  /// changed.
  bool lower();

private:
  Module &M;
  const InstrProfOptions Options;
  const Triple TT;
  // Is this lowering for the context-sensitive instrumentation.
  const bool IsCS;

  // Callback used to fetch per-function TargetLibraryInfo.
  std::function<const TargetLibraryInfo &(Function &F)> GetTLI;

  // True when profile data may be referenced directly from code; computed
  // once from the module in the constructor (see profDataReferencedByCode).
  const bool DataReferencedByCode;

  /// Per-function lowering state accumulated while visiting intrinsics.
  struct PerFunctionProfileData {
    // Number of instrumented value sites, indexed by value profiling kind.
    uint32_t NumValueSites[IPVK_Last + 1] = {};
    GlobalVariable *RegionCounters = nullptr;
    GlobalVariable *DataVar = nullptr;
    GlobalVariable *RegionBitmaps = nullptr;
    uint32_t NumBitmapBytes = 0;

    PerFunctionProfileData() = default;
  };
  // Keyed by a function-associated GlobalVariable.
  DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
  // Key is virtual table variable, value is 'VTableProfData' in the form of
  // GlobalVariable.
  DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
  /// If runtime relocation is enabled, this maps functions to the load
  /// instruction that produces the profile relocation bias.
  DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
  std::vector<GlobalValue *> CompilerUsedVars;
  std::vector<GlobalValue *> UsedVars;
  std::vector<GlobalVariable *> ReferencedNames;
  // The list of virtual table variables of which the VTableProfData is
  // collected.
  std::vector<GlobalVariable *> ReferencedVTables;
  GlobalVariable *NamesVar = nullptr;
  size_t NamesSize = 0;

  // vector of counter load/store pairs to be register promoted.
  std::vector<LoadStorePair> PromotionCandidates;

  // Running total of counters promoted so far (bounded by
  // -max-counter-promotions).
  int64_t TotalCountersPromoted = 0;

  /// Lower instrumentation intrinsics in the function. Returns true if there
  /// any lowering.
  bool lowerIntrinsics(Function *F);

  /// Register-promote counter loads and stores in loops.
  void promoteCounterLoadStores(Function *F);

  /// Returns true if relocating counters at runtime is enabled.
  bool isRuntimeCounterRelocationEnabled() const;

  /// Returns true if profile counter update register promotion is enabled.
  bool isCounterPromotionEnabled() const;

  /// Return true if profile sampling is enabled.
  bool isSamplingEnabled() const;

  /// Count the number of instrumented value sites for the function.
  void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);

  /// Replace instrprof.value.profile with a call to runtime library.
  void lowerValueProfileInst(InstrProfValueProfileInst *Ins);

  /// Replace instrprof.cover with a store instruction to the coverage byte.
  void lowerCover(InstrProfCoverInst *Inc);

  /// Replace instrprof.timestamp with a call to
  /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
  void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);

  /// Replace instrprof.increment with an increment of the appropriate value.
  void lowerIncrement(InstrProfIncrementInst *Inc);

  /// Force emitting of name vars for unused functions.
  void lowerCoverageData(GlobalVariable *CoverageNamesVar);

  /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
  /// using the index represented by the a temp value into a bitmap.
  void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);

  /// Get the Bias value for data to access mmap-ed area.
  /// Create it if it hasn't been seen.
  GlobalVariable *getOrCreateBiasVar(StringRef VarName);

  /// Compute the address of the counter value that this profiling instruction
  /// acts on.
  Value *getCounterAddress(InstrProfCntrInstBase *I);

  /// Lower the incremental instructions under profile sampling predicates.
  void doSampling(Instruction *I);

  /// Get the region counters for an increment, creating them if necessary.
  ///
  /// If the counter array doesn't yet exist, the profile data variables
  /// referring to them will also be created.
  GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);

  /// Create the region counters.
  GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
                                       StringRef Name,
                                       GlobalValue::LinkageTypes Linkage);

  /// Compute the address of the test vector bitmap that this profiling
  /// instruction acts on.
  Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);

  /// Get the region bitmaps for an increment, creating them if necessary.
  ///
  /// If the bitmap array doesn't yet exist, the profile data variables
  /// referring to them will also be created.
  GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);

  /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
  /// an MC/DC Decision region. The number of bytes required is indicated by
  /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
  /// as part of setupProfileSection() and is conceptually very similar to
  /// what is done for profile data counters in createRegionCounters().
  GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
                                      StringRef Name,
                                      GlobalValue::LinkageTypes Linkage);

  /// Set Comdat property of GV, if required.
  void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);

  /// Setup the sections into which counters and bitmaps are allocated.
  GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
                                      InstrProfSectKind IPSK);

  /// Create INSTR_PROF_DATA variable for counters and bitmaps.
  void createDataVariable(InstrProfCntrInstBase *Inc);

  /// Get the counters for virtual table values, creating them if necessary.
  void getOrCreateVTableProfData(GlobalVariable *GV);

  /// Emit the section with compressed function names.
  void emitNameData();

  /// Emit the section with compressed vtable names.
  void emitVTableNames();

  /// Emit value nodes section for value profiling.
  void emitVNodes();

  /// Emit runtime registration functions for each profile data variable.
  void emitRegistration();

  /// Emit the necessary plumbing to pull in the runtime initialization.
  /// Returns true if a change was made.
  bool emitRuntimeHook();

  /// Add uses of our data variables and runtime hook.
  void emitUses();

  /// Create a static initializer for our data, on platforms that need it,
  /// and for any profile output file that was specified.
  void emitInitialization();
};
411
412///
413/// A helper class to promote one counter RMW operation in the loop
414/// into register update.
415///
416/// RWM update for the counter will be sinked out of the loop after
417/// the transformation.
418///
419class PGOCounterPromoterHelper : public LoadAndStorePromoter {
420public:
421 PGOCounterPromoterHelper(
422 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
423 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
424 ArrayRef<Instruction *> InsertPts,
425 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
426 LoopInfo &LI)
427 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
428 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
429 assert(isa<LoadInst>(L));
430 assert(isa<StoreInst>(S));
431 SSA.AddAvailableValue(BB: PH, V: Init);
432 }
433
434 void doExtraRewritesBeforeFinalDeletion() override {
435 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
436 BasicBlock *ExitBlock = ExitBlocks[i];
437 Instruction *InsertPos = InsertPts[i];
438 // Get LiveIn value into the ExitBlock. If there are multiple
439 // predecessors, the value is defined by a PHI node in this
440 // block.
441 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(BB: ExitBlock);
442 Value *Addr = cast<StoreInst>(Val: Store)->getPointerOperand();
443 Type *Ty = LiveInValue->getType();
444 IRBuilder<> Builder(InsertPos);
445 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Val: Addr)) {
446 // If isRuntimeCounterRelocationEnabled() is true then the address of
447 // the store instruction is computed with two instructions in
448 // InstrProfiling::getCounterAddress(). We need to copy those
449 // instructions to this block to compute Addr correctly.
450 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
451 // %Addr = inttoptr i64 %BiasAdd to i64*
452 auto *OrigBiasInst = dyn_cast<BinaryOperator>(Val: AddrInst->getOperand(i_nocapture: 0));
453 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
454 Value *BiasInst = Builder.Insert(I: OrigBiasInst->clone());
455 Addr = Builder.CreateIntToPtr(V: BiasInst,
456 DestTy: PointerType::getUnqual(C&: Ty->getContext()));
457 }
458 if (AtomicCounterUpdatePromoted)
459 // automic update currently can only be promoted across the current
460 // loop, not the whole loop nest.
461 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Addr, Val: LiveInValue,
462 Align: MaybeAlign(),
463 Ordering: AtomicOrdering::SequentiallyConsistent);
464 else {
465 LoadInst *OldVal = Builder.CreateLoad(Ty, Ptr: Addr, Name: "pgocount.promoted");
466 auto *NewVal = Builder.CreateAdd(LHS: OldVal, RHS: LiveInValue);
467 auto *NewStore = Builder.CreateStore(Val: NewVal, Ptr: Addr);
468
469 // Now update the parent loop's candidate list:
470 if (IterativeCounterPromotion) {
471 auto *TargetLoop = LI.getLoopFor(BB: ExitBlock);
472 if (TargetLoop)
473 LoopToCandidates[TargetLoop].emplace_back(Args&: OldVal, Args&: NewStore);
474 }
475 }
476 }
477 }
478
479private:
480 Instruction *Store;
481 ArrayRef<BasicBlock *> ExitBlocks;
482 ArrayRef<Instruction *> InsertPts;
483 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
484 LoopInfo &LI;
485};
486
487/// A helper class to do register promotion for all profile counter
488/// updates in a loop.
489///
490class PGOCounterPromoter {
491public:
492 PGOCounterPromoter(
493 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
494 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
495 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {
496
497 // Skip collection of ExitBlocks and InsertPts for loops that will not be
498 // able to have counters promoted.
499 SmallVector<BasicBlock *, 8> LoopExitBlocks;
500 SmallPtrSet<BasicBlock *, 8> BlockSet;
501
502 L.getExitBlocks(ExitBlocks&: LoopExitBlocks);
503 if (!isPromotionPossible(LP: &L, LoopExitBlocks))
504 return;
505
506 for (BasicBlock *ExitBlock : LoopExitBlocks) {
507 if (BlockSet.insert(Ptr: ExitBlock).second &&
508 llvm::none_of(Range: predecessors(BB: ExitBlock), P: [&](const BasicBlock *Pred) {
509 return llvm::isPresplitCoroSuspendExitEdge(Src: *Pred, Dest: *ExitBlock);
510 })) {
511 ExitBlocks.push_back(Elt: ExitBlock);
512 InsertPts.push_back(Elt: &*ExitBlock->getFirstInsertionPt());
513 }
514 }
515 }
516
517 bool run(int64_t *NumPromoted) {
518 // Skip 'infinite' loops:
519 if (ExitBlocks.size() == 0)
520 return false;
521
522 // Skip if any of the ExitBlocks contains a ret instruction.
523 // This is to prevent dumping of incomplete profile -- if the
524 // the loop is a long running loop and dump is called in the middle
525 // of the loop, the result profile is incomplete.
526 // FIXME: add other heuristics to detect long running loops.
527 if (SkipRetExitBlock) {
528 for (auto *BB : ExitBlocks)
529 if (isa<ReturnInst>(Val: BB->getTerminator()))
530 return false;
531 }
532
533 unsigned MaxProm = getMaxNumOfPromotionsInLoop(LP: &L);
534 if (MaxProm == 0)
535 return false;
536
537 unsigned Promoted = 0;
538 for (auto &Cand : LoopToCandidates[&L]) {
539
540 SmallVector<PHINode *, 4> NewPHIs;
541 SSAUpdater SSA(&NewPHIs);
542 Value *InitVal = ConstantInt::get(Ty: Cand.first->getType(), V: 0);
543
544 // If BFI is set, we will use it to guide the promotions.
545 if (BFI) {
546 auto *BB = Cand.first->getParent();
547 auto InstrCount = BFI->getBlockProfileCount(BB);
548 if (!InstrCount)
549 continue;
550 auto PreheaderCount = BFI->getBlockProfileCount(BB: L.getLoopPreheader());
551 // If the average loop trip count is not greater than 1.5, we skip
552 // promotion.
553 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
554 continue;
555 }
556
557 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
558 L.getLoopPreheader(), ExitBlocks,
559 InsertPts, LoopToCandidates, LI);
560 Promoter.run(Insts: SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
561 Promoted++;
562 if (Promoted >= MaxProm)
563 break;
564
565 (*NumPromoted)++;
566 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
567 break;
568 }
569
570 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
571 << L.getLoopDepth() << ")\n");
572 return Promoted != 0;
573 }
574
575private:
576 bool allowSpeculativeCounterPromotion(Loop *LP) {
577 SmallVector<BasicBlock *, 8> ExitingBlocks;
578 L.getExitingBlocks(ExitingBlocks);
579 // Not considierered speculative.
580 if (ExitingBlocks.size() == 1)
581 return true;
582 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
583 return false;
584 return true;
585 }
586
587 // Check whether the loop satisfies the basic conditions needed to perform
588 // Counter Promotions.
589 bool
590 isPromotionPossible(Loop *LP,
591 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
592 // We can't insert into a catchswitch.
593 if (llvm::any_of(Range: LoopExitBlocks, P: [](BasicBlock *Exit) {
594 return isa<CatchSwitchInst>(Val: Exit->getTerminator());
595 }))
596 return false;
597
598 if (!LP->hasDedicatedExits())
599 return false;
600
601 BasicBlock *PH = LP->getLoopPreheader();
602 if (!PH)
603 return false;
604
605 return true;
606 }
607
608 // Returns the max number of Counter Promotions for LP.
609 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
610 SmallVector<BasicBlock *, 8> LoopExitBlocks;
611 LP->getExitBlocks(ExitBlocks&: LoopExitBlocks);
612 if (!isPromotionPossible(LP, LoopExitBlocks))
613 return 0;
614
615 SmallVector<BasicBlock *, 8> ExitingBlocks;
616 LP->getExitingBlocks(ExitingBlocks);
617
618 // If BFI is set, we do more aggressive promotions based on BFI.
619 if (BFI)
620 return (unsigned)-1;
621
622 // Not considierered speculative.
623 if (ExitingBlocks.size() == 1)
624 return MaxNumOfPromotionsPerLoop;
625
626 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
627 return 0;
628
629 // Whether the target block is in a loop does not matter:
630 if (SpeculativeCounterPromotionToLoop)
631 return MaxNumOfPromotionsPerLoop;
632
633 // Now check the target block:
634 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
635 for (auto *TargetBlock : LoopExitBlocks) {
636 auto *TargetLoop = LI.getLoopFor(BB: TargetBlock);
637 if (!TargetLoop)
638 continue;
639 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(LP: TargetLoop);
640 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
641 MaxProm =
642 std::min(a: MaxProm, b: std::max(a: MaxPromForTarget, b: PendingCandsInTarget) -
643 PendingCandsInTarget);
644 }
645 return MaxProm;
646 }
647
648 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
649 SmallVector<BasicBlock *, 8> ExitBlocks;
650 SmallVector<Instruction *, 8> InsertPts;
651 Loop &L;
652 LoopInfo &LI;
653 BlockFrequencyInfo *BFI;
654};
655
enum class ValueProfilingCallType {
  // Individual values are tracked. Currently used for indirect call target
  // profiling.
  Default,

  // MemOp: the memop size value profiling.
  MemOp
};
664
665} // end anonymous namespace
666
667PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
668 ModuleAnalysisManager &AM) {
669 FunctionAnalysisManager &FAM =
670 AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
671 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
672 return FAM.getResult<TargetLibraryAnalysis>(IR&: F);
673 };
674 InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
675 if (!Lowerer.lower())
676 return PreservedAnalyses::all();
677
678 return PreservedAnalyses::none();
679}
680
681//
682// Perform instrumentation sampling.
683//
// There are 3 flavors of sampling:
685// (1) Full burst sampling: We transform:
686// Increment_Instruction;
687// to:
688// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
689// Increment_Instruction;
690// }
691// __llvm_profile_sampling__ += 1;
692// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
693// __llvm_profile_sampling__ = 0;
694// }
695//
696// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
697// counters (value-instrumentation and edge instrumentation).
698//
699// (2) Fast burst sampling:
700// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
701// wrap around to zero when overflows. In this case, the second check is
702// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
703// set to 65536 (64K). The code after:
704// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
705// Increment_Instruction;
706// }
707// __llvm_profile_sampling__ += 1;
708//
709// (3) Simple sampling:
710// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
711// __llvm_profile_sampling__ += 1;
712// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
713// __llvm_profile_sampling__ = 0;
714// Increment_Instruction;
715// }
716//
717// Note that, the code snippet after the transformation can still be counter
718// promoted. However, with sampling enabled, counter updates are expected to
719// be infrequent, making the benefits of counter promotion negligible.
720// Moreover, counter promotion can potentially cause issues in server
721// applications, particularly when the counters are dumped without a clean
722// exit. To mitigate this risk, counter promotion is disabled by default when
723// sampling is enabled. This behavior can be overridden using the internal
724// option.
725void InstrLowerer::doSampling(Instruction *I) {
726 if (!isSamplingEnabled())
727 return;
728
729 SampledInstrumentationConfig config = getSampledInstrumentationConfig();
730 auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) {
731 if (config.UseShort)
732 return Builder.getInt16(C);
733 else
734 return Builder.getInt32(C);
735 };
736
737 IntegerType *SamplingVarTy;
738 if (config.UseShort)
739 SamplingVarTy = Type::getInt16Ty(C&: M.getContext());
740 else
741 SamplingVarTy = Type::getInt32Ty(C&: M.getContext());
742 auto *SamplingVar =
743 M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
744 assert(SamplingVar && "SamplingVar not set properly");
745
746 // Create the condition for checking the burst duration.
747 Instruction *SamplingVarIncr;
748 Value *NewSamplingVarVal;
749 MDBuilder MDB(I->getContext());
750 MDNode *BranchWeight;
751 IRBuilder<> CondBuilder(I);
752 auto *LoadSamplingVar = CondBuilder.CreateLoad(Ty: SamplingVarTy, Ptr: SamplingVar);
753 if (config.IsSimpleSampling) {
754 // For the simple sampling, just create the load and increments.
755 IRBuilder<> IncBuilder(I);
756 NewSamplingVarVal =
757 IncBuilder.CreateAdd(LHS: LoadSamplingVar, RHS: GetConstant(IncBuilder, 1));
758 SamplingVarIncr = IncBuilder.CreateStore(Val: NewSamplingVarVal, Ptr: SamplingVar);
759 } else {
760 // For the burst-sampling, create the conditional update.
761 auto *DurationCond = CondBuilder.CreateICmpULE(
762 LHS: LoadSamplingVar, RHS: GetConstant(CondBuilder, config.BurstDuration - 1));
763 BranchWeight = MDB.createBranchWeights(
764 TrueWeight: config.BurstDuration, FalseWeight: config.Period - config.BurstDuration);
765 Instruction *ThenTerm = SplitBlockAndInsertIfThen(
766 Cond: DurationCond, SplitBefore: I, /* Unreachable */ false, BranchWeights: BranchWeight);
767 IRBuilder<> IncBuilder(I);
768 NewSamplingVarVal =
769 IncBuilder.CreateAdd(LHS: LoadSamplingVar, RHS: GetConstant(IncBuilder, 1));
770 SamplingVarIncr = IncBuilder.CreateStore(Val: NewSamplingVarVal, Ptr: SamplingVar);
771 I->moveBefore(InsertPos: ThenTerm->getIterator());
772 }
773
774 if (config.IsFastSampling)
775 return;
776
777 // Create the condition for checking the period.
778 Instruction *ThenTerm, *ElseTerm;
779 IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
780 auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
781 LHS: NewSamplingVarVal, RHS: GetConstant(PeriodCondBuilder, config.Period));
782 BranchWeight = MDB.createBranchWeights(TrueWeight: 1, FalseWeight: config.Period - 1);
783 SplitBlockAndInsertIfThenElse(Cond: PeriodCond, SplitBefore: SamplingVarIncr, ThenTerm: &ThenTerm,
784 ElseTerm: &ElseTerm, BranchWeights: BranchWeight);
785
786 // For the simple sampling, the counter update happens in sampling var reset.
787 if (config.IsSimpleSampling)
788 I->moveBefore(InsertPos: ThenTerm->getIterator());
789
790 IRBuilder<> ResetBuilder(ThenTerm);
791 ResetBuilder.CreateStore(Val: GetConstant(ResetBuilder, 0), Ptr: SamplingVar);
792 SamplingVarIncr->moveBefore(InsertPos: ElseTerm->getIterator());
793}
794
bool InstrLowerer::lowerIntrinsics(Function *F) {
  // Lower every instrprof intrinsic found in \p F. Returns true if anything
  // was lowered; counter promotion runs afterwards only when changes happened.
  bool MadeChange = false;
  PromotionCandidates.clear();
  SmallVector<InstrProfInstBase *, 8> InstrProfInsts;

  // To ensure compatibility with sampling, we save the intrinsics into
  // a buffer to prevent potential breakage of the iterator (as the
  // intrinsics will be moved to a different BB).
  for (BasicBlock &BB : *F) {
    for (Instruction &Instr : llvm::make_early_inc_range(Range&: BB)) {
      if (auto *IP = dyn_cast<InstrProfInstBase>(Val: &Instr))
        InstrProfInsts.push_back(Elt: IP);
    }
  }

  for (auto *Instr : InstrProfInsts) {
    // Sampling (when enabled) wraps the update in a conditional before the
    // update itself is lowered below.
    doSampling(I: Instr);
    // NOTE: the step-increment check must come before the plain increment
    // check — presumably a step increment would otherwise match the more
    // general increment class first.
    if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Val: Instr)) {
      lowerIncrement(Inc: IPIS);
      MadeChange = true;
    } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Val: Instr)) {
      lowerIncrement(Inc: IPI);
      MadeChange = true;
    } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Val: Instr)) {
      lowerTimestamp(TimestampInstruction: IPC);
      MadeChange = true;
    } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Val: Instr)) {
      lowerCover(Inc: IPC);
      MadeChange = true;
    } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Val: Instr)) {
      lowerValueProfileInst(Ins: IPVP);
      MadeChange = true;
    } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Val: Instr)) {
      // The bitmap-parameters intrinsic needs no lowering of its own here;
      // it is simply erased (bitmaps are created earlier, in lower()).
      IPMP->eraseFromParent();
      MadeChange = true;
    } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Val: Instr)) {
      lowerMCDCTestVectorBitmapUpdate(Ins: IPBU);
      MadeChange = true;
    }
  }

  if (!MadeChange)
    return false;

  promoteCounterLoadStores(F);
  return true;
}
842
843bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
844 // Mach-O don't support weak external references.
845 if (TT.isOSBinFormatMachO())
846 return false;
847
848 if (RuntimeCounterRelocation.getNumOccurrences() > 0)
849 return RuntimeCounterRelocation;
850
851 // Fuchsia uses runtime counter relocation by default.
852 return TT.isOSFuchsia();
853}
854
855bool InstrLowerer::isSamplingEnabled() const {
856 if (SampledInstr.getNumOccurrences() > 0)
857 return SampledInstr;
858 return Options.Sampling;
859}
860
861bool InstrLowerer::isCounterPromotionEnabled() const {
862 if (DoCounterPromotion.getNumOccurrences() > 0)
863 return DoCounterPromotion;
864
865 return Options.DoCounterPromotion;
866}
867
void InstrLowerer::promoteCounterLoadStores(Function *F) {
  // Hoist previously recorded counter load/store pairs out of loops.
  // No-op unless counter promotion has been requested.
  if (!isCounterPromotionEnabled())
    return;

  DominatorTree DT(*F);
  LoopInfo LI(DT);
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;

  // Block frequency info is optional: it is only built when the options ask
  // for frequency-guided promotion decisions.
  std::unique_ptr<BlockFrequencyInfo> BFI;
  if (Options.UseBFIInPromotion) {
    std::unique_ptr<BranchProbabilityInfo> BPI;
    BPI.reset(p: new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
    BFI.reset(p: new BlockFrequencyInfo(*F, *BPI, LI));
  }

  // Bucket the candidate pairs by their innermost enclosing loop; pairs that
  // sit outside any loop cannot be promoted and are skipped.
  for (const auto &LoadStore : PromotionCandidates) {
    auto *CounterLoad = LoadStore.first;
    auto *CounterStore = LoadStore.second;
    BasicBlock *BB = CounterLoad->getParent();
    Loop *ParentLoop = LI.getLoopFor(BB);
    if (!ParentLoop)
      continue;
    LoopPromotionCandidates[ParentLoop].emplace_back(Args&: CounterLoad, Args&: CounterStore);
  }

  SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();

  // Do a post-order traversal of the loops so that counter updates can be
  // iteratively hoisted outside the loop nest.
  for (auto *Loop : llvm::reverse(C&: Loops)) {
    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
    Promoter.run(NumPromoted: &TotalCountersPromoted);
  }
}
902
903static bool needsRuntimeHookUnconditionally(const Triple &TT) {
904 // On Fuchsia, we only need runtime hook if any counters are present.
905 if (TT.isOSFuchsia())
906 return false;
907
908 return true;
909}
910
911/// Check if the module contains uses of any profiling intrinsics.
912static bool containsProfilingIntrinsics(Module &M) {
913 auto containsIntrinsic = [&](int ID) {
914 if (auto *F = Intrinsic::getDeclarationIfExists(M: &M, id: ID))
915 return !F->use_empty();
916 return false;
917 };
918 return containsIntrinsic(Intrinsic::instrprof_cover) ||
919 containsIntrinsic(Intrinsic::instrprof_increment) ||
920 containsIntrinsic(Intrinsic::instrprof_increment_step) ||
921 containsIntrinsic(Intrinsic::instrprof_timestamp) ||
922 containsIntrinsic(Intrinsic::instrprof_value_profile);
923}
924
bool InstrLowerer::lower() {
  // Module-level driver: emits the runtime hook, creates per-function counter
  // and data variables, lowers all profiling intrinsics, then emits names,
  // registration, uses and initialization. Returns true on modification.
  bool MadeChange = false;
  bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
  if (NeedsRuntimeHook)
    MadeChange = emitRuntimeHook();

  // The sampling variable is created once per module, and only for
  // non-context-sensitive instrumentation.
  if (!IsCS && isSamplingEnabled())
    createProfileSamplingVar(M);

  bool ContainsProfiling = containsProfilingIntrinsics(M);
  GlobalVariable *CoverageNamesVar =
      M.getNamedGlobal(Name: getCoverageUnusedNamesVarName());
  // Improve compile time by avoiding linear scans when there is no work.
  if (!ContainsProfiling && !CoverageNamesVar)
    return MadeChange;

  // We did not know how many value sites there would be inside
  // the instrumented function. This is counting the number of instrumented
  // target value sites to enter it as field in the profile data variable.
  for (Function &F : M) {
    InstrProfCntrInstBase *FirstProfInst = nullptr;
    for (BasicBlock &BB : F) {
      for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
        if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Val&: I))
          computeNumValueSiteCounts(Ins: Ind);
        else {
          // Remember the first counter-bearing intrinsic; it anchors the
          // creation of this function's counter region below.
          if (FirstProfInst == nullptr &&
              (isa<InstrProfIncrementInst>(Val: I) || isa<InstrProfCoverInst>(Val: I)))
            FirstProfInst = dyn_cast<InstrProfCntrInstBase>(Val&: I);
          // If the MCDCBitmapParameters intrinsic seen, create the bitmaps.
          if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(Val&: I))
            static_cast<void>(getOrCreateRegionBitmaps(Inc: Params));
        }
      }
    }

    // Use a profile intrinsic to create the region counters and data variable.
    // Also create the data variable based on the MCDCParams.
    if (FirstProfInst != nullptr) {
      static_cast<void>(getOrCreateRegionCounters(Inc: FirstProfInst));
    }
  }

  if (EnableVTableValueProfiling)
    for (GlobalVariable &GV : M.globals())
      // Global variables with type metadata are virtual table variables.
      if (GV.hasMetadata(KindID: LLVMContext::MD_type))
        getOrCreateVTableProfData(GV: &GV);

  for (Function &F : M)
    MadeChange |= lowerIntrinsics(F: &F);

  if (CoverageNamesVar) {
    lowerCoverageData(CoverageNamesVar);
    MadeChange = true;
  }

  if (!MadeChange)
    return false;

  emitVNodes();
  emitNameData();
  emitVTableNames();

  // Emit runtime hook for the cases where the target does not unconditionally
  // require pulling in profile runtime, and coverage is enabled on code that is
  // not eliminated by the front-end, e.g. unused functions with internal
  // linkage.
  if (!NeedsRuntimeHook && ContainsProfiling)
    emitRuntimeHook();

  emitRegistration();
  emitUses();
  emitInitialization();
  return true;
}
1001
static FunctionCallee getOrInsertValueProfilingCall(
    Module &M, const TargetLibraryInfo &TLI,
    ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
  // Return (inserting if needed) the declaration of the value-profiling
  // runtime entry point for \p CallType. The parameter list is generated from
  // InstrProfData.inc so it stays in sync with the runtime's definition.
  //
  // NOTE: Ctx appears unused here but is presumably referenced by the
  // ParamLLVMType expansions from the .inc file below — confirm before
  // removing.
  LLVMContext &Ctx = M.getContext();
  auto *ReturnTy = Type::getVoidTy(C&: M.getContext());

  // Some targets require a zero/sign-extension attribute on the i32
  // parameter at index 2.
  AttributeList AL;
  if (auto AK = TLI.getExtAttrForI32Param(Signed: false))
    AL = AL.addParamAttribute(C&: M.getContext(), ArgNo: 2, Kind: AK);

  assert((CallType == ValueProfilingCallType::Default ||
          CallType == ValueProfilingCallType::MemOp) &&
         "Must be Default or MemOp");
  Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *ValueProfilingCallTy =
      FunctionType::get(Result: ReturnTy, Params: ArrayRef(ParamTypes), isVarArg: false);
  // Memory-op profiling has its own runtime entry point; everything else
  // goes through the default one.
  StringRef FuncName = CallType == ValueProfilingCallType::Default
                           ? getInstrProfValueProfFuncName()
                           : getInstrProfValueProfMemOpFuncName();
  return M.getOrInsertFunction(Name: FuncName, T: ValueProfilingCallTy, AttributeList: AL);
}
1026
1027void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
1028 GlobalVariable *Name = Ind->getName();
1029 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1030 uint64_t Index = Ind->getIndex()->getZExtValue();
1031 auto &PD = ProfileDataMap[Name];
1032 PD.NumValueSites[ValueKind] =
1033 std::max(a: PD.NumValueSites[ValueKind], b: (uint32_t)(Index + 1));
1034}
1035
1036void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
1037 // TODO: Value profiling heavily depends on the data section which is omitted
1038 // in lightweight mode. We need to move the value profile pointer to the
1039 // Counter struct to get this working.
1040 assert(
1041 ProfileCorrelate == InstrProfCorrelator::NONE &&
1042 "Value profiling is not yet supported with lightweight instrumentation");
1043 GlobalVariable *Name = Ind->getName();
1044 auto It = ProfileDataMap.find(Val: Name);
1045 assert(It != ProfileDataMap.end() && It->second.DataVar &&
1046 "value profiling detected in function with no counter increment");
1047
1048 GlobalVariable *DataVar = It->second.DataVar;
1049 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1050 uint64_t Index = Ind->getIndex()->getZExtValue();
1051 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
1052 Index += It->second.NumValueSites[Kind];
1053
1054 IRBuilder<> Builder(Ind);
1055 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
1056 llvm::InstrProfValueKind::IPVK_MemOPSize);
1057 CallInst *Call = nullptr;
1058 auto *TLI = &GetTLI(*Ind->getFunction());
1059 auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1060 C: DataVar, Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0));
1061
1062 // To support value profiling calls within Windows exception handlers, funclet
1063 // information contained within operand bundles needs to be copied over to
1064 // the library call. This is required for the IR to be processed by the
1065 // WinEHPrepare pass.
1066 SmallVector<OperandBundleDef, 1> OpBundles;
1067 Ind->getOperandBundlesAsDefs(Defs&: OpBundles);
1068 if (!IsMemOpSize) {
1069 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1070 Builder.getInt32(C: Index)};
1071 Call = Builder.CreateCall(Callee: getOrInsertValueProfilingCall(M, TLI: *TLI), Args,
1072 OpBundles);
1073 } else {
1074 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1075 Builder.getInt32(C: Index)};
1076 Call = Builder.CreateCall(
1077 Callee: getOrInsertValueProfilingCall(M, TLI: *TLI, CallType: ValueProfilingCallType::MemOp),
1078 Args, OpBundles);
1079 }
1080 if (auto AK = TLI->getExtAttrForI32Param(Signed: false))
1081 Call->addParamAttr(ArgNo: 2, Kind: AK);
1082 Ind->replaceAllUsesWith(V: Call);
1083 Ind->eraseFromParent();
1084}
1085
1086GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
1087 GlobalVariable *Bias = M.getGlobalVariable(Name: VarName);
1088 if (Bias)
1089 return Bias;
1090
1091 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
1092
1093 // Compiler must define this variable when runtime counter relocation
1094 // is being used. Runtime has a weak external reference that is used
1095 // to check whether that's the case or not.
1096 Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
1097 Constant::getNullValue(Ty: Int64Ty), VarName);
1098 Bias->setVisibility(GlobalVariable::HiddenVisibility);
1099 // A definition that's weak (linkonce_odr) without being in a COMDAT
1100 // section wouldn't lead to link errors, but it would lead to a dead
1101 // data word from every TU but one. Putting it in COMDAT ensures there
1102 // will be exactly one data slot in the link.
1103 if (TT.supportsCOMDAT())
1104 Bias->setComdat(M.getOrInsertComdat(Name: VarName));
1105
1106 return Bias;
1107}
1108
Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
  // Compute the address of the counter slot updated by intrinsic \p I: a GEP
  // into the function's counter array, optionally rebased by the runtime
  // counter-relocation bias.
  auto *Counters = getOrCreateRegionCounters(Inc: I);
  IRBuilder<> Builder(I);

  // Timestamp slots need 8-byte alignment on the counter array.
  if (isa<InstrProfTimestampInst>(Val: I))
    Counters->setAlignment(Align(8));

  auto *Addr = Builder.CreateConstInBoundsGEP2_32(
      Ty: Counters->getValueType(), Ptr: Counters, Idx0: 0, Idx1: I->getIndex()->getZExtValue());

  if (!isRuntimeCounterRelocationEnabled())
    return Addr;

  // With runtime counter relocation, every counter access is offset by a
  // bias value. The bias is loaded once in the function's entry block and
  // the load is cached per function.
  Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
  Function *Fn = I->getParent()->getParent();
  LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
  if (!BiasLI) {
    IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
    auto *Bias = getOrCreateBiasVar(VarName: getInstrProfCounterBiasVarName());
    BiasLI = EntryBuilder.CreateLoad(Ty: Int64Ty, Ptr: Bias, Name: "profc_bias");
    // Bias doesn't change after startup.
    BiasLI->setMetadata(KindID: LLVMContext::MD_invariant_load,
                        Node: MDNode::get(Context&: M.getContext(), MDs: {}));
  }
  // Apply the bias via integer arithmetic, then convert back to a pointer of
  // the original type.
  auto *Add = Builder.CreateAdd(LHS: Builder.CreatePtrToInt(V: Addr, DestTy: Int64Ty), RHS: BiasLI);
  return Builder.CreateIntToPtr(V: Add, DestTy: Addr->getType());
}
1136
Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
  // Return the base address of this function's MC/DC bitmap region, adjusted
  // by the runtime relocation bias when that mode is enabled.
  auto *Bitmaps = getOrCreateRegionBitmaps(Inc: I);
  if (!isRuntimeCounterRelocationEnabled())
    return Bitmaps;

  // Put BiasLI onto the entry block.
  // NOTE(review): unlike getCounterAddress, this bias load is not cached per
  // function — each lowered bitmap update emits its own entry-block load.
  Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
  Function *Fn = I->getFunction();
  IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
  auto *Bias = getOrCreateBiasVar(VarName: getInstrProfBitmapBiasVarName());
  auto *BiasLI = EntryBuilder.CreateLoad(Ty: Int64Ty, Ptr: Bias, Name: "profbm_bias");
  // Assume BiasLI invariant (in the function at least)
  BiasLI->setMetadata(KindID: LLVMContext::MD_invariant_load,
                      Node: MDNode::get(Context&: M.getContext(), MDs: {}));

  // Add Bias to Bitmaps and put it before the intrinsic.
  IRBuilder<> Builder(I);
  return Builder.CreatePtrAdd(Ptr: Bitmaps, Offset: BiasLI, Name: "profbm_addr");
}
1156
void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
  // Lower a coverage intrinsic to a store of 0 into its counter byte (zero
  // means "covered"). Optionally guard the store so already-covered blocks
  // skip the redundant write.
  auto *Addr = getCounterAddress(I: CoverInstruction);
  IRBuilder<> Builder(CoverInstruction);
  if (ConditionalCounterUpdate) {
    // Only store when the byte is still non-zero, i.e. not yet covered.
    Instruction *SplitBefore = CoverInstruction->getNextNode();
    auto &Ctx = CoverInstruction->getParent()->getContext();
    auto *Int8Ty = llvm::Type::getInt8Ty(C&: Ctx);
    Value *Load = Builder.CreateLoad(Ty: Int8Ty, Ptr: Addr, Name: "pgocount");
    Value *Cmp = Builder.CreateIsNotNull(Arg: Load, Name: "pgocount.ifnonzero");
    Instruction *ThenBranch =
        SplitBlockAndInsertIfThen(Cond: Cmp, SplitBefore, Unreachable: false);
    // Subsequent instructions (the store below) go into the then-block.
    Builder.SetInsertPoint(ThenBranch);
  }

  // We store zero to represent that this block is covered.
  Builder.CreateStore(Val: Builder.getInt8(C: 0), Ptr: Addr);
  CoverInstruction->eraseFromParent();
}
1175
void InstrLowerer::lowerTimestamp(
    InstrProfTimestampInst *TimestampInstruction) {
  // Replace a timestamp intrinsic with a call into the profile runtime's
  // set-timestamp entry point, passing the address of the counter slot that
  // stores the stamp.
  assert(TimestampInstruction->getIndex()->isNullValue() &&
         "timestamp probes are always the first probe for a function");
  auto &Ctx = M.getContext();
  auto *TimestampAddr = getCounterAddress(I: TimestampInstruction);
  IRBuilder<> Builder(TimestampInstruction);
  // Runtime callee: void(ptr); the name comes from InstrProfData macros.
  auto *CalleeTy =
      FunctionType::get(Result: Type::getVoidTy(C&: Ctx), Params: TimestampAddr->getType(), isVarArg: false);
  auto Callee = M.getOrInsertFunction(
      INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), T: CalleeTy);
  Builder.CreateCall(Callee, Args: {TimestampAddr});
  TimestampInstruction->eraseFromParent();
}
1190
void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
  // Lower an increment intrinsic to a real counter update. Three forms:
  // a call into the GPU runtime, an atomic add, or a plain load-add-store.
  auto *Addr = getCounterAddress(I: Inc);

  IRBuilder<> Builder(Inc);
  if (isGPUProfTarget(M)) {
    // GPU targets delegate the update to a runtime helper.
    auto *I64Ty = Builder.getInt64Ty();
    auto *PtrTy = Builder.getPtrTy();
    auto *CalleeTy = FunctionType::get(Result: Type::getVoidTy(C&: M.getContext()),
                                       Params: {PtrTy, PtrTy, I64Ty}, isVarArg: false);
    auto Callee =
        M.getOrInsertFunction(Name: "__llvm_profile_instrument_gpu", T: CalleeTy);
    Value *CastAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(V: Addr, DestTy: PtrTy);
    // The second argument is passed as a null pointer here; its semantics
    // are defined by the runtime helper.
    Value *Uniform =
        ConstantPointerNull::get(T: PointerType::getUnqual(C&: M.getContext()));
    Builder.CreateCall(Callee, Args: {CastAddr, Uniform, Inc->getStep()});
  } else if (Options.Atomic || AtomicCounterUpdateAll ||
             (Inc->getIndex()->isNullValue() && AtomicFirstCounter)) {
    // Atomic update: used when requested globally, or for the first counter
    // of each function when AtomicFirstCounter is on.
    Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Addr, Val: Inc->getStep(),
                            Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
  } else {
    // Plain load-add-store; the pair is recorded so the counter promotion
    // pass can later hoist it out of loops.
    Value *IncStep = Inc->getStep();
    Value *Load = Builder.CreateLoad(Ty: IncStep->getType(), Ptr: Addr, Name: "pgocount");
    auto *Count = Builder.CreateAdd(LHS: Load, RHS: Inc->getStep());
    auto *Store = Builder.CreateStore(Val: Count, Ptr: Addr);
    if (isCounterPromotionEnabled())
      PromotionCandidates.emplace_back(args: cast<Instruction>(Val: Load), args&: Store);
  }
  Inc->eraseFromParent();
}
1220
1221void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
1222 ConstantArray *Names =
1223 cast<ConstantArray>(Val: CoverageNamesVar->getInitializer());
1224 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
1225 Constant *NC = Names->getOperand(i_nocapture: I);
1226 Value *V = NC->stripPointerCasts();
1227 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
1228 GlobalVariable *Name = cast<GlobalVariable>(Val: V);
1229
1230 Name->setLinkage(GlobalValue::PrivateLinkage);
1231 ReferencedNames.push_back(x: Name);
1232 if (isa<ConstantExpr>(Val: NC))
1233 NC->dropAllReferences();
1234 }
1235 CoverageNamesVar->eraseFromParent();
1236}
1237
void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
    InstrProfMCDCTVBitmapUpdate *Update) {
  // Lower an MC/DC bitmap-update intrinsic: compute the bit index from the
  // condition bitmap plus the instruction's bitmap index, then set that bit
  // in the function's global test-vector bitmap — atomically or with a plain
  // load/or/store depending on the options.
  auto &Ctx = M.getContext();
  IRBuilder<> Builder(Update);
  auto *Int8Ty = Type::getInt8Ty(C&: Ctx);
  auto *Int32Ty = Type::getInt32Ty(C&: Ctx);
  auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
  auto *BitmapAddr = getBitmapAddress(I: Update);

  // Load Temp Val + BitmapIdx.
  // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
  auto *Temp = Builder.CreateAdd(
      LHS: Builder.CreateLoad(Ty: Int32Ty, Ptr: MCDCCondBitmapAddr, Name: "mcdc.temp"),
      RHS: Update->getBitmapIndex());

  // Calculate byte offset using div8.
  // %1 = lshr i32 %mcdc.temp, 3
  auto *BitmapByteOffset = Builder.CreateLShr(LHS: Temp, RHS: 0x3);

  // Add byte offset to section base byte address.
  // %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
  auto *BitmapByteAddr =
      Builder.CreateInBoundsPtrAdd(Ptr: BitmapAddr, Offset: BitmapByteOffset);

  // Calculate bit offset into bitmap byte by using div8 remainder (AND 0x7)
  // %5 = and i32 %mcdc.temp, 7
  // %6 = trunc i32 %5 to i8
  auto *BitToSet = Builder.CreateTrunc(V: Builder.CreateAnd(LHS: Temp, RHS: 0x7), DestTy: Int8Ty);

  // Shift bit offset left to form a bitmap.
  // %7 = shl i8 1, %6
  auto *ShiftedVal = Builder.CreateShl(LHS: Builder.getInt8(C: 0x1), RHS: BitToSet);

  // Load profile bitmap byte.
  // %mcdc.bits = load i8, ptr %4, align 1
  auto *Bitmap = Builder.CreateLoad(Ty: Int8Ty, Ptr: BitmapByteAddr, Name: "mcdc.bits");

  if (Options.Atomic || AtomicCounterUpdateAll) {
    // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
    // Note, just-loaded Bitmap might not be up-to-date. Use it just for
    // early testing.
    auto *Masked = Builder.CreateAnd(LHS: Bitmap, RHS: ShiftedVal);
    auto *ShouldStore = Builder.CreateICmpNE(LHS: Masked, RHS: ShiftedVal);

    // Assume updating will be rare.
    auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
    Instruction *ThenBranch =
        SplitBlockAndInsertIfThen(Cond: ShouldStore, SplitBefore: Update, Unreachable: false, BranchWeights: Unlikely);

    // Execute if (unlikely(ShouldStore)).
    Builder.SetInsertPoint(ThenBranch);
    Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: BitmapByteAddr, Val: ShiftedVal,
                            Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
  } else {
    // Perform logical OR of profile bitmap byte and shifted bit offset.
    // %8 = or i8 %mcdc.bits, %7
    auto *Result = Builder.CreateOr(LHS: Bitmap, RHS: ShiftedVal);

    // Store the updated profile bitmap byte.
    // store i8 %8, ptr %3, align 1
    Builder.CreateStore(Val: Result, Ptr: BitmapByteAddr);
  }

  Update->eraseFromParent();
}
1303
/// Get the name of a profiling variable for a particular function.
/// \p Prefix is prepended to the function's name; \p Renamed is set to true
/// when a hash suffix was appended to avoid comdat counter-splitting clashes.
static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
                              bool &Renamed) {
  // Strip the name-variable prefix to recover the raw function name.
  StringRef NamePrefix = getInstrProfNameVarPrefix();
  StringRef Name = Inc->getName()->getName().substr(Start: NamePrefix.size());
  Function *F = Inc->getParent()->getParent();
  Module *M = F->getParent();
  // Hash-based renaming only applies for IR PGO on functions that are safe
  // to rename; otherwise return the plain prefixed name.
  if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
      !canRenameComdatFunc(F: *F)) {
    Renamed = false;
    return (Prefix + Name).str();
  }
  Renamed = true;
  uint64_t FuncHash = Inc->getHash()->getZExtValue();
  SmallVector<char, 24> HashPostfix;
  // Avoid doubling the suffix when the name already ends with ".<hash>".
  if (Name.ends_with(Suffix: (Twine(".") + Twine(FuncHash)).toStringRef(Out&: HashPostfix)))
    return (Prefix + Name).str();
  return (Prefix + Name + "." + Twine(FuncHash)).str();
}
1323
1324static inline bool shouldRecordFunctionAddr(Function *F) {
1325 // Only record function addresses if IR PGO is enabled or if clang value
1326 // profiling is enabled. Recording function addresses greatly increases object
1327 // file size, because it prevents the inliner from deleting functions that
1328 // have been inlined everywhere.
1329 if (!profDataReferencedByCode(M: *F->getParent()))
1330 return false;
1331
1332 // Check the linkage
1333 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
1334 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
1335 !HasAvailableExternallyLinkage)
1336 return true;
1337
1338 // A function marked 'alwaysinline' with available_externally linkage can't
1339 // have its address taken. Doing so would create an undefined external ref to
1340 // the function, which would fail to link.
1341 if (HasAvailableExternallyLinkage &&
1342 F->hasFnAttribute(Kind: Attribute::AlwaysInline))
1343 return false;
1344
1345 // Prohibit function address recording if the function is both internal and
1346 // COMDAT. This avoids the profile data variable referencing internal symbols
1347 // in COMDAT.
1348 if (F->hasLocalLinkage() && F->hasComdat())
1349 return false;
1350
1351 // Check uses of this function for other than direct calls or invokes to it.
1352 // Inline virtual functions have linkeOnceODR linkage. When a key method
1353 // exists, the vtable will only be emitted in the TU where the key method
1354 // is defined. In a TU where vtable is not available, the function won't
1355 // be 'addresstaken'. If its address is not recorded here, the profile data
1356 // with missing address may be picked by the linker leading to missing
1357 // indirect call target info.
1358 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
1359}
1360
1361static inline bool shouldUsePublicSymbol(Function *Fn) {
1362 // It isn't legal to make an alias of this function at all
1363 if (Fn->isDeclarationForLinker())
1364 return true;
1365
1366 // Symbols with local linkage can just use the symbol directly without
1367 // introducing relocations
1368 if (Fn->hasLocalLinkage())
1369 return true;
1370
1371 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
1372 // unfavorable interaction between the new alias and the alias renaming done
1373 // in LowerTypeTests under ThinLTO. For comdat functions that would normally
1374 // be deduplicated, but the renaming scheme ends up preventing renaming, since
1375 // it creates unique names for each alias, resulting in duplicated symbols. In
1376 // the future, we should update the CFI related passes to migrate these
1377 // aliases to the same module as the jump-table they refer to will be defined.
1378 if (Fn->hasMetadata(KindID: LLVMContext::MD_type))
1379 return true;
1380
1381 // For comdat functions, an alias would need the same linkage as the original
1382 // function and hidden visibility. There is no point in adding an alias with
1383 // identical linkage an visibility to avoid introducing symbolic relocations.
1384 if (Fn->hasComdat() &&
1385 (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility))
1386 return true;
1387
1388 // its OK to use an alias
1389 return false;
1390}
1391
static inline Constant *getFuncAddrForProfData(Function *Fn) {
  // Compute the value stored in the function-address field of \p Fn's profile
  // data: null, the function itself, or a private/hidden alias of it.
  auto *Int8PtrTy = PointerType::getUnqual(C&: Fn->getContext());
  // Store a nullptr in __llvm_profd, if we shouldn't use a real address
  if (!shouldRecordFunctionAddr(F: Fn))
    return ConstantPointerNull::get(T: Int8PtrTy);

  // If we can't use an alias, we must use the public symbol, even though this
  // may require a symbolic relocation.
  if (shouldUsePublicSymbol(Fn))
    return Fn;

  // When possible use a private alias to avoid symbolic relocations.
  auto *GA = GlobalAlias::create(Linkage: GlobalValue::LinkageTypes::PrivateLinkage,
                                 Name: Fn->getName() + ".local", Aliasee: Fn);

  // When the instrumented function is a COMDAT function, we cannot use a
  // private alias. If we did, we would create reference to a local label in
  // this function's section. If this version of the function isn't selected by
  // the linker, then the metadata would introduce a reference to a discarded
  // section. So, for COMDAT functions, we need to adjust the linkage of the
  // alias. Using hidden visibility avoids a dynamic relocation and an entry in
  // the dynamic symbol table.
  //
  // Note that this handles COMDAT functions with visibility other than Hidden,
  // since that case is covered in shouldUsePublicSymbol()
  if (Fn->hasComdat()) {
    GA->setLinkage(Fn->getLinkage());
    GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);
  }

  // appendToCompilerUsed(*Fn->getParent(), {GA});

  return GA;
}
1426
1427static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
1428 // NVPTX is an ELF target but PTX does not expose sections or linker symbols.
1429 if (TT.isNVPTX())
1430 return true;
1431
1432 // compiler-rt uses linker support to get data/counters/name start/end for
1433 // ELF, COFF, Mach-O, XCOFF, and Wasm.
1434 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1435 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF() ||
1436 TT.isOSBinFormatWasm())
1437 return false;
1438
1439 return true;
1440}
1441
void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
                                  StringRef CounterGroupName) {
  // \p GV is the lowered profiling variable, \p GO the function or vtable it
  // belongs to, and \p CounterGroupName the comdat group name shared by that
  // object's counter-related variables.
  //
  // Place lowered global variables in a comdat group if the associated function
  // or global variable is a COMDAT. This will make sure that only one copy of
  // global variable (e.g. function counters) of the COMDAT function will be
  // emitted after linking.
  bool NeedComdat = needsComdatForCounter(GV: *GO, M);
  bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());

  if (!UseComdat)
    return;

  // Keep in mind that this pass may run before the inliner, so we need to
  // create a new comdat group (for counters, profiling data, etc). If we use
  // the comdat of the parent function, that will result in relocations against
  // discarded sections.
  //
  // If the data variable is referenced by code, non-counter variables (notably
  // profiling data) and counters have to be in different comdats for COFF
  // because the Visual C++ linker will report duplicate symbol errors if there
  // are multiple external symbols with the same name marked
  // IMAGE_COMDAT_SELECT_ASSOCIATIVE.
  StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
                            ? GV->getName()
                            : CounterGroupName;
  Comdat *C = M.getOrInsertComdat(Name: GroupName);

  if (!NeedComdat) {
    // Object file format must be ELF since `UseComdat && !NeedComdat` is true.
    //
    // For ELF, when not using COMDAT, put counters, data and values into a
    // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
    // allows -z start-stop-gc to discard the entire group when the function is
    // discarded.
    C->setSelectionKind(Comdat::NoDeduplicate);
  }
  GV->setComdat(C);
  // COFF doesn't allow the comdat group leader to have private linkage, so
  // upgrade private linkage to internal linkage to produce a symbol table
  // entry.
  if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
    GV->setLinkage(GlobalValue::InternalLinkage);
}
1485
1486static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
1487 if (!profDataReferencedByCode(M: *GV->getParent()))
1488 return false;
1489
1490 if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
1491 !GV->hasAvailableExternallyLinkage())
1492 return true;
1493
1494 // This avoids the profile data from referencing internal symbols in
1495 // COMDAT.
1496 if (GV->hasLocalLinkage() && GV->hasComdat())
1497 return false;
1498
1499 return true;
1500}
1501
1502// FIXME: Introduce an internal alias like what's done for functions to reduce
1503// the number of relocation entries.
1504static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
1505 // Store a nullptr in __profvt_ if a real address shouldn't be used.
1506 if (!shouldRecordVTableAddr(GV))
1507 return ConstantPointerNull::get(T: PointerType::getUnqual(C&: GV->getContext()));
1508
1509 return GV;
1510}
1511
void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
  // Create (at most once) the __profvt_* data variable describing vtable
  // \p GV for type profiling, and register it in the maps and lists consumed
  // at emission time.
  assert(ProfileCorrelate != InstrProfCorrelator::DEBUG_INFO &&
         "Value profiling is not supported with lightweight instrumentation");
  // Declarations and available_externally definitions get no profile data.
  if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
    return;

  // Skip llvm internal global variable or __prof variables.
  if (GV->getName().starts_with(Prefix: "llvm.") ||
      GV->getName().starts_with(Prefix: "__llvm") ||
      GV->getName().starts_with(Prefix: "__prof"))
    return;

  // VTableProfData already created
  auto It = VTableDataMap.find(Val: GV);
  if (It != VTableDataMap.end() && It->second)
    return;

  GlobalValue::LinkageTypes Linkage = GV->getLinkage();
  GlobalValue::VisibilityTypes Visibility = GV->getVisibility();

  // This is to keep consistent with per-function profile data
  // for correctness.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::InternalLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  LLVMContext &Ctx = M.getContext();
  // The struct layout is generated from InstrProfData.inc so it matches the
  // runtime's expectations.
  Type *DataTypes[] = {
#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
#undef INSTR_PROF_VTABLE_DATA
  };

  auto *DataTy = StructType::get(Context&: Ctx, Elements: ArrayRef(DataTypes));

  // Used by INSTR_PROF_VTABLE_DATA MACRO
  // (The exact names VTableAddr/PGOVTableName/VTableSizeVal are referenced by
  // the Init expansions below — do not rename.)
  Constant *VTableAddr = getVTableAddrForProfData(GV);
  const std::string PGOVTableName = getPGOName(V: *GV);
  // Record the length of the vtable. This is needed since vtable pointers
  // loaded from C++ objects might be from the middle of a vtable definition.
  uint32_t VTableSizeVal = GV->getGlobalSize(DL: M.getDataLayout());

  Constant *DataVals[] = {
#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
#undef INSTR_PROF_VTABLE_DATA
  };

  auto *Data =
      new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,
                         ConstantStruct::get(T: DataTy, V: DataVals),
                         getInstrProfVTableVarPrefix() + PGOVTableName);

  Data->setVisibility(Visibility);
  Data->setSection(getInstrProfSectionName(IPSK: IPSK_vtab, OF: TT.getObjectFormat()));
  Data->setAlignment(Align(8));

  maybeSetComdat(GV: Data, GO: GV, CounterGroupName: Data->getName());

  VTableDataMap[GV] = Data;

  ReferencedVTables.push_back(x: GV);

  // VTable <Hash, Addr> is used by runtime but not referenced by other
  // sections. Conservatively mark it linker retained.
  UsedVars.push_back(x: Data);
}
1580
// Create the counters or MC/DC bitmap global for the region rooted at
// intrinsic \p Inc and place it in the profile section \p IPSK. The new
// global mirrors the linkage/visibility of the function's PGO name variable
// (with target-specific overrides below) so that linker GC and dedup treat
// the parallel profile globals consistently.
GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
                                                  InstrProfSectKind IPSK) {
  GlobalVariable *NamePtr = Inc->getName();

  // Match the linkage and visibility of the name global.
  Function *Fn = Inc->getParent()->getParent();
  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();

  // Use internal rather than private linkage so the counter variable shows up
  // in the symbol table when using debug info for correlation.
  if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO &&
      TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
    Linkage = GlobalValue::InternalLinkage;

  // Due to the limitation of binder as of 2021/09/28, the duplicate weak
  // symbols in the same csect won't be discarded. When there are duplicate weak
  // symbols, we can NOT guarantee that the relocations get resolved to the
  // intended weak symbol, so we can not ensure the correctness of the relative
  // CounterPtr, so we have to use private linkage for counter and data symbols.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }
  // Move the name variable to the right section.
  bool Renamed;
  GlobalVariable *Ptr;
  StringRef VarPrefix;
  StringRef VarPrefix;
  std::string VarName;
  if (IPSK == IPSK_cnts) {
    VarPrefix = getInstrProfCountersVarPrefix();
    VarName = getVarName(Inc, Prefix: VarPrefix, Renamed);
    InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Val: Inc);
    Ptr = createRegionCounters(Inc: CntrIncrement, Name: VarName, Linkage);
  } else if (IPSK == IPSK_bitmap) {
    VarPrefix = getInstrProfBitmapVarPrefix();
    VarName = getVarName(Inc, Prefix: VarPrefix, Renamed);
    InstrProfMCDCBitmapInstBase *BitmapUpdate =
        dyn_cast<InstrProfMCDCBitmapInstBase>(Val: Inc);
    Ptr = createRegionBitmaps(Inc: BitmapUpdate, Name: VarName, Linkage);
  } else {
    llvm_unreachable("Profile Section must be for Counters or Bitmaps");
  }

  Ptr->setVisibility(Visibility);
  // Put the counters and bitmaps in their own sections so linkers can
  // remove unneeded sections.
  Ptr->setSection(getInstrProfSectionName(IPSK, OF: TT.getObjectFormat()));
  Ptr->setLinkage(Linkage);
  maybeSetComdat(GV: Ptr, GO: Fn, CounterGroupName: VarName);
  return Ptr;
}
1633
1634GlobalVariable *
1635InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1636 StringRef Name,
1637 GlobalValue::LinkageTypes Linkage) {
1638 uint64_t NumBytes = Inc->getNumBitmapBytes();
1639 auto *BitmapTy = ArrayType::get(ElementType: Type::getInt8Ty(C&: M.getContext()), NumElements: NumBytes);
1640 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1641 Constant::getNullValue(Ty: BitmapTy), Name);
1642 GV->setAlignment(Align(1));
1643 return GV;
1644}
1645
1646GlobalVariable *
1647InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1648 GlobalVariable *NamePtr = Inc->getName();
1649 auto &PD = ProfileDataMap[NamePtr];
1650 if (PD.RegionBitmaps)
1651 return PD.RegionBitmaps;
1652
1653 // If RegionBitmaps doesn't already exist, create it by first setting up
1654 // the corresponding profile section.
1655 auto *BitmapPtr = setupProfileSection(Inc, IPSK: IPSK_bitmap);
1656 PD.RegionBitmaps = BitmapPtr;
1657 PD.NumBitmapBytes = Inc->getNumBitmapBytes();
1658 return PD.RegionBitmaps;
1659}
1660
1661GlobalVariable *
1662InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1663 GlobalValue::LinkageTypes Linkage) {
1664 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1665 auto &Ctx = M.getContext();
1666 GlobalVariable *GV;
1667 if (isa<InstrProfCoverInst>(Val: Inc)) {
1668 auto *CounterTy = Type::getInt8Ty(C&: Ctx);
1669 auto *CounterArrTy = ArrayType::get(ElementType: CounterTy, NumElements: NumCounters);
1670 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
1671 std::vector<Constant *> InitialValues(NumCounters,
1672 Constant::getAllOnesValue(Ty: CounterTy));
1673 GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1674 ConstantArray::get(T: CounterArrTy, V: InitialValues),
1675 Name);
1676 GV->setAlignment(Align(1));
1677 } else {
1678 auto *CounterTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NumCounters);
1679 GV = new GlobalVariable(M, CounterTy, false, Linkage,
1680 Constant::getNullValue(Ty: CounterTy), Name);
1681 GV->setAlignment(Align(8));
1682 }
1683 return GV;
1684}
1685
// Return the counter array for \p Inc, creating it on first use. Creation
// also sets up the companion data variable and, under debug-info correlation,
// attaches the DWARF annotations (function name, CFG hash, counter count)
// that llvm-profdata uses instead of a data section.
GlobalVariable *
InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
  GlobalVariable *NamePtr = Inc->getName();
  auto &PD = ProfileDataMap[NamePtr];
  if (PD.RegionCounters)
    return PD.RegionCounters;

  // If RegionCounters doesn't already exist, create it by first setting up
  // the corresponding profile section.
  auto *CounterPtr = setupProfileSection(Inc, IPSK: IPSK_cnts);
  PD.RegionCounters = CounterPtr;

  if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
    LLVMContext &Ctx = M.getContext();
    Function *Fn = Inc->getParent()->getParent();
    // Only functions with a subprogram can carry the correlation metadata;
    // without debug info for Fn, no annotation is emitted.
    if (auto *SP = Fn->getSubprogram()) {
      DIBuilder DB(M, true, SP->getUnit());
      Metadata *FunctionNameAnnotation[] = {
          MDString::get(Context&: Ctx, Str: InstrProfCorrelator::FunctionNameAttributeName),
          MDString::get(Context&: Ctx, Str: getPGOFuncNameVarInitializer(NameVar: NamePtr)),
      };
      Metadata *CFGHashAnnotation[] = {
          MDString::get(Context&: Ctx, Str: InstrProfCorrelator::CFGHashAttributeName),
          ConstantAsMetadata::get(C: Inc->getHash()),
      };
      Metadata *NumCountersAnnotation[] = {
          MDString::get(Context&: Ctx, Str: InstrProfCorrelator::NumCountersAttributeName),
          ConstantAsMetadata::get(C: Inc->getNumCounters()),
      };
      auto Annotations = DB.getOrCreateArray(Elements: {
          MDNode::get(Context&: Ctx, MDs: FunctionNameAnnotation),
          MDNode::get(Context&: Ctx, MDs: CFGHashAnnotation),
          MDNode::get(Context&: Ctx, MDs: NumCountersAnnotation),
      });
      // Describe the counter global as a debug-info global variable so the
      // annotations travel with it in DWARF.
      auto *DICounter = DB.createGlobalVariableExpression(
          Context: SP, Name: CounterPtr->getName(), /*LinkageName=*/StringRef(), File: SP->getFile(),
          /*LineNo=*/0, Ty: DB.createUnspecifiedType(Name: "Profile Data Type"),
          IsLocalToUnit: CounterPtr->hasLocalLinkage(), /*IsDefined=*/isDefined: true, /*Expr=*/nullptr,
          /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
          Annotations);
      CounterPtr->addDebugInfo(GV: DICounter);
      DB.finalize();
    }

    // Mark the counter variable as used so that it isn't optimized out.
    CompilerUsedVars.push_back(x: PD.RegionCounters);
  }

  // Create the data variable (if it doesn't already exist).
  createDataVariable(Inc);

  return PD.RegionCounters;
}
1739
// Create the per-function profile data record (__profd_*) for \p Inc, wiring
// it to the previously created counter/bitmap globals and to the statically
// allocated value-profiling storage. The record's field initializers come
// from the INSTR_PROF_DATA Init expressions in InstrProfData.inc, which read
// the locals set up below (NumCounters, FunctionAddr, RelativeCounterPtr,
// Int16ArrayVals, ...).
void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
  // When debug information is correlated to profile data, a data variable
  // is not needed.
  if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
    return;

  GlobalVariable *NamePtr = Inc->getName();
  auto &PD = ProfileDataMap[NamePtr];

  // Return if data variable was already created.
  if (PD.DataVar)
    return;

  LLVMContext &Ctx = M.getContext();

  Function *Fn = Inc->getParent()->getParent();
  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();

  // Due to the limitation of binder as of 2021/09/28, the duplicate weak
  // symbols in the same csect won't be discarded. When there are duplicate weak
  // symbols, we can NOT guarantee that the relocations get resolved to the
  // intended weak symbol, so we can not ensure the correctness of the relative
  // CounterPtr, so we have to use private linkage for counter and data symbols.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  bool NeedComdat = needsComdatForCounter(GV: *Fn, M);
  bool Renamed;

  // The Data Variable section is anchored to profile counters.
  std::string CntsVarName =
      getVarName(Inc, Prefix: getInstrProfCountersVarPrefix(), Renamed);
  std::string DataVarName =
      getVarName(Inc, Prefix: getInstrProfDataVarPrefix(), Renamed);

  auto *Int8PtrTy = PointerType::getUnqual(C&: Ctx);
  // Allocate statically the array of pointers to value profile nodes for
  // the current function.
  Constant *ValuesPtrExpr = ConstantPointerNull::get(T: Int8PtrTy);
  // NS = total number of value-profiling sites across all value kinds.
  uint64_t NS = 0;
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    NS += PD.NumValueSites[Kind];
  if (NS > 0 && ValueProfileStaticAlloc &&
      !needsRuntimeRegistrationOfSectionRange(TT)) {
    ArrayType *ValuesTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NS);
    auto *ValuesVar = new GlobalVariable(
        M, ValuesTy, false, Linkage, Constant::getNullValue(Ty: ValuesTy),
        getVarName(Inc, Prefix: getInstrProfValuesVarPrefix(), Renamed));
    ValuesVar->setVisibility(Visibility);
    setGlobalVariableLargeSection(TargetTriple: TT, GV&: *ValuesVar);
    ValuesVar->setSection(
        getInstrProfSectionName(IPSK: IPSK_vals, OF: TT.getObjectFormat()));
    ValuesVar->setAlignment(Align(8));
    maybeSetComdat(GV: ValuesVar, GO: Fn, CounterGroupName: CntsVarName);
    ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
        C: ValuesVar, Ty: PointerType::get(C&: Fn->getContext(), AddressSpace: 0));
  }

  uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
  auto *CounterPtr = PD.RegionCounters;

  uint64_t NumBitmapBytes = PD.NumBitmapBytes;

  // Create data variable.
  auto *IntPtrTy = M.getDataLayout().getIntPtrType(C&: M.getContext());
  auto *Int16Ty = Type::getInt16Ty(C&: Ctx);
  auto *Int16ArrayTy = ArrayType::get(ElementType: Int16Ty, NumElements: IPVK_Last + 1);
  Type *DataTypes[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *DataTy = StructType::get(Context&: Ctx, Elements: ArrayRef(DataTypes));

  Constant *FunctionAddr = getFuncAddrForProfData(Fn);

  // Per-kind value-site counts, consumed by the INSTR_PROF_DATA initializers.
  Constant *Int16ArrayVals[IPVK_Last + 1];
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    Int16ArrayVals[Kind] = ConstantInt::get(Ty: Int16Ty, V: PD.NumValueSites[Kind]);

  // If the data variable is not referenced by code (if we don't emit
  // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
  // data variable live under linker GC, the data variable can be private. This
  // optimization applies to ELF.
  //
  // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
  // to be false.
  //
  // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
  // that other copies must have the same CFG and cannot have value profiling.
  // If no hash suffix, other profd copies may be referenced by code.
  if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
      (TT.isOSBinFormatELF() ||
       (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }
  // AMDGPU objects are always ET_DYN, so non-local symbols with default
  // visibility are preemptible. The CounterPtr label difference emits a REL32
  // relocation that lld rejects against preemptible targets.
  if (TT.isAMDGPU() && !GlobalValue::isLocalLinkage(Linkage))
    Visibility = GlobalValue::ProtectedVisibility;
  // The initializer is filled in below: the relative pointer fields reference
  // Data itself, so the GlobalVariable must exist first.
  auto *Data =
      new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
  Constant *RelativeCounterPtr;
  GlobalVariable *BitmapPtr = PD.RegionBitmaps;
  Constant *RelativeBitmapPtr = ConstantInt::get(Ty: IntPtrTy, V: 0);
  InstrProfSectKind DataSectionKind;
  // With binary profile correlation, profile data is not loaded into memory.
  // profile data must reference profile counter with an absolute relocation.
  if (ProfileCorrelate == InstrProfCorrelator::BINARY) {
    DataSectionKind = IPSK_covdata;
    RelativeCounterPtr = ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy);
    if (BitmapPtr != nullptr)
      RelativeBitmapPtr = ConstantExpr::getPtrToInt(C: BitmapPtr, Ty: IntPtrTy);
  } else if (TT.isNVPTX()) {
    // The NVPTX target cannot handle self-referencing constant expressions in
    // global initializers at all. Use absolute pointers and have the runtime
    // registration convert them to relative offsets.
    DataSectionKind = IPSK_data;
    RelativeCounterPtr = ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy);
  } else {
    // Reference the counter variable with a label difference (link-time
    // constant).
    DataSectionKind = IPSK_data;
    RelativeCounterPtr =
        ConstantExpr::getSub(C1: ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy),
                             C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
    if (BitmapPtr != nullptr)
      RelativeBitmapPtr =
          ConstantExpr::getSub(C1: ConstantExpr::getPtrToInt(C: BitmapPtr, Ty: IntPtrTy),
                               C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
  }

  Constant *DataVals[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  Data->setInitializer(ConstantStruct::get(T: DataTy, V: DataVals));

  Data->setVisibility(Visibility);
  Data->setSection(
      getInstrProfSectionName(IPSK: DataSectionKind, OF: TT.getObjectFormat()));
  Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
  maybeSetComdat(GV: Data, GO: Fn, CounterGroupName: CntsVarName);

  PD.DataVar = Data;

  // Mark the data variable as used so that it isn't stripped out.
  CompilerUsedVars.push_back(x: Data);
  // Now that the linkage set by the FE has been passed to the data and counter
  // variables, reset Name variable's linkage and visibility to private so that
  // it can be removed later by the compiler.
  NamePtr->setLinkage(GlobalValue::PrivateLinkage);
  // Collect the referenced names to be used by emitNameData.
  ReferencedNames.push_back(x: NamePtr);
}
1899
1900void InstrLowerer::emitVNodes() {
1901 if (!ValueProfileStaticAlloc)
1902 return;
1903
1904 // For now only support this on platforms that do
1905 // not require runtime registration to discover
1906 // named section start/end.
1907 if (needsRuntimeRegistrationOfSectionRange(TT))
1908 return;
1909
1910 size_t TotalNS = 0;
1911 for (auto &PD : ProfileDataMap) {
1912 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1913 TotalNS += PD.second.NumValueSites[Kind];
1914 }
1915
1916 if (!TotalNS)
1917 return;
1918
1919 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
1920// Heuristic for small programs with very few total value sites.
1921// The default value of vp-counters-per-site is chosen based on
1922// the observation that large apps usually have a low percentage
1923// of value sites that actually have any profile data, and thus
1924// the average number of counters per site is low. For small
1925// apps with very few sites, this may not be true. Bump up the
1926// number of counters in this case.
1927#define INSTR_PROF_MIN_VAL_COUNTS 10
1928 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
1929 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, b: (int)NumCounters * 2);
1930
1931 auto &Ctx = M.getContext();
1932 Type *VNodeTypes[] = {
1933#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
1934#include "llvm/ProfileData/InstrProfData.inc"
1935 };
1936 auto *VNodeTy = StructType::get(Context&: Ctx, Elements: ArrayRef(VNodeTypes));
1937
1938 ArrayType *VNodesTy = ArrayType::get(ElementType: VNodeTy, NumElements: NumCounters);
1939 auto *VNodesVar = new GlobalVariable(
1940 M, VNodesTy, false, GlobalValue::PrivateLinkage,
1941 Constant::getNullValue(Ty: VNodesTy), getInstrProfVNodesVarName());
1942 setGlobalVariableLargeSection(TargetTriple: TT, GV&: *VNodesVar);
1943 VNodesVar->setSection(
1944 getInstrProfSectionName(IPSK: IPSK_vnodes, OF: TT.getObjectFormat()));
1945 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(Ty: VNodesTy));
1946 // VNodesVar is used by runtime but not referenced via relocation by other
1947 // sections. Conservatively make it linker retained.
1948 UsedVars.push_back(x: VNodesVar);
1949}
1950
1951void InstrLowerer::emitNameData() {
1952 if (ReferencedNames.empty())
1953 return;
1954
1955 std::string CompressedNameStr;
1956 if (Error E = collectPGOFuncNameStrings(NameVars: ReferencedNames, Result&: CompressedNameStr,
1957 doCompression: DoInstrProfNameCompression)) {
1958 report_fatal_error(reason: Twine(toString(E: std::move(E))), gen_crash_diag: false);
1959 }
1960
1961 auto &Ctx = M.getContext();
1962 auto *NamesVal =
1963 ConstantDataArray::getString(Context&: Ctx, Initializer: StringRef(CompressedNameStr), AddNull: false);
1964 NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
1965 GlobalValue::PrivateLinkage, NamesVal,
1966 getInstrProfNamesVarName());
1967
1968 NamesSize = CompressedNameStr.size();
1969 setGlobalVariableLargeSection(TargetTriple: TT, GV&: *NamesVar);
1970 NamesVar->setSection(
1971 ProfileCorrelate == InstrProfCorrelator::BINARY
1972 ? getInstrProfSectionName(IPSK: IPSK_covname, OF: TT.getObjectFormat())
1973 : getInstrProfSectionName(IPSK: IPSK_name, OF: TT.getObjectFormat()));
1974 // On COFF, it's important to reduce the alignment down to 1 to prevent the
1975 // linker from inserting padding before the start of the names section or
1976 // between names entries.
1977 NamesVar->setAlignment(Align(1));
1978 // NamesVar is used by runtime but not referenced via relocation by other
1979 // sections. Conservatively make it linker retained.
1980 UsedVars.push_back(x: NamesVar);
1981
1982 for (auto *NamePtr : ReferencedNames)
1983 NamePtr->eraseFromParent();
1984}
1985
1986void InstrLowerer::emitVTableNames() {
1987 if (!EnableVTableValueProfiling || ReferencedVTables.empty())
1988 return;
1989
1990 // Collect the PGO names of referenced vtables and compress them.
1991 std::string CompressedVTableNames;
1992 if (Error E = collectVTableStrings(VTables: ReferencedVTables, Result&: CompressedVTableNames,
1993 doCompression: DoInstrProfNameCompression)) {
1994 report_fatal_error(reason: Twine(toString(E: std::move(E))), gen_crash_diag: false);
1995 }
1996
1997 auto &Ctx = M.getContext();
1998 auto *VTableNamesVal = ConstantDataArray::getString(
1999 Context&: Ctx, Initializer: StringRef(CompressedVTableNames), AddNull: false /* AddNull */);
2000 GlobalVariable *VTableNamesVar =
2001 new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
2002 GlobalValue::PrivateLinkage, VTableNamesVal,
2003 getInstrProfVTableNamesVarName());
2004 VTableNamesVar->setSection(
2005 getInstrProfSectionName(IPSK: IPSK_vname, OF: TT.getObjectFormat()));
2006 VTableNamesVar->setAlignment(Align(1));
2007 // Make VTableNames linker retained.
2008 UsedVars.push_back(x: VTableNamesVar);
2009}
2010
// On targets where the runtime cannot discover profile sections via
// start/end symbols, synthesize an internal registration function that passes
// every profiling global (and the names blob with its size) to the runtime's
// registration entry points. emitInitialization() arranges for this function
// to be called from a module constructor.
void InstrLowerer::emitRegistration() {
  if (!needsRuntimeRegistrationOfSectionRange(TT))
    return;

  // Construct the function.
  auto *VoidTy = Type::getVoidTy(C&: M.getContext());
  auto *VoidPtrTy = PointerType::getUnqual(C&: M.getContext());
  auto *Int64Ty = Type::getInt64Ty(C&: M.getContext());
  auto *RegisterFTy = FunctionType::get(Result: VoidTy, isVarArg: false);
  auto *RegisterF = Function::Create(Ty: RegisterFTy, Linkage: GlobalValue::InternalLinkage,
                                     N: getInstrProfRegFuncsName(), M);
  RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  if (Options.NoRedZone)
    RegisterF->addFnAttr(Kind: Attribute::NoRedZone);

  auto *RuntimeRegisterTy = FunctionType::get(Result: VoidTy, Params: VoidPtrTy, isVarArg: false);
  auto *RuntimeRegisterF =
      Function::Create(Ty: RuntimeRegisterTy, Linkage: GlobalVariable::ExternalLinkage,
                       N: getInstrProfRegFuncName(), M);

  // Register every profiling variable; functions in the lists are skipped,
  // and NamesVar is registered separately below with its size.
  IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: RegisterF));
  for (Value *Data : CompilerUsedVars)
    if (!isa<Function>(Val: Data))
      // Check for addrspace cast when profiling GPU
      IRB.CreateCall(Callee: RuntimeRegisterF,
                     Args: IRB.CreatePointerBitCastOrAddrSpaceCast(V: Data, DestTy: VoidPtrTy));
  for (Value *Data : UsedVars)
    if (Data != NamesVar && !isa<Function>(Val: Data))
      IRB.CreateCall(Callee: RuntimeRegisterF,
                     Args: IRB.CreatePointerBitCastOrAddrSpaceCast(V: Data, DestTy: VoidPtrTy));

  // The names blob has no self-describing length, so it uses a dedicated
  // registration function taking (pointer, size).
  if (NamesVar) {
    Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
    auto *NamesRegisterTy =
        FunctionType::get(Result: VoidTy, Params: ArrayRef(ParamTypes), isVarArg: false);
    auto *NamesRegisterF =
        Function::Create(Ty: NamesRegisterTy, Linkage: GlobalVariable::ExternalLinkage,
                         N: getInstrProfNamesRegFuncName(), M);
    IRB.CreateCall(Callee: NamesRegisterF, Args: {IRB.CreatePointerBitCastOrAddrSpaceCast(
                                        V: NamesVar, DestTy: VoidPtrTy),
                                    IRB.getInt64(C: NamesSize)});
  }

  IRB.CreateRetVoid();
}
2056
2057bool InstrLowerer::emitRuntimeHook() {
2058 // GPU profiling data is read directly by the host offload runtime. We do not
2059 // need the standard runtime hook.
2060 if (TT.isGPU())
2061 return false;
2062
2063 // We expect the linker to be invoked with -u<hook_var> flag for Linux
2064 // in which case there is no need to emit the external variable.
2065 if (TT.isOSLinux() || TT.isOSAIX())
2066 return false;
2067
2068 // If the module's provided its own runtime, we don't need to do anything.
2069 if (M.getGlobalVariable(Name: getInstrProfRuntimeHookVarName()))
2070 return false;
2071
2072 // Declare an external variable that will pull in the runtime initialization.
2073 auto *Int32Ty = Type::getInt32Ty(C&: M.getContext());
2074 auto *Var =
2075 new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
2076 nullptr, getInstrProfRuntimeHookVarName());
2077 Var->setVisibility(GlobalValue::HiddenVisibility);
2078
2079 if (TT.isOSBinFormatELF() && !TT.isPS()) {
2080 // Mark the user variable as used so that it isn't stripped out.
2081 CompilerUsedVars.push_back(x: Var);
2082 } else {
2083 // Make a function that uses it.
2084 auto *User = Function::Create(Ty: FunctionType::get(Result: Int32Ty, isVarArg: false),
2085 Linkage: GlobalValue::LinkOnceODRLinkage,
2086 N: getInstrProfRuntimeHookVarUseFuncName(), M);
2087 User->addFnAttr(Kind: Attribute::NoInline);
2088 if (Options.NoRedZone)
2089 User->addFnAttr(Kind: Attribute::NoRedZone);
2090 User->setVisibility(GlobalValue::HiddenVisibility);
2091 if (TT.supportsCOMDAT())
2092 User->setComdat(M.getOrInsertComdat(Name: User->getName()));
2093 // Explicitly mark this function as cold since it is never called.
2094 User->setEntryCount(Count: 0);
2095
2096 IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: User));
2097 auto *Load = IRB.CreateLoad(Ty: Int32Ty, Ptr: Var);
2098 IRB.CreateRet(V: Load);
2099
2100 // Mark the function as used so that it isn't stripped out.
2101 CompilerUsedVars.push_back(x: User);
2102 }
2103 return true;
2104}
2105
2106void InstrLowerer::emitUses() {
2107 // The metadata sections are parallel arrays. Optimizers (e.g.
2108 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2109 // we conservatively retain all unconditionally in the compiler.
2110 //
2111 // On ELF and Mach-O, the linker can guarantee the associated sections will be
2112 // retained or discarded as a unit, so llvm.compiler.used is sufficient.
2113 // Similarly on COFF, if prof data is not referenced by code we use one comdat
2114 // and ensure this GC property as well. Otherwise, we have to conservatively
2115 // make all of the sections retained by the linker.
2116 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2117 (TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2118 appendToCompilerUsed(M, Values: CompilerUsedVars);
2119 else
2120 appendToUsed(M, Values: CompilerUsedVars);
2121
2122 // We do not add proper references from used metadata sections to NamesVar and
2123 // VNodesVar, so we have to be conservative and place them in llvm.used
2124 // regardless of the target,
2125 appendToUsed(M, Values: UsedVars);
2126}
2127
// Emit the module-level initialization: the profile file name variable and,
// when emitRegistration() produced a registration function, an internal init
// function that calls it, installed as a global constructor.
void InstrLowerer::emitInitialization() {
  // Create ProfileFileName variable. Don't do this for the
  // context-sensitive instrumentation lowering: This lowering is after
  // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
  // have already created the variable before LTO/ThinLTO linking.
  if (!IsCS)
    createProfileFileNameVar(M, InstrProfileOutput: Options.InstrProfileOutput);
  // No registration function means this target discovers sections via
  // start/end symbols; no constructor is needed.
  Function *RegisterF = M.getFunction(Name: getInstrProfRegFuncsName());
  if (!RegisterF)
    return;

  // Create the initialization function.
  auto *VoidTy = Type::getVoidTy(C&: M.getContext());
  auto *F = Function::Create(Ty: FunctionType::get(Result: VoidTy, isVarArg: false),
                             Linkage: GlobalValue::InternalLinkage,
                             N: getInstrProfInitFuncName(), M);
  F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  F->addFnAttr(Kind: Attribute::NoInline);
  if (Options.NoRedZone)
    F->addFnAttr(Kind: Attribute::NoRedZone);

  // Add the basic block and the necessary calls.
  IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: F));
  IRB.CreateCall(Callee: RegisterF, Args: {});
  IRB.CreateRetVoid();

  appendToGlobalCtors(M, F, Priority: 0);
}
2156
2157namespace llvm {
2158// Create the variable for profile sampling.
2159void createProfileSamplingVar(Module &M) {
2160 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
2161 IntegerType *SamplingVarTy;
2162 Constant *ValueZero;
2163 if (getSampledInstrumentationConfig().UseShort) {
2164 SamplingVarTy = Type::getInt16Ty(C&: M.getContext());
2165 ValueZero = Constant::getIntegerValue(Ty: SamplingVarTy, V: APInt(16, 0));
2166 } else {
2167 SamplingVarTy = Type::getInt32Ty(C&: M.getContext());
2168 ValueZero = Constant::getIntegerValue(Ty: SamplingVarTy, V: APInt(32, 0));
2169 }
2170 auto SamplingVar = new GlobalVariable(
2171 M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2172 SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2173 SamplingVar->setThreadLocal(true);
2174 Triple TT(M.getTargetTriple());
2175 if (TT.supportsCOMDAT()) {
2176 SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2177 SamplingVar->setComdat(M.getOrInsertComdat(Name: VarName));
2178 }
2179 appendToCompilerUsed(M, Values: SamplingVar);
2180}
2181} // namespace llvm
2182