1//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10// It also builds the data structures and initialization code needed for
11// updating execution counts and emitting the profile at runtime.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
21#include "llvm/Analysis/BlockFrequencyInfo.h"
22#include "llvm/Analysis/BranchProbabilityInfo.h"
23#include "llvm/Analysis/CFG.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetLibraryInfo.h"
26#include "llvm/IR/Attributes.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/CFG.h"
29#include "llvm/IR/Constant.h"
30#include "llvm/IR/Constants.h"
31#include "llvm/IR/DIBuilder.h"
32#include "llvm/IR/DerivedTypes.h"
33#include "llvm/IR/DiagnosticInfo.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/GlobalValue.h"
37#include "llvm/IR/GlobalVariable.h"
38#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Module.h"
44#include "llvm/IR/Type.h"
45#include "llvm/Pass.h"
46#include "llvm/ProfileData/InstrProf.h"
47#include "llvm/ProfileData/InstrProfCorrelator.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Compiler.h"
51#include "llvm/Support/Error.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/TargetParser/Triple.h"
54#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
55#include "llvm/Transforms/Utils/BasicBlockUtils.h"
56#include "llvm/Transforms/Utils/Instrumentation.h"
57#include "llvm/Transforms/Utils/ModuleUtils.h"
58#include "llvm/Transforms/Utils/SSAUpdater.h"
59#include <algorithm>
60#include <cassert>
61#include <cstdint>
62#include <string>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "instrprof"
67
68namespace llvm {
69// Command line option to enable vtable value profiling. Defined in
70// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
71extern cl::opt<bool> EnableVTableValueProfiling;
72LLVM_ABI cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
73 "profile-correlate",
74 cl::desc("Use debug info or binary file to correlate profiles."),
75 cl::init(Val: InstrProfCorrelator::NONE),
76 cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
77 "No profile correlation"),
78 clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
79 "Use debug info to correlate"),
80 clEnumValN(InstrProfCorrelator::BINARY, "binary",
81 "Use binary to correlate")));
82} // namespace llvm
83
84namespace {
85
86cl::opt<bool> DoHashBasedCounterSplit(
87 "hash-based-counter-split",
88 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
89 cl::init(Val: true));
90
91cl::opt<bool>
92 RuntimeCounterRelocation("runtime-counter-relocation",
93 cl::desc("Enable relocating counters at runtime."),
94 cl::init(Val: false));
95
96cl::opt<bool> ValueProfileStaticAlloc(
97 "vp-static-alloc",
98 cl::desc("Do static counter allocation for value profiler"),
99 cl::init(Val: true));
100
101cl::opt<double> NumCountersPerValueSite(
102 "vp-counters-per-site",
103 cl::desc("The average number of profile counters allocated "
104 "per value profiling site."),
105 // This is set to a very small value because in real programs, only
106 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
107 // For those sites with non-zero profile, the average number of targets
108 // is usually smaller than 2.
109 cl::init(Val: 1.0));
110
111cl::opt<bool> AtomicCounterUpdateAll(
112 "instrprof-atomic-counter-update-all",
113 cl::desc("Make all profile counter updates atomic (for testing only)"),
114 cl::init(Val: false));
115
116cl::opt<bool> AtomicCounterUpdatePromoted(
117 "atomic-counter-update-promoted",
118 cl::desc("Do counter update using atomic fetch add "
119 " for promoted counters only"),
120 cl::init(Val: false));
121
122cl::opt<bool> AtomicFirstCounter(
123 "atomic-first-counter",
124 cl::desc("Use atomic fetch add for first counter in a function (usually "
125 "the entry counter)"),
126 cl::init(Val: false));
127
128cl::opt<bool> ConditionalCounterUpdate(
129 "conditional-counter-update",
130 cl::desc("Do conditional counter updates in single byte counters mode)"),
131 cl::init(Val: false));
132
133// If the option is not specified, the default behavior about whether
134// counter promotion is done depends on how instrumentation lowering
135// pipeline is setup, i.e., the default value of true of this option
136// does not mean the promotion will be done by default. Explicitly
137// setting this option can override the default behavior.
138cl::opt<bool> DoCounterPromotion("do-counter-promotion",
139 cl::desc("Do counter register promotion"),
140 cl::init(Val: false));
141cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
142 "max-counter-promotions-per-loop", cl::init(Val: 20),
143 cl::desc("Max number counter promotions per loop to avoid"
144 " increasing register pressure too much"));
145
146// A debug option
147cl::opt<int>
148 MaxNumOfPromotions("max-counter-promotions", cl::init(Val: -1),
149 cl::desc("Max number of allowed counter promotions"));
150
151cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
152 "speculative-counter-promotion-max-exiting", cl::init(Val: 3),
153 cl::desc("The max number of exiting blocks of a loop to allow "
154 " speculative counter promotion"));
155
156cl::opt<bool> SpeculativeCounterPromotionToLoop(
157 "speculative-counter-promotion-to-loop",
158 cl::desc("When the option is false, if the target block is in a loop, "
159 "the promotion will be disallowed unless the promoted counter "
160 " update can be further/iteratively promoted into an acyclic "
161 " region."));
162
163cl::opt<bool> IterativeCounterPromotion(
164 "iterative-counter-promotion", cl::init(Val: true),
165 cl::desc("Allow counter promotion across the whole loop nest."));
166
167cl::opt<bool> SkipRetExitBlock(
168 "skip-ret-exit-block", cl::init(Val: true),
169 cl::desc("Suppress counter promotion if exit blocks contain ret."));
170
171static cl::opt<bool> SampledInstr("sampled-instrumentation",
172 cl::desc("Do PGO instrumentation sampling"));
173
174static cl::opt<unsigned> SampledInstrPeriod(
175 "sampled-instr-period",
176 cl::desc("Set the profile instrumentation sample period. A sample period "
177 "of 0 is invalid. For each sample period, a fixed number of "
178 "consecutive samples will be recorded. The number is controlled "
179 "by 'sampled-instr-burst-duration' flag. The default sample "
180 "period of 65536 is optimized for generating efficient code that "
181 "leverages unsigned short integer wrapping in overflow, but this "
182 "is disabled under simple sampling (burst duration = 1)."),
183 cl::init(USHRT_MAX + 1));
184
185static cl::opt<unsigned> SampledInstrBurstDuration(
186 "sampled-instr-burst-duration",
187 cl::desc("Set the profile instrumentation burst duration, which can range "
188 "from 1 to the value of 'sampled-instr-period' (0 is invalid). "
189 "This number of samples will be recorded for each "
190 "'sampled-instr-period' count update. Setting to 1 enables simple "
191 "sampling, in which case it is recommended to set "
192 "'sampled-instr-period' to a prime number."),
193 cl::init(Val: 200));
194
// Sampling parameters derived from the -sampled-instr-* options; filled in by
// getSampledInstrumentationConfig().
struct SampledInstrumentationConfig {
  // Value of -sampled-instr-burst-duration.
  unsigned BurstDuration;
  // Value of -sampled-instr-period.
  unsigned Period;
  // True when the sampling variable is handled as a 16-bit integer rather
  // than a 32-bit one.
  bool UseShort;
  // True when BurstDuration == 1.
  bool IsSimpleSampling;
  // True when not simple sampling and Period == USHRT_MAX + 1.
  bool IsFastSampling;
};
202
203static SampledInstrumentationConfig getSampledInstrumentationConfig() {
204 SampledInstrumentationConfig config;
205 config.BurstDuration = SampledInstrBurstDuration.getValue();
206 config.Period = SampledInstrPeriod.getValue();
207 if (config.BurstDuration > config.Period)
208 report_fatal_error(
209 reason: "SampledBurstDuration must be less than or equal to SampledPeriod");
210 if (config.Period == 0 || config.BurstDuration == 0)
211 report_fatal_error(
212 reason: "SampledPeriod and SampledBurstDuration must be greater than 0");
213 config.IsSimpleSampling = (config.BurstDuration == 1);
214 // If (BurstDuration == 1 && Period == 65536), generate the simple sampling
215 // style code.
216 config.IsFastSampling =
217 (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1);
218 config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling;
219 return config;
220}
221
222using LoadStorePair = std::pair<Instruction *, Instruction *>;
223
224static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
225 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(Val: M.getModuleFlag(Key: Flag));
226 if (!MD)
227 return 0;
228
229 // If the flag is a ConstantAsMetadata, it should be an integer representable
230 // in 64-bits.
231 return cast<ConstantInt>(Val: MD->getValue())->getZExtValue();
232}
233
234static bool enablesValueProfiling(const Module &M) {
235 return isIRPGOFlagSet(M: &M) ||
236 getIntModuleFlagOrZero(M, Flag: "EnableValueProfiling") != 0;
237}
238
239// Conservatively returns true if value profiling is enabled.
240static bool profDataReferencedByCode(const Module &M) {
241 return enablesValueProfiling(M);
242}
243
244class InstrLowerer final {
245public:
246 InstrLowerer(Module &M, const InstrProfOptions &Options,
247 std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
248 bool IsCS)
249 : M(M), Options(Options), TT(M.getTargetTriple()), IsCS(IsCS),
250 GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}
251
252 bool lower();
253
254private:
255 Module &M;
256 const InstrProfOptions Options;
257 const Triple TT;
258 // Is this lowering for the context-sensitive instrumentation.
259 const bool IsCS;
260
261 std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
262
263 const bool DataReferencedByCode;
264
265 struct PerFunctionProfileData {
266 uint32_t NumValueSites[IPVK_Last + 1] = {};
267 GlobalVariable *RegionCounters = nullptr;
268 GlobalVariable *DataVar = nullptr;
269 GlobalVariable *RegionBitmaps = nullptr;
270 uint32_t NumBitmapBytes = 0;
271
272 PerFunctionProfileData() = default;
273 };
274 DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
275 // Key is virtual table variable, value is 'VTableProfData' in the form of
276 // GlobalVariable.
277 DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
278 /// If runtime relocation is enabled, this maps functions to the load
279 /// instruction that produces the profile relocation bias.
280 DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
281 std::vector<GlobalValue *> CompilerUsedVars;
282 std::vector<GlobalValue *> UsedVars;
283 std::vector<GlobalVariable *> ReferencedNames;
284 // The list of virtual table variables of which the VTableProfData is
285 // collected.
286 std::vector<GlobalVariable *> ReferencedVTables;
287 GlobalVariable *NamesVar = nullptr;
288 size_t NamesSize = 0;
289
290 // vector of counter load/store pairs to be register promoted.
291 std::vector<LoadStorePair> PromotionCandidates;
292
293 int64_t TotalCountersPromoted = 0;
294
295 /// Lower instrumentation intrinsics in the function. Returns true if there
296 /// any lowering.
297 bool lowerIntrinsics(Function *F);
298
299 /// Register-promote counter loads and stores in loops.
300 void promoteCounterLoadStores(Function *F);
301
302 /// Returns true if relocating counters at runtime is enabled.
303 bool isRuntimeCounterRelocationEnabled() const;
304
305 /// Returns true if profile counter update register promotion is enabled.
306 bool isCounterPromotionEnabled() const;
307
308 /// Return true if profile sampling is enabled.
309 bool isSamplingEnabled() const;
310
311 /// Count the number of instrumented value sites for the function.
312 void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
313
314 /// Replace instrprof.value.profile with a call to runtime library.
315 void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
316
317 /// Replace instrprof.cover with a store instruction to the coverage byte.
318 void lowerCover(InstrProfCoverInst *Inc);
319
320 /// Replace instrprof.timestamp with a call to
321 /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
322 void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
323
324 /// Replace instrprof.increment with an increment of the appropriate value.
325 void lowerIncrement(InstrProfIncrementInst *Inc);
326
327 /// Force emitting of name vars for unused functions.
328 void lowerCoverageData(GlobalVariable *CoverageNamesVar);
329
330 /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
331 /// using the index represented by the a temp value into a bitmap.
332 void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
333
334 /// Get the Bias value for data to access mmap-ed area.
335 /// Create it if it hasn't been seen.
336 GlobalVariable *getOrCreateBiasVar(StringRef VarName);
337
338 /// Compute the address of the counter value that this profiling instruction
339 /// acts on.
340 Value *getCounterAddress(InstrProfCntrInstBase *I);
341
342 /// Lower the incremental instructions under profile sampling predicates.
343 void doSampling(Instruction *I);
344
345 /// Get the region counters for an increment, creating them if necessary.
346 ///
347 /// If the counter array doesn't yet exist, the profile data variables
348 /// referring to them will also be created.
349 GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
350
351 /// Create the region counters.
352 GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
353 StringRef Name,
354 GlobalValue::LinkageTypes Linkage);
355
356 /// Compute the address of the test vector bitmap that this profiling
357 /// instruction acts on.
358 Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
359
360 /// Get the region bitmaps for an increment, creating them if necessary.
361 ///
362 /// If the bitmap array doesn't yet exist, the profile data variables
363 /// referring to them will also be created.
364 GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
365
366 /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
367 /// an MC/DC Decision region. The number of bytes required is indicated by
368 /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
369 /// as part of setupProfileSection() and is conceptually very similar to
370 /// what is done for profile data counters in createRegionCounters().
371 GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
372 StringRef Name,
373 GlobalValue::LinkageTypes Linkage);
374
375 /// Set Comdat property of GV, if required.
376 void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);
377
378 /// Setup the sections into which counters and bitmaps are allocated.
379 GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
380 InstrProfSectKind IPSK);
381
382 /// Create INSTR_PROF_DATA variable for counters and bitmaps.
383 void createDataVariable(InstrProfCntrInstBase *Inc);
384
385 /// Get the counters for virtual table values, creating them if necessary.
386 void getOrCreateVTableProfData(GlobalVariable *GV);
387
388 /// Emit the section with compressed function names.
389 void emitNameData();
390
391 /// Emit the section with compressed vtable names.
392 void emitVTableNames();
393
394 /// Emit value nodes section for value profiling.
395 void emitVNodes();
396
397 /// Emit runtime registration functions for each profile data variable.
398 void emitRegistration();
399
400 /// Emit the necessary plumbing to pull in the runtime initialization.
401 /// Returns true if a change was made.
402 bool emitRuntimeHook();
403
404 /// Add uses of our data variables and runtime hook.
405 void emitUses();
406
407 /// Create a static initializer for our data, on platforms that need it,
408 /// and for any profile output file that was specified.
409 void emitInitialization();
410};
411
412///
413/// A helper class to promote one counter RMW operation in the loop
414/// into register update.
415///
416/// RWM update for the counter will be sinked out of the loop after
417/// the transformation.
418///
419class PGOCounterPromoterHelper : public LoadAndStorePromoter {
420public:
421 PGOCounterPromoterHelper(
422 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
423 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
424 ArrayRef<Instruction *> InsertPts,
425 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
426 LoopInfo &LI)
427 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
428 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
429 assert(isa<LoadInst>(L));
430 assert(isa<StoreInst>(S));
431 SSA.AddAvailableValue(BB: PH, V: Init);
432 }
433
434 void doExtraRewritesBeforeFinalDeletion() override {
435 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
436 BasicBlock *ExitBlock = ExitBlocks[i];
437 Instruction *InsertPos = InsertPts[i];
438 // Get LiveIn value into the ExitBlock. If there are multiple
439 // predecessors, the value is defined by a PHI node in this
440 // block.
441 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(BB: ExitBlock);
442 Value *Addr = cast<StoreInst>(Val: Store)->getPointerOperand();
443 Type *Ty = LiveInValue->getType();
444 IRBuilder<> Builder(InsertPos);
445 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Val: Addr)) {
446 // If isRuntimeCounterRelocationEnabled() is true then the address of
447 // the store instruction is computed with two instructions in
448 // InstrProfiling::getCounterAddress(). We need to copy those
449 // instructions to this block to compute Addr correctly.
450 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
451 // %Addr = inttoptr i64 %BiasAdd to i64*
452 auto *OrigBiasInst = dyn_cast<BinaryOperator>(Val: AddrInst->getOperand(i_nocapture: 0));
453 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
454 Value *BiasInst = Builder.Insert(I: OrigBiasInst->clone());
455 Addr = Builder.CreateIntToPtr(V: BiasInst,
456 DestTy: PointerType::getUnqual(C&: Ty->getContext()));
457 }
458 if (AtomicCounterUpdatePromoted)
459 // automic update currently can only be promoted across the current
460 // loop, not the whole loop nest.
461 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Addr, Val: LiveInValue,
462 Align: MaybeAlign(),
463 Ordering: AtomicOrdering::SequentiallyConsistent);
464 else {
465 LoadInst *OldVal = Builder.CreateLoad(Ty, Ptr: Addr, Name: "pgocount.promoted");
466 auto *NewVal = Builder.CreateAdd(LHS: OldVal, RHS: LiveInValue);
467 auto *NewStore = Builder.CreateStore(Val: NewVal, Ptr: Addr);
468
469 // Now update the parent loop's candidate list:
470 if (IterativeCounterPromotion) {
471 auto *TargetLoop = LI.getLoopFor(BB: ExitBlock);
472 if (TargetLoop)
473 LoopToCandidates[TargetLoop].emplace_back(Args&: OldVal, Args&: NewStore);
474 }
475 }
476 }
477 }
478
479private:
480 Instruction *Store;
481 ArrayRef<BasicBlock *> ExitBlocks;
482 ArrayRef<Instruction *> InsertPts;
483 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
484 LoopInfo &LI;
485};
486
487/// A helper class to do register promotion for all profile counter
488/// updates in a loop.
489///
490class PGOCounterPromoter {
491public:
492 PGOCounterPromoter(
493 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
494 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
495 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {
496
497 // Skip collection of ExitBlocks and InsertPts for loops that will not be
498 // able to have counters promoted.
499 SmallVector<BasicBlock *, 8> LoopExitBlocks;
500 SmallPtrSet<BasicBlock *, 8> BlockSet;
501
502 L.getExitBlocks(ExitBlocks&: LoopExitBlocks);
503 if (!isPromotionPossible(LP: &L, LoopExitBlocks))
504 return;
505
506 for (BasicBlock *ExitBlock : LoopExitBlocks) {
507 if (BlockSet.insert(Ptr: ExitBlock).second &&
508 llvm::none_of(Range: predecessors(BB: ExitBlock), P: [&](const BasicBlock *Pred) {
509 return llvm::isPresplitCoroSuspendExitEdge(Src: *Pred, Dest: *ExitBlock);
510 })) {
511 ExitBlocks.push_back(Elt: ExitBlock);
512 InsertPts.push_back(Elt: &*ExitBlock->getFirstInsertionPt());
513 }
514 }
515 }
516
517 bool run(int64_t *NumPromoted) {
518 // Skip 'infinite' loops:
519 if (ExitBlocks.size() == 0)
520 return false;
521
522 // Skip if any of the ExitBlocks contains a ret instruction.
523 // This is to prevent dumping of incomplete profile -- if the
524 // the loop is a long running loop and dump is called in the middle
525 // of the loop, the result profile is incomplete.
526 // FIXME: add other heuristics to detect long running loops.
527 if (SkipRetExitBlock) {
528 for (auto *BB : ExitBlocks)
529 if (isa<ReturnInst>(Val: BB->getTerminator()))
530 return false;
531 }
532
533 unsigned MaxProm = getMaxNumOfPromotionsInLoop(LP: &L);
534 if (MaxProm == 0)
535 return false;
536
537 unsigned Promoted = 0;
538 for (auto &Cand : LoopToCandidates[&L]) {
539
540 SmallVector<PHINode *, 4> NewPHIs;
541 SSAUpdater SSA(&NewPHIs);
542 Value *InitVal = ConstantInt::get(Ty: Cand.first->getType(), V: 0);
543
544 // If BFI is set, we will use it to guide the promotions.
545 if (BFI) {
546 auto *BB = Cand.first->getParent();
547 auto InstrCount = BFI->getBlockProfileCount(BB);
548 if (!InstrCount)
549 continue;
550 auto PreheaderCount = BFI->getBlockProfileCount(BB: L.getLoopPreheader());
551 // If the average loop trip count is not greater than 1.5, we skip
552 // promotion.
553 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
554 continue;
555 }
556
557 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
558 L.getLoopPreheader(), ExitBlocks,
559 InsertPts, LoopToCandidates, LI);
560 Promoter.run(Insts: SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
561 Promoted++;
562 if (Promoted >= MaxProm)
563 break;
564
565 (*NumPromoted)++;
566 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
567 break;
568 }
569
570 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
571 << L.getLoopDepth() << ")\n");
572 return Promoted != 0;
573 }
574
575private:
576 bool allowSpeculativeCounterPromotion(Loop *LP) {
577 SmallVector<BasicBlock *, 8> ExitingBlocks;
578 L.getExitingBlocks(ExitingBlocks);
579 // Not considierered speculative.
580 if (ExitingBlocks.size() == 1)
581 return true;
582 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
583 return false;
584 return true;
585 }
586
587 // Check whether the loop satisfies the basic conditions needed to perform
588 // Counter Promotions.
589 bool
590 isPromotionPossible(Loop *LP,
591 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
592 // We can't insert into a catchswitch.
593 if (llvm::any_of(Range: LoopExitBlocks, P: [](BasicBlock *Exit) {
594 return isa<CatchSwitchInst>(Val: Exit->getTerminator());
595 }))
596 return false;
597
598 if (!LP->hasDedicatedExits())
599 return false;
600
601 BasicBlock *PH = LP->getLoopPreheader();
602 if (!PH)
603 return false;
604
605 return true;
606 }
607
608 // Returns the max number of Counter Promotions for LP.
609 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
610 SmallVector<BasicBlock *, 8> LoopExitBlocks;
611 LP->getExitBlocks(ExitBlocks&: LoopExitBlocks);
612 if (!isPromotionPossible(LP, LoopExitBlocks))
613 return 0;
614
615 SmallVector<BasicBlock *, 8> ExitingBlocks;
616 LP->getExitingBlocks(ExitingBlocks);
617
618 // If BFI is set, we do more aggressive promotions based on BFI.
619 if (BFI)
620 return (unsigned)-1;
621
622 // Not considierered speculative.
623 if (ExitingBlocks.size() == 1)
624 return MaxNumOfPromotionsPerLoop;
625
626 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
627 return 0;
628
629 // Whether the target block is in a loop does not matter:
630 if (SpeculativeCounterPromotionToLoop)
631 return MaxNumOfPromotionsPerLoop;
632
633 // Now check the target block:
634 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
635 for (auto *TargetBlock : LoopExitBlocks) {
636 auto *TargetLoop = LI.getLoopFor(BB: TargetBlock);
637 if (!TargetLoop)
638 continue;
639 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(LP: TargetLoop);
640 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
641 MaxProm =
642 std::min(a: MaxProm, b: std::max(a: MaxPromForTarget, b: PendingCandsInTarget) -
643 PendingCandsInTarget);
644 }
645 return MaxProm;
646 }
647
648 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
649 SmallVector<BasicBlock *, 8> ExitBlocks;
650 SmallVector<Instruction *, 8> InsertPts;
651 Loop &L;
652 LoopInfo &LI;
653 BlockFrequencyInfo *BFI;
654};
655
// Kinds of value profiling calls.
enum class ValueProfilingCallType {
  // Tracks individual values; currently used for indirect call target
  // profiling.
  Default = 0,

  // Value profiling of memop sizes.
  MemOp = 1
};
664
665} // end anonymous namespace
666
667PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
668 ModuleAnalysisManager &AM) {
669 FunctionAnalysisManager &FAM =
670 AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
671 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
672 return FAM.getResult<TargetLibraryAnalysis>(IR&: F);
673 };
674 InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
675 if (!Lowerer.lower())
676 return PreservedAnalyses::all();
677
678 return PreservedAnalyses::none();
679}
680
681//
682// Perform instrumentation sampling.
683//
684// There are 3 favors of sampling:
685// (1) Full burst sampling: We transform:
686// Increment_Instruction;
687// to:
688// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
689// Increment_Instruction;
690// }
691// __llvm_profile_sampling__ += 1;
692// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
693// __llvm_profile_sampling__ = 0;
694// }
695//
696// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
697// counters (value-instrumentation and edge instrumentation).
698//
699// (2) Fast burst sampling:
700// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
701// wrap around to zero when overflows. In this case, the second check is
702// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
703// set to 65536 (64K). The code after:
704// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
705// Increment_Instruction;
706// }
707// __llvm_profile_sampling__ += 1;
708//
709// (3) Simple sampling:
710// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
711// __llvm_profile_sampling__ += 1;
712// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
713// __llvm_profile_sampling__ = 0;
714// Increment_Instruction;
715// }
716//
717// Note that, the code snippet after the transformation can still be counter
718// promoted. However, with sampling enabled, counter updates are expected to
719// be infrequent, making the benefits of counter promotion negligible.
720// Moreover, counter promotion can potentially cause issues in server
721// applications, particularly when the counters are dumped without a clean
722// exit. To mitigate this risk, counter promotion is disabled by default when
723// sampling is enabled. This behavior can be overridden using the internal
724// option.
725void InstrLowerer::doSampling(Instruction *I) {
726 if (!isSamplingEnabled())
727 return;
728
729 SampledInstrumentationConfig config = getSampledInstrumentationConfig();
730 auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) {
731 if (config.UseShort)
732 return Builder.getInt16(C);
733 else
734 return Builder.getInt32(C);
735 };
736
737 IntegerType *SamplingVarTy;
738 if (config.UseShort)
739 SamplingVarTy = Type::getInt16Ty(C&: M.getContext());
740 else
741 SamplingVarTy = Type::getInt32Ty(C&: M.getContext());
742 auto *SamplingVar =
743 M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
744 assert(SamplingVar && "SamplingVar not set properly");
745
746 // Create the condition for checking the burst duration.
747 Instruction *SamplingVarIncr;
748 Value *NewSamplingVarVal;
749 MDBuilder MDB(I->getContext());
750 MDNode *BranchWeight;
751 IRBuilder<> CondBuilder(I);
752 auto *LoadSamplingVar = CondBuilder.CreateLoad(Ty: SamplingVarTy, Ptr: SamplingVar);
753 if (config.IsSimpleSampling) {
754 // For the simple sampling, just create the load and increments.
755 IRBuilder<> IncBuilder(I);
756 NewSamplingVarVal =
757 IncBuilder.CreateAdd(LHS: LoadSamplingVar, RHS: GetConstant(IncBuilder, 1));
758 SamplingVarIncr = IncBuilder.CreateStore(Val: NewSamplingVarVal, Ptr: SamplingVar);
759 } else {
760 // For the burst-sampling, create the conditional update.
761 auto *DurationCond = CondBuilder.CreateICmpULE(
762 LHS: LoadSamplingVar, RHS: GetConstant(CondBuilder, config.BurstDuration - 1));
763 BranchWeight = MDB.createBranchWeights(
764 TrueWeight: config.BurstDuration, FalseWeight: config.Period - config.BurstDuration);
765 Instruction *ThenTerm = SplitBlockAndInsertIfThen(
766 Cond: DurationCond, SplitBefore: I, /* Unreachable */ false, BranchWeights: BranchWeight);
767 IRBuilder<> IncBuilder(I);
768 NewSamplingVarVal =
769 IncBuilder.CreateAdd(LHS: LoadSamplingVar, RHS: GetConstant(IncBuilder, 1));
770 SamplingVarIncr = IncBuilder.CreateStore(Val: NewSamplingVarVal, Ptr: SamplingVar);
771 I->moveBefore(InsertPos: ThenTerm->getIterator());
772 }
773
774 if (config.IsFastSampling)
775 return;
776
777 // Create the condition for checking the period.
778 Instruction *ThenTerm, *ElseTerm;
779 IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
780 auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
781 LHS: NewSamplingVarVal, RHS: GetConstant(PeriodCondBuilder, config.Period));
782 BranchWeight = MDB.createBranchWeights(TrueWeight: 1, FalseWeight: config.Period - 1);
783 SplitBlockAndInsertIfThenElse(Cond: PeriodCond, SplitBefore: SamplingVarIncr, ThenTerm: &ThenTerm,
784 ElseTerm: &ElseTerm, BranchWeights: BranchWeight);
785
786 // For the simple sampling, the counter update happens in sampling var reset.
787 if (config.IsSimpleSampling)
788 I->moveBefore(InsertPos: ThenTerm->getIterator());
789
790 IRBuilder<> ResetBuilder(ThenTerm);
791 ResetBuilder.CreateStore(Val: GetConstant(ResetBuilder, 0), Ptr: SamplingVar);
792 SamplingVarIncr->moveBefore(InsertPos: ElseTerm->getIterator());
793}
794
795bool InstrLowerer::lowerIntrinsics(Function *F) {
796 bool MadeChange = false;
797 PromotionCandidates.clear();
798 SmallVector<InstrProfInstBase *, 8> InstrProfInsts;
799
800 // To ensure compatibility with sampling, we save the intrinsics into
801 // a buffer to prevent potential breakage of the iterator (as the
802 // intrinsics will be moved to a different BB).
803 for (BasicBlock &BB : *F) {
804 for (Instruction &Instr : llvm::make_early_inc_range(Range&: BB)) {
805 if (auto *IP = dyn_cast<InstrProfInstBase>(Val: &Instr))
806 InstrProfInsts.push_back(Elt: IP);
807 }
808 }
809
810 for (auto *Instr : InstrProfInsts) {
811 doSampling(I: Instr);
812 if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Val: Instr)) {
813 lowerIncrement(Inc: IPIS);
814 MadeChange = true;
815 } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Val: Instr)) {
816 lowerIncrement(Inc: IPI);
817 MadeChange = true;
818 } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Val: Instr)) {
819 lowerTimestamp(TimestampInstruction: IPC);
820 MadeChange = true;
821 } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Val: Instr)) {
822 lowerCover(Inc: IPC);
823 MadeChange = true;
824 } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Val: Instr)) {
825 lowerValueProfileInst(Ins: IPVP);
826 MadeChange = true;
827 } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Val: Instr)) {
828 IPMP->eraseFromParent();
829 MadeChange = true;
830 } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Val: Instr)) {
831 lowerMCDCTestVectorBitmapUpdate(Ins: IPBU);
832 MadeChange = true;
833 }
834 }
835
836 if (!MadeChange)
837 return false;
838
839 promoteCounterLoadStores(F);
840 return true;
841}
842
843bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
844 // Mach-O don't support weak external references.
845 if (TT.isOSBinFormatMachO())
846 return false;
847
848 if (RuntimeCounterRelocation.getNumOccurrences() > 0)
849 return RuntimeCounterRelocation;
850
851 // Fuchsia uses runtime counter relocation by default.
852 return TT.isOSFuchsia();
853}
854
855bool InstrLowerer::isSamplingEnabled() const {
856 if (SampledInstr.getNumOccurrences() > 0)
857 return SampledInstr;
858 return Options.Sampling;
859}
860
861bool InstrLowerer::isCounterPromotionEnabled() const {
862 if (DoCounterPromotion.getNumOccurrences() > 0)
863 return DoCounterPromotion;
864
865 return Options.DoCounterPromotion;
866}
867
868void InstrLowerer::promoteCounterLoadStores(Function *F) {
869 if (!isCounterPromotionEnabled())
870 return;
871
872 DominatorTree DT(*F);
873 LoopInfo LI(DT);
874 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
875
876 std::unique_ptr<BlockFrequencyInfo> BFI;
877 if (Options.UseBFIInPromotion) {
878 std::unique_ptr<BranchProbabilityInfo> BPI;
879 BPI.reset(p: new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
880 BFI.reset(p: new BlockFrequencyInfo(*F, *BPI, LI));
881 }
882
883 for (const auto &LoadStore : PromotionCandidates) {
884 auto *CounterLoad = LoadStore.first;
885 auto *CounterStore = LoadStore.second;
886 BasicBlock *BB = CounterLoad->getParent();
887 Loop *ParentLoop = LI.getLoopFor(BB);
888 if (!ParentLoop)
889 continue;
890 LoopPromotionCandidates[ParentLoop].emplace_back(Args&: CounterLoad, Args&: CounterStore);
891 }
892
893 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
894
895 // Do a post-order traversal of the loops so that counter updates can be
896 // iteratively hoisted outside the loop nest.
897 for (auto *Loop : llvm::reverse(C&: Loops)) {
898 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
899 Promoter.run(NumPromoted: &TotalCountersPromoted);
900 }
901}
902
903static bool needsRuntimeHookUnconditionally(const Triple &TT) {
904 // On Fuchsia, we only need runtime hook if any counters are present.
905 if (TT.isOSFuchsia())
906 return false;
907
908 return true;
909}
910
911/// Check if the module contains uses of any profiling intrinsics.
912static bool containsProfilingIntrinsics(Module &M) {
913 auto containsIntrinsic = [&](int ID) {
914 if (auto *F = Intrinsic::getDeclarationIfExists(M: &M, id: ID))
915 return !F->use_empty();
916 return false;
917 };
918 return containsIntrinsic(Intrinsic::instrprof_cover) ||
919 containsIntrinsic(Intrinsic::instrprof_increment) ||
920 containsIntrinsic(Intrinsic::instrprof_increment_step) ||
921 containsIntrinsic(Intrinsic::instrprof_timestamp) ||
922 containsIntrinsic(Intrinsic::instrprof_value_profile);
923}
924
925bool InstrLowerer::lower() {
926 bool MadeChange = false;
927 bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
928 if (NeedsRuntimeHook)
929 MadeChange = emitRuntimeHook();
930
931 if (!IsCS && isSamplingEnabled())
932 createProfileSamplingVar(M);
933
934 bool ContainsProfiling = containsProfilingIntrinsics(M);
935 GlobalVariable *CoverageNamesVar =
936 M.getNamedGlobal(Name: getCoverageUnusedNamesVarName());
937 // Improve compile time by avoiding linear scans when there is no work.
938 if (!ContainsProfiling && !CoverageNamesVar)
939 return MadeChange;
940
941 // We did not know how many value sites there would be inside
942 // the instrumented function. This is counting the number of instrumented
943 // target value sites to enter it as field in the profile data variable.
944 for (Function &F : M) {
945 InstrProfCntrInstBase *FirstProfInst = nullptr;
946 for (BasicBlock &BB : F) {
947 for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
948 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Val&: I))
949 computeNumValueSiteCounts(Ins: Ind);
950 else {
951 if (FirstProfInst == nullptr &&
952 (isa<InstrProfIncrementInst>(Val: I) || isa<InstrProfCoverInst>(Val: I)))
953 FirstProfInst = dyn_cast<InstrProfCntrInstBase>(Val&: I);
954 // If the MCDCBitmapParameters intrinsic seen, create the bitmaps.
955 if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(Val&: I))
956 static_cast<void>(getOrCreateRegionBitmaps(Inc: Params));
957 }
958 }
959 }
960
961 // Use a profile intrinsic to create the region counters and data variable.
962 // Also create the data variable based on the MCDCParams.
963 if (FirstProfInst != nullptr) {
964 static_cast<void>(getOrCreateRegionCounters(Inc: FirstProfInst));
965 }
966 }
967
968 if (EnableVTableValueProfiling)
969 for (GlobalVariable &GV : M.globals())
970 // Global variables with type metadata are virtual table variables.
971 if (GV.hasMetadata(KindID: LLVMContext::MD_type))
972 getOrCreateVTableProfData(GV: &GV);
973
974 for (Function &F : M)
975 MadeChange |= lowerIntrinsics(F: &F);
976
977 if (CoverageNamesVar) {
978 lowerCoverageData(CoverageNamesVar);
979 MadeChange = true;
980 }
981
982 if (!MadeChange)
983 return false;
984
985 emitVNodes();
986 emitNameData();
987 emitVTableNames();
988
989 // Emit runtime hook for the cases where the target does not unconditionally
990 // require pulling in profile runtime, and coverage is enabled on code that is
991 // not eliminated by the front-end, e.g. unused functions with internal
992 // linkage.
993 if (!NeedsRuntimeHook && ContainsProfiling)
994 emitRuntimeHook();
995
996 emitRegistration();
997 emitUses();
998 emitInitialization();
999 return true;
1000}
1001
1002static FunctionCallee getOrInsertValueProfilingCall(
1003 Module &M, const TargetLibraryInfo &TLI,
1004 ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
1005 LLVMContext &Ctx = M.getContext();
1006 auto *ReturnTy = Type::getVoidTy(C&: M.getContext());
1007
1008 AttributeList AL;
1009 if (auto AK = TLI.getExtAttrForI32Param(Signed: false))
1010 AL = AL.addParamAttribute(C&: M.getContext(), ArgNo: 2, Kind: AK);
1011
1012 assert((CallType == ValueProfilingCallType::Default ||
1013 CallType == ValueProfilingCallType::MemOp) &&
1014 "Must be Default or MemOp");
1015 Type *ParamTypes[] = {
1016#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
1017#include "llvm/ProfileData/InstrProfData.inc"
1018 };
1019 auto *ValueProfilingCallTy =
1020 FunctionType::get(Result: ReturnTy, Params: ArrayRef(ParamTypes), isVarArg: false);
1021 StringRef FuncName = CallType == ValueProfilingCallType::Default
1022 ? getInstrProfValueProfFuncName()
1023 : getInstrProfValueProfMemOpFuncName();
1024 return M.getOrInsertFunction(Name: FuncName, T: ValueProfilingCallTy, AttributeList: AL);
1025}
1026
1027void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
1028 GlobalVariable *Name = Ind->getName();
1029 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1030 uint64_t Index = Ind->getIndex()->getZExtValue();
1031 auto &PD = ProfileDataMap[Name];
1032 PD.NumValueSites[ValueKind] =
1033 std::max(a: PD.NumValueSites[ValueKind], b: (uint32_t)(Index + 1));
1034}
1035
1036void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
1037 // TODO: Value profiling heavily depends on the data section which is omitted
1038 // in lightweight mode. We need to move the value profile pointer to the
1039 // Counter struct to get this working.
1040 assert(
1041 ProfileCorrelate == InstrProfCorrelator::NONE &&
1042 "Value profiling is not yet supported with lightweight instrumentation");
1043 GlobalVariable *Name = Ind->getName();
1044 auto It = ProfileDataMap.find(Val: Name);
1045 assert(It != ProfileDataMap.end() && It->second.DataVar &&
1046 "value profiling detected in function with no counter increment");
1047
1048 GlobalVariable *DataVar = It->second.DataVar;
1049 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1050 uint64_t Index = Ind->getIndex()->getZExtValue();
1051 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
1052 Index += It->second.NumValueSites[Kind];
1053
1054 IRBuilder<> Builder(Ind);
1055 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
1056 llvm::InstrProfValueKind::IPVK_MemOPSize);
1057 CallInst *Call = nullptr;
1058 auto *TLI = &GetTLI(*Ind->getFunction());
1059 auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1060 C: DataVar, Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0));
1061
1062 // To support value profiling calls within Windows exception handlers, funclet
1063 // information contained within operand bundles needs to be copied over to
1064 // the library call. This is required for the IR to be processed by the
1065 // WinEHPrepare pass.
1066 SmallVector<OperandBundleDef, 1> OpBundles;
1067 Ind->getOperandBundlesAsDefs(Defs&: OpBundles);
1068 if (!IsMemOpSize) {
1069 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1070 Builder.getInt32(C: Index)};
1071 Call = Builder.CreateCall(Callee: getOrInsertValueProfilingCall(M, TLI: *TLI), Args,
1072 OpBundles);
1073 } else {
1074 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1075 Builder.getInt32(C: Index)};
1076 Call = Builder.CreateCall(
1077 Callee: getOrInsertValueProfilingCall(M, TLI: *TLI, CallType: ValueProfilingCallType::MemOp),
1078 Args, OpBundles);
1079 }
1080 if (auto AK = TLI->getExtAttrForI32Param(Signed: false))
1081 Call->addParamAttr(ArgNo: 2, Kind: AK);
1082 Ind->replaceAllUsesWith(V: Call);
1083 Ind->eraseFromParent();
1084}
1085
1086GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
1087 GlobalVariable *Bias = M.getGlobalVariable(Name: VarName);
1088 if (Bias)
1089 return Bias;
1090
1091 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
1092
1093 // Compiler must define this variable when runtime counter relocation
1094 // is being used. Runtime has a weak external reference that is used
1095 // to check whether that's the case or not.
1096 Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
1097 Constant::getNullValue(Ty: Int64Ty), VarName);
1098 Bias->setVisibility(GlobalVariable::HiddenVisibility);
1099 // A definition that's weak (linkonce_odr) without being in a COMDAT
1100 // section wouldn't lead to link errors, but it would lead to a dead
1101 // data word from every TU but one. Putting it in COMDAT ensures there
1102 // will be exactly one data slot in the link.
1103 if (TT.supportsCOMDAT())
1104 Bias->setComdat(M.getOrInsertComdat(Name: VarName));
1105
1106 return Bias;
1107}
1108
1109Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
1110 auto *Counters = getOrCreateRegionCounters(Inc: I);
1111 IRBuilder<> Builder(I);
1112
1113 if (isa<InstrProfTimestampInst>(Val: I))
1114 Counters->setAlignment(Align(8));
1115
1116 auto *Addr = Builder.CreateConstInBoundsGEP2_32(
1117 Ty: Counters->getValueType(), Ptr: Counters, Idx0: 0, Idx1: I->getIndex()->getZExtValue());
1118
1119 if (!isRuntimeCounterRelocationEnabled())
1120 return Addr;
1121
1122 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
1123 Function *Fn = I->getParent()->getParent();
1124 LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
1125 if (!BiasLI) {
1126 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1127 auto *Bias = getOrCreateBiasVar(VarName: getInstrProfCounterBiasVarName());
1128 BiasLI = EntryBuilder.CreateLoad(Ty: Int64Ty, Ptr: Bias, Name: "profc_bias");
1129 // Bias doesn't change after startup.
1130 BiasLI->setMetadata(KindID: LLVMContext::MD_invariant_load,
1131 Node: MDNode::get(Context&: M.getContext(), MDs: {}));
1132 }
1133 auto *Add = Builder.CreateAdd(LHS: Builder.CreatePtrToInt(V: Addr, DestTy: Int64Ty), RHS: BiasLI);
1134 return Builder.CreateIntToPtr(V: Add, DestTy: Addr->getType());
1135}
1136
1137Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
1138 auto *Bitmaps = getOrCreateRegionBitmaps(Inc: I);
1139 if (!isRuntimeCounterRelocationEnabled())
1140 return Bitmaps;
1141
1142 // Put BiasLI onto the entry block.
1143 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
1144 Function *Fn = I->getFunction();
1145 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1146 auto *Bias = getOrCreateBiasVar(VarName: getInstrProfBitmapBiasVarName());
1147 auto *BiasLI = EntryBuilder.CreateLoad(Ty: Int64Ty, Ptr: Bias, Name: "profbm_bias");
1148 // Assume BiasLI invariant (in the function at least)
1149 BiasLI->setMetadata(KindID: LLVMContext::MD_invariant_load,
1150 Node: MDNode::get(Context&: M.getContext(), MDs: {}));
1151
1152 // Add Bias to Bitmaps and put it before the intrinsic.
1153 IRBuilder<> Builder(I);
1154 return Builder.CreatePtrAdd(Ptr: Bitmaps, Offset: BiasLI, Name: "profbm_addr");
1155}
1156
1157void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
1158 auto *Addr = getCounterAddress(I: CoverInstruction);
1159 IRBuilder<> Builder(CoverInstruction);
1160 if (ConditionalCounterUpdate) {
1161 Instruction *SplitBefore = CoverInstruction->getNextNode();
1162 auto &Ctx = CoverInstruction->getParent()->getContext();
1163 auto *Int8Ty = llvm::Type::getInt8Ty(C&: Ctx);
1164 Value *Load = Builder.CreateLoad(Ty: Int8Ty, Ptr: Addr, Name: "pgocount");
1165 Value *Cmp = Builder.CreateIsNotNull(Arg: Load, Name: "pgocount.ifnonzero");
1166 Instruction *ThenBranch =
1167 SplitBlockAndInsertIfThen(Cond: Cmp, SplitBefore, Unreachable: false);
1168 Builder.SetInsertPoint(ThenBranch);
1169 }
1170
1171 // We store zero to represent that this block is covered.
1172 Builder.CreateStore(Val: Builder.getInt8(C: 0), Ptr: Addr);
1173 CoverInstruction->eraseFromParent();
1174}
1175
1176void InstrLowerer::lowerTimestamp(
1177 InstrProfTimestampInst *TimestampInstruction) {
1178 assert(TimestampInstruction->getIndex()->isNullValue() &&
1179 "timestamp probes are always the first probe for a function");
1180 auto &Ctx = M.getContext();
1181 auto *TimestampAddr = getCounterAddress(I: TimestampInstruction);
1182 IRBuilder<> Builder(TimestampInstruction);
1183 auto *CalleeTy =
1184 FunctionType::get(Result: Type::getVoidTy(C&: Ctx), Params: TimestampAddr->getType(), isVarArg: false);
1185 auto Callee = M.getOrInsertFunction(
1186 INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), T: CalleeTy);
1187 Builder.CreateCall(Callee, Args: {TimestampAddr});
1188 TimestampInstruction->eraseFromParent();
1189}
1190
1191void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
1192 auto *Addr = getCounterAddress(I: Inc);
1193
1194 IRBuilder<> Builder(Inc);
1195 if (Options.Atomic || AtomicCounterUpdateAll ||
1196 (Inc->getIndex()->isNullValue() && AtomicFirstCounter)) {
1197 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Addr, Val: Inc->getStep(),
1198 Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
1199 } else {
1200 Value *IncStep = Inc->getStep();
1201 Value *Load = Builder.CreateLoad(Ty: IncStep->getType(), Ptr: Addr, Name: "pgocount");
1202 auto *Count = Builder.CreateAdd(LHS: Load, RHS: Inc->getStep());
1203 auto *Store = Builder.CreateStore(Val: Count, Ptr: Addr);
1204 if (isCounterPromotionEnabled())
1205 PromotionCandidates.emplace_back(args: cast<Instruction>(Val: Load), args&: Store);
1206 }
1207 Inc->eraseFromParent();
1208}
1209
1210void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
1211 ConstantArray *Names =
1212 cast<ConstantArray>(Val: CoverageNamesVar->getInitializer());
1213 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
1214 Constant *NC = Names->getOperand(i_nocapture: I);
1215 Value *V = NC->stripPointerCasts();
1216 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
1217 GlobalVariable *Name = cast<GlobalVariable>(Val: V);
1218
1219 Name->setLinkage(GlobalValue::PrivateLinkage);
1220 ReferencedNames.push_back(x: Name);
1221 if (isa<ConstantExpr>(Val: NC))
1222 NC->dropAllReferences();
1223 }
1224 CoverageNamesVar->eraseFromParent();
1225}
1226
1227void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
1228 InstrProfMCDCTVBitmapUpdate *Update) {
1229 auto &Ctx = M.getContext();
1230 IRBuilder<> Builder(Update);
1231 auto *Int8Ty = Type::getInt8Ty(C&: Ctx);
1232 auto *Int32Ty = Type::getInt32Ty(C&: Ctx);
1233 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
1234 auto *BitmapAddr = getBitmapAddress(I: Update);
1235
1236 // Load Temp Val + BitmapIdx.
1237 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
1238 auto *Temp = Builder.CreateAdd(
1239 LHS: Builder.CreateLoad(Ty: Int32Ty, Ptr: MCDCCondBitmapAddr, Name: "mcdc.temp"),
1240 RHS: Update->getBitmapIndex());
1241
1242 // Calculate byte offset using div8.
1243 // %1 = lshr i32 %mcdc.temp, 3
1244 auto *BitmapByteOffset = Builder.CreateLShr(LHS: Temp, RHS: 0x3);
1245
1246 // Add byte offset to section base byte address.
1247 // %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
1248 auto *BitmapByteAddr =
1249 Builder.CreateInBoundsPtrAdd(Ptr: BitmapAddr, Offset: BitmapByteOffset);
1250
1251 // Calculate bit offset into bitmap byte by using div8 remainder (AND ~8)
1252 // %5 = and i32 %mcdc.temp, 7
1253 // %6 = trunc i32 %5 to i8
1254 auto *BitToSet = Builder.CreateTrunc(V: Builder.CreateAnd(LHS: Temp, RHS: 0x7), DestTy: Int8Ty);
1255
1256 // Shift bit offset left to form a bitmap.
1257 // %7 = shl i8 1, %6
1258 auto *ShiftedVal = Builder.CreateShl(LHS: Builder.getInt8(C: 0x1), RHS: BitToSet);
1259
1260 // Load profile bitmap byte.
1261 // %mcdc.bits = load i8, ptr %4, align 1
1262 auto *Bitmap = Builder.CreateLoad(Ty: Int8Ty, Ptr: BitmapByteAddr, Name: "mcdc.bits");
1263
1264 if (Options.Atomic || AtomicCounterUpdateAll) {
1265 // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
1266 // Note, just-loaded Bitmap might not be up-to-date. Use it just for
1267 // early testing.
1268 auto *Masked = Builder.CreateAnd(LHS: Bitmap, RHS: ShiftedVal);
1269 auto *ShouldStore = Builder.CreateICmpNE(LHS: Masked, RHS: ShiftedVal);
1270
1271 // Assume updating will be rare.
1272 auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
1273 Instruction *ThenBranch =
1274 SplitBlockAndInsertIfThen(Cond: ShouldStore, SplitBefore: Update, Unreachable: false, BranchWeights: Unlikely);
1275
1276 // Execute if (unlikely(ShouldStore)).
1277 Builder.SetInsertPoint(ThenBranch);
1278 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: BitmapByteAddr, Val: ShiftedVal,
1279 Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
1280 } else {
1281 // Perform logical OR of profile bitmap byte and shifted bit offset.
1282 // %8 = or i8 %mcdc.bits, %7
1283 auto *Result = Builder.CreateOr(LHS: Bitmap, RHS: ShiftedVal);
1284
1285 // Store the updated profile bitmap byte.
1286 // store i8 %8, ptr %3, align 1
1287 Builder.CreateStore(Val: Result, Ptr: BitmapByteAddr);
1288 }
1289
1290 Update->eraseFromParent();
1291}
1292
1293/// Get the name of a profiling variable for a particular function.
1294static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
1295 bool &Renamed) {
1296 StringRef NamePrefix = getInstrProfNameVarPrefix();
1297 StringRef Name = Inc->getName()->getName().substr(Start: NamePrefix.size());
1298 Function *F = Inc->getParent()->getParent();
1299 Module *M = F->getParent();
1300 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
1301 !canRenameComdatFunc(F: *F)) {
1302 Renamed = false;
1303 return (Prefix + Name).str();
1304 }
1305 Renamed = true;
1306 uint64_t FuncHash = Inc->getHash()->getZExtValue();
1307 SmallVector<char, 24> HashPostfix;
1308 if (Name.ends_with(Suffix: (Twine(".") + Twine(FuncHash)).toStringRef(Out&: HashPostfix)))
1309 return (Prefix + Name).str();
1310 return (Prefix + Name + "." + Twine(FuncHash)).str();
1311}
1312
1313static inline bool shouldRecordFunctionAddr(Function *F) {
1314 // Only record function addresses if IR PGO is enabled or if clang value
1315 // profiling is enabled. Recording function addresses greatly increases object
1316 // file size, because it prevents the inliner from deleting functions that
1317 // have been inlined everywhere.
1318 if (!profDataReferencedByCode(M: *F->getParent()))
1319 return false;
1320
1321 // Check the linkage
1322 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
1323 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
1324 !HasAvailableExternallyLinkage)
1325 return true;
1326
1327 // A function marked 'alwaysinline' with available_externally linkage can't
1328 // have its address taken. Doing so would create an undefined external ref to
1329 // the function, which would fail to link.
1330 if (HasAvailableExternallyLinkage &&
1331 F->hasFnAttribute(Kind: Attribute::AlwaysInline))
1332 return false;
1333
1334 // Prohibit function address recording if the function is both internal and
1335 // COMDAT. This avoids the profile data variable referencing internal symbols
1336 // in COMDAT.
1337 if (F->hasLocalLinkage() && F->hasComdat())
1338 return false;
1339
1340 // Check uses of this function for other than direct calls or invokes to it.
1341 // Inline virtual functions have linkeOnceODR linkage. When a key method
1342 // exists, the vtable will only be emitted in the TU where the key method
1343 // is defined. In a TU where vtable is not available, the function won't
1344 // be 'addresstaken'. If its address is not recorded here, the profile data
1345 // with missing address may be picked by the linker leading to missing
1346 // indirect call target info.
1347 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
1348}
1349
1350static inline bool shouldUsePublicSymbol(Function *Fn) {
1351 // It isn't legal to make an alias of this function at all
1352 if (Fn->isDeclarationForLinker())
1353 return true;
1354
1355 // Symbols with local linkage can just use the symbol directly without
1356 // introducing relocations
1357 if (Fn->hasLocalLinkage())
1358 return true;
1359
1360 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
1361 // unfavorable interaction between the new alias and the alias renaming done
1362 // in LowerTypeTests under ThinLTO. For comdat functions that would normally
1363 // be deduplicated, but the renaming scheme ends up preventing renaming, since
1364 // it creates unique names for each alias, resulting in duplicated symbols. In
1365 // the future, we should update the CFI related passes to migrate these
1366 // aliases to the same module as the jump-table they refer to will be defined.
1367 if (Fn->hasMetadata(KindID: LLVMContext::MD_type))
1368 return true;
1369
1370 // For comdat functions, an alias would need the same linkage as the original
1371 // function and hidden visibility. There is no point in adding an alias with
1372 // identical linkage an visibility to avoid introducing symbolic relocations.
1373 if (Fn->hasComdat() &&
1374 (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility))
1375 return true;
1376
1377 // its OK to use an alias
1378 return false;
1379}
1380
1381static inline Constant *getFuncAddrForProfData(Function *Fn) {
1382 auto *Int8PtrTy = PointerType::getUnqual(C&: Fn->getContext());
1383 // Store a nullptr in __llvm_profd, if we shouldn't use a real address
1384 if (!shouldRecordFunctionAddr(F: Fn))
1385 return ConstantPointerNull::get(T: Int8PtrTy);
1386
1387 // If we can't use an alias, we must use the public symbol, even though this
1388 // may require a symbolic relocation.
1389 if (shouldUsePublicSymbol(Fn))
1390 return Fn;
1391
1392 // When possible use a private alias to avoid symbolic relocations.
1393 auto *GA = GlobalAlias::create(Linkage: GlobalValue::LinkageTypes::PrivateLinkage,
1394 Name: Fn->getName() + ".local", Aliasee: Fn);
1395
1396 // When the instrumented function is a COMDAT function, we cannot use a
1397 // private alias. If we did, we would create reference to a local label in
1398 // this function's section. If this version of the function isn't selected by
1399 // the linker, then the metadata would introduce a reference to a discarded
1400 // section. So, for COMDAT functions, we need to adjust the linkage of the
1401 // alias. Using hidden visibility avoids a dynamic relocation and an entry in
1402 // the dynamic symbol table.
1403 //
1404 // Note that this handles COMDAT functions with visibility other than Hidden,
1405 // since that case is covered in shouldUsePublicSymbol()
1406 if (Fn->hasComdat()) {
1407 GA->setLinkage(Fn->getLinkage());
1408 GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);
1409 }
1410
1411 // appendToCompilerUsed(*Fn->getParent(), {GA});
1412
1413 return GA;
1414}
1415
1416static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
1417 // compiler-rt uses linker support to get data/counters/name start/end for
1418 // ELF, COFF, Mach-O, XCOFF, and Wasm.
1419 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1420 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF() ||
1421 TT.isOSBinFormatWasm())
1422 return false;
1423
1424 return true;
1425}
1426
1427void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
1428 StringRef CounterGroupName) {
1429 // Place lowered global variables in a comdat group if the associated function
1430 // or global variable is a COMDAT. This will make sure that only one copy of
1431 // global variable (e.g. function counters) of the COMDAT function will be
1432 // emitted after linking.
1433 bool NeedComdat = needsComdatForCounter(GV: *GO, M);
1434 bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
1435
1436 if (!UseComdat)
1437 return;
1438
1439 // Keep in mind that this pass may run before the inliner, so we need to
1440 // create a new comdat group (for counters, profiling data, etc). If we use
1441 // the comdat of the parent function, that will result in relocations against
1442 // discarded sections.
1443 //
1444 // If the data variable is referenced by code, non-counter variables (notably
1445 // profiling data) and counters have to be in different comdats for COFF
1446 // because the Visual C++ linker will report duplicate symbol errors if there
1447 // are multiple external symbols with the same name marked
1448 // IMAGE_COMDAT_SELECT_ASSOCIATIVE.
1449 StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
1450 ? GV->getName()
1451 : CounterGroupName;
1452 Comdat *C = M.getOrInsertComdat(Name: GroupName);
1453
1454 if (!NeedComdat) {
1455 // Object file format must be ELF since `UseComdat && !NeedComdat` is true.
1456 //
1457 // For ELF, when not using COMDAT, put counters, data and values into a
1458 // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
1459 // allows -z start-stop-gc to discard the entire group when the function is
1460 // discarded.
1461 C->setSelectionKind(Comdat::NoDeduplicate);
1462 }
1463 GV->setComdat(C);
1464 // COFF doesn't allow the comdat group leader to have private linkage, so
1465 // upgrade private linkage to internal linkage to produce a symbol table
1466 // entry.
1467 if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
1468 GV->setLinkage(GlobalValue::InternalLinkage);
1469}
1470
1471static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
1472 if (!profDataReferencedByCode(M: *GV->getParent()))
1473 return false;
1474
1475 if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
1476 !GV->hasAvailableExternallyLinkage())
1477 return true;
1478
1479 // This avoids the profile data from referencing internal symbols in
1480 // COMDAT.
1481 if (GV->hasLocalLinkage() && GV->hasComdat())
1482 return false;
1483
1484 return true;
1485}
1486
1487// FIXME: Introduce an internal alias like what's done for functions to reduce
1488// the number of relocation entries.
1489static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
1490 // Store a nullptr in __profvt_ if a real address shouldn't be used.
1491 if (!shouldRecordVTableAddr(GV))
1492 return ConstantPointerNull::get(T: PointerType::getUnqual(C&: GV->getContext()));
1493
1494 return GV;
1495}
1496
1497void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
1498 assert(ProfileCorrelate != InstrProfCorrelator::DEBUG_INFO &&
1499 "Value profiling is not supported with lightweight instrumentation");
1500 if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
1501 return;
1502
1503 // Skip llvm internal global variable or __prof variables.
1504 if (GV->getName().starts_with(Prefix: "llvm.") ||
1505 GV->getName().starts_with(Prefix: "__llvm") ||
1506 GV->getName().starts_with(Prefix: "__prof"))
1507 return;
1508
1509 // VTableProfData already created
1510 auto It = VTableDataMap.find(Val: GV);
1511 if (It != VTableDataMap.end() && It->second)
1512 return;
1513
1514 GlobalValue::LinkageTypes Linkage = GV->getLinkage();
1515 GlobalValue::VisibilityTypes Visibility = GV->getVisibility();
1516
1517 // This is to keep consistent with per-function profile data
1518 // for correctness.
1519 if (TT.isOSBinFormatXCOFF()) {
1520 Linkage = GlobalValue::InternalLinkage;
1521 Visibility = GlobalValue::DefaultVisibility;
1522 }
1523
1524 LLVMContext &Ctx = M.getContext();
1525 Type *DataTypes[] = {
1526#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
1527#include "llvm/ProfileData/InstrProfData.inc"
1528#undef INSTR_PROF_VTABLE_DATA
1529 };
1530
1531 auto *DataTy = StructType::get(Context&: Ctx, Elements: ArrayRef(DataTypes));
1532
1533 // Used by INSTR_PROF_VTABLE_DATA MACRO
1534 Constant *VTableAddr = getVTableAddrForProfData(GV);
1535 const std::string PGOVTableName = getPGOName(V: *GV);
1536 // Record the length of the vtable. This is needed since vtable pointers
1537 // loaded from C++ objects might be from the middle of a vtable definition.
1538 uint32_t VTableSizeVal = GV->getGlobalSize(DL: M.getDataLayout());
1539
1540 Constant *DataVals[] = {
1541#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
1542#include "llvm/ProfileData/InstrProfData.inc"
1543#undef INSTR_PROF_VTABLE_DATA
1544 };
1545
1546 auto *Data =
1547 new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,
1548 ConstantStruct::get(T: DataTy, V: DataVals),
1549 getInstrProfVTableVarPrefix() + PGOVTableName);
1550
1551 Data->setVisibility(Visibility);
1552 Data->setSection(getInstrProfSectionName(IPSK: IPSK_vtab, OF: TT.getObjectFormat()));
1553 Data->setAlignment(Align(8));
1554
1555 maybeSetComdat(GV: Data, GO: GV, CounterGroupName: Data->getName());
1556
1557 VTableDataMap[GV] = Data;
1558
1559 ReferencedVTables.push_back(x: GV);
1560
1561 // VTable <Hash, Addr> is used by runtime but not referenced by other
1562 // sections. Conservatively mark it linker retained.
1563 UsedVars.push_back(x: Data);
1564}
1565
1566GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
1567 InstrProfSectKind IPSK) {
1568 GlobalVariable *NamePtr = Inc->getName();
1569
1570 // Match the linkage and visibility of the name global.
1571 Function *Fn = Inc->getParent()->getParent();
1572 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
1573 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1574
1575 // Use internal rather than private linkage so the counter variable shows up
1576 // in the symbol table when using debug info for correlation.
1577 if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO &&
1578 TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
1579 Linkage = GlobalValue::InternalLinkage;
1580
1581 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1582 // symbols in the same csect won't be discarded. When there are duplicate weak
1583 // symbols, we can NOT guarantee that the relocations get resolved to the
1584 // intended weak symbol, so we can not ensure the correctness of the relative
1585 // CounterPtr, so we have to use private linkage for counter and data symbols.
1586 if (TT.isOSBinFormatXCOFF()) {
1587 Linkage = GlobalValue::PrivateLinkage;
1588 Visibility = GlobalValue::DefaultVisibility;
1589 }
1590 // Move the name variable to the right section.
1591 bool Renamed;
1592 GlobalVariable *Ptr;
1593 StringRef VarPrefix;
1594 std::string VarName;
1595 if (IPSK == IPSK_cnts) {
1596 VarPrefix = getInstrProfCountersVarPrefix();
1597 VarName = getVarName(Inc, Prefix: VarPrefix, Renamed);
1598 InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Val: Inc);
1599 Ptr = createRegionCounters(Inc: CntrIncrement, Name: VarName, Linkage);
1600 } else if (IPSK == IPSK_bitmap) {
1601 VarPrefix = getInstrProfBitmapVarPrefix();
1602 VarName = getVarName(Inc, Prefix: VarPrefix, Renamed);
1603 InstrProfMCDCBitmapInstBase *BitmapUpdate =
1604 dyn_cast<InstrProfMCDCBitmapInstBase>(Val: Inc);
1605 Ptr = createRegionBitmaps(Inc: BitmapUpdate, Name: VarName, Linkage);
1606 } else {
1607 llvm_unreachable("Profile Section must be for Counters or Bitmaps");
1608 }
1609
1610 Ptr->setVisibility(Visibility);
1611 // Put the counters and bitmaps in their own sections so linkers can
1612 // remove unneeded sections.
1613 Ptr->setSection(getInstrProfSectionName(IPSK, OF: TT.getObjectFormat()));
1614 Ptr->setLinkage(Linkage);
1615 maybeSetComdat(GV: Ptr, GO: Fn, CounterGroupName: VarName);
1616 return Ptr;
1617}
1618
1619GlobalVariable *
1620InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1621 StringRef Name,
1622 GlobalValue::LinkageTypes Linkage) {
1623 uint64_t NumBytes = Inc->getNumBitmapBytes();
1624 auto *BitmapTy = ArrayType::get(ElementType: Type::getInt8Ty(C&: M.getContext()), NumElements: NumBytes);
1625 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1626 Constant::getNullValue(Ty: BitmapTy), Name);
1627 GV->setAlignment(Align(1));
1628 return GV;
1629}
1630
1631GlobalVariable *
1632InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1633 GlobalVariable *NamePtr = Inc->getName();
1634 auto &PD = ProfileDataMap[NamePtr];
1635 if (PD.RegionBitmaps)
1636 return PD.RegionBitmaps;
1637
1638 // If RegionBitmaps doesn't already exist, create it by first setting up
1639 // the corresponding profile section.
1640 auto *BitmapPtr = setupProfileSection(Inc, IPSK: IPSK_bitmap);
1641 PD.RegionBitmaps = BitmapPtr;
1642 PD.NumBitmapBytes = Inc->getNumBitmapBytes();
1643 return PD.RegionBitmaps;
1644}
1645
1646GlobalVariable *
1647InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1648 GlobalValue::LinkageTypes Linkage) {
1649 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1650 auto &Ctx = M.getContext();
1651 GlobalVariable *GV;
1652 if (isa<InstrProfCoverInst>(Val: Inc)) {
1653 auto *CounterTy = Type::getInt8Ty(C&: Ctx);
1654 auto *CounterArrTy = ArrayType::get(ElementType: CounterTy, NumElements: NumCounters);
1655 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
1656 std::vector<Constant *> InitialValues(NumCounters,
1657 Constant::getAllOnesValue(Ty: CounterTy));
1658 GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1659 ConstantArray::get(T: CounterArrTy, V: InitialValues),
1660 Name);
1661 GV->setAlignment(Align(1));
1662 } else {
1663 auto *CounterTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NumCounters);
1664 GV = new GlobalVariable(M, CounterTy, false, Linkage,
1665 Constant::getNullValue(Ty: CounterTy), Name);
1666 GV->setAlignment(Align(8));
1667 }
1668 return GV;
1669}
1670
1671GlobalVariable *
1672InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
1673 GlobalVariable *NamePtr = Inc->getName();
1674 auto &PD = ProfileDataMap[NamePtr];
1675 if (PD.RegionCounters)
1676 return PD.RegionCounters;
1677
1678 // If RegionCounters doesn't already exist, create it by first setting up
1679 // the corresponding profile section.
1680 auto *CounterPtr = setupProfileSection(Inc, IPSK: IPSK_cnts);
1681 PD.RegionCounters = CounterPtr;
1682
1683 if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
1684 LLVMContext &Ctx = M.getContext();
1685 Function *Fn = Inc->getParent()->getParent();
1686 if (auto *SP = Fn->getSubprogram()) {
1687 DIBuilder DB(M, true, SP->getUnit());
1688 Metadata *FunctionNameAnnotation[] = {
1689 MDString::get(Context&: Ctx, Str: InstrProfCorrelator::FunctionNameAttributeName),
1690 MDString::get(Context&: Ctx, Str: getPGOFuncNameVarInitializer(NameVar: NamePtr)),
1691 };
1692 Metadata *CFGHashAnnotation[] = {
1693 MDString::get(Context&: Ctx, Str: InstrProfCorrelator::CFGHashAttributeName),
1694 ConstantAsMetadata::get(C: Inc->getHash()),
1695 };
1696 Metadata *NumCountersAnnotation[] = {
1697 MDString::get(Context&: Ctx, Str: InstrProfCorrelator::NumCountersAttributeName),
1698 ConstantAsMetadata::get(C: Inc->getNumCounters()),
1699 };
1700 auto Annotations = DB.getOrCreateArray(Elements: {
1701 MDNode::get(Context&: Ctx, MDs: FunctionNameAnnotation),
1702 MDNode::get(Context&: Ctx, MDs: CFGHashAnnotation),
1703 MDNode::get(Context&: Ctx, MDs: NumCountersAnnotation),
1704 });
1705 auto *DICounter = DB.createGlobalVariableExpression(
1706 Context: SP, Name: CounterPtr->getName(), /*LinkageName=*/StringRef(), File: SP->getFile(),
1707 /*LineNo=*/0, Ty: DB.createUnspecifiedType(Name: "Profile Data Type"),
1708 IsLocalToUnit: CounterPtr->hasLocalLinkage(), /*IsDefined=*/isDefined: true, /*Expr=*/nullptr,
1709 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
1710 Annotations);
1711 CounterPtr->addDebugInfo(GV: DICounter);
1712 DB.finalize();
1713 }
1714
1715 // Mark the counter variable as used so that it isn't optimized out.
1716 CompilerUsedVars.push_back(x: PD.RegionCounters);
1717 }
1718
1719 // Create the data variable (if it doesn't already exist).
1720 createDataVariable(Inc);
1721
1722 return PD.RegionCounters;
1723}
1724
1725void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
1726 // When debug information is correlated to profile data, a data variable
1727 // is not needed.
1728 if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
1729 return;
1730
1731 GlobalVariable *NamePtr = Inc->getName();
1732 auto &PD = ProfileDataMap[NamePtr];
1733
1734 // Return if data variable was already created.
1735 if (PD.DataVar)
1736 return;
1737
1738 LLVMContext &Ctx = M.getContext();
1739
1740 Function *Fn = Inc->getParent()->getParent();
1741 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
1742 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1743
1744 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1745 // symbols in the same csect won't be discarded. When there are duplicate weak
1746 // symbols, we can NOT guarantee that the relocations get resolved to the
1747 // intended weak symbol, so we can not ensure the correctness of the relative
1748 // CounterPtr, so we have to use private linkage for counter and data symbols.
1749 if (TT.isOSBinFormatXCOFF()) {
1750 Linkage = GlobalValue::PrivateLinkage;
1751 Visibility = GlobalValue::DefaultVisibility;
1752 }
1753
1754 bool NeedComdat = needsComdatForCounter(GV: *Fn, M);
1755 bool Renamed;
1756
1757 // The Data Variable section is anchored to profile counters.
1758 std::string CntsVarName =
1759 getVarName(Inc, Prefix: getInstrProfCountersVarPrefix(), Renamed);
1760 std::string DataVarName =
1761 getVarName(Inc, Prefix: getInstrProfDataVarPrefix(), Renamed);
1762
1763 auto *Int8PtrTy = PointerType::getUnqual(C&: Ctx);
1764 // Allocate statically the array of pointers to value profile nodes for
1765 // the current function.
1766 Constant *ValuesPtrExpr = ConstantPointerNull::get(T: Int8PtrTy);
1767 uint64_t NS = 0;
1768 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1769 NS += PD.NumValueSites[Kind];
1770 if (NS > 0 && ValueProfileStaticAlloc &&
1771 !needsRuntimeRegistrationOfSectionRange(TT)) {
1772 ArrayType *ValuesTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NS);
1773 auto *ValuesVar = new GlobalVariable(
1774 M, ValuesTy, false, Linkage, Constant::getNullValue(Ty: ValuesTy),
1775 getVarName(Inc, Prefix: getInstrProfValuesVarPrefix(), Renamed));
1776 ValuesVar->setVisibility(Visibility);
1777 setGlobalVariableLargeSection(TargetTriple: TT, GV&: *ValuesVar);
1778 ValuesVar->setSection(
1779 getInstrProfSectionName(IPSK: IPSK_vals, OF: TT.getObjectFormat()));
1780 ValuesVar->setAlignment(Align(8));
1781 maybeSetComdat(GV: ValuesVar, GO: Fn, CounterGroupName: CntsVarName);
1782 ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1783 C: ValuesVar, Ty: PointerType::get(C&: Fn->getContext(), AddressSpace: 0));
1784 }
1785
1786 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1787 auto *CounterPtr = PD.RegionCounters;
1788
1789 uint64_t NumBitmapBytes = PD.NumBitmapBytes;
1790
1791 // Create data variable.
1792 auto *IntPtrTy = M.getDataLayout().getIntPtrType(C&: M.getContext());
1793 auto *Int16Ty = Type::getInt16Ty(C&: Ctx);
1794 auto *Int16ArrayTy = ArrayType::get(ElementType: Int16Ty, NumElements: IPVK_Last + 1);
1795 Type *DataTypes[] = {
1796#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
1797#include "llvm/ProfileData/InstrProfData.inc"
1798 };
1799 auto *DataTy = StructType::get(Context&: Ctx, Elements: ArrayRef(DataTypes));
1800
1801 Constant *FunctionAddr = getFuncAddrForProfData(Fn);
1802
1803 Constant *Int16ArrayVals[IPVK_Last + 1];
1804 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1805 Int16ArrayVals[Kind] = ConstantInt::get(Ty: Int16Ty, V: PD.NumValueSites[Kind]);
1806
1807 if (isGPUProfTarget(M)) {
1808 Linkage = GlobalValue::ExternalLinkage;
1809 Visibility = GlobalValue::ProtectedVisibility;
1810 }
1811 // If the data variable is not referenced by code (if we don't emit
1812 // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
1813 // data variable live under linker GC, the data variable can be private. This
1814 // optimization applies to ELF.
1815 //
1816 // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
1817 // to be false.
1818 //
1819 // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
1820 // that other copies must have the same CFG and cannot have value profiling.
1821 // If no hash suffix, other profd copies may be referenced by code.
1822 else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
1823 (TT.isOSBinFormatELF() ||
1824 (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
1825 Linkage = GlobalValue::PrivateLinkage;
1826 Visibility = GlobalValue::DefaultVisibility;
1827 }
1828 auto *Data =
1829 new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
1830 Constant *RelativeCounterPtr;
1831 GlobalVariable *BitmapPtr = PD.RegionBitmaps;
1832 Constant *RelativeBitmapPtr = ConstantInt::get(Ty: IntPtrTy, V: 0);
1833 InstrProfSectKind DataSectionKind;
1834 // With binary profile correlation, profile data is not loaded into memory.
1835 // profile data must reference profile counter with an absolute relocation.
1836 if (ProfileCorrelate == InstrProfCorrelator::BINARY) {
1837 DataSectionKind = IPSK_covdata;
1838 RelativeCounterPtr = ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy);
1839 if (BitmapPtr != nullptr)
1840 RelativeBitmapPtr = ConstantExpr::getPtrToInt(C: BitmapPtr, Ty: IntPtrTy);
1841 } else {
1842 // Reference the counter variable with a label difference (link-time
1843 // constant).
1844 DataSectionKind = IPSK_data;
1845 RelativeCounterPtr =
1846 ConstantExpr::getSub(C1: ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy),
1847 C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
1848 if (BitmapPtr != nullptr)
1849 RelativeBitmapPtr =
1850 ConstantExpr::getSub(C1: ConstantExpr::getPtrToInt(C: BitmapPtr, Ty: IntPtrTy),
1851 C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
1852 }
1853
1854 Constant *DataVals[] = {
1855#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
1856#include "llvm/ProfileData/InstrProfData.inc"
1857 };
1858 Data->setInitializer(ConstantStruct::get(T: DataTy, V: DataVals));
1859
1860 Data->setVisibility(Visibility);
1861 Data->setSection(
1862 getInstrProfSectionName(IPSK: DataSectionKind, OF: TT.getObjectFormat()));
1863 Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
1864 maybeSetComdat(GV: Data, GO: Fn, CounterGroupName: CntsVarName);
1865
1866 PD.DataVar = Data;
1867
1868 // Mark the data variable as used so that it isn't stripped out.
1869 CompilerUsedVars.push_back(x: Data);
1870 // Now that the linkage set by the FE has been passed to the data and counter
1871 // variables, reset Name variable's linkage and visibility to private so that
1872 // it can be removed later by the compiler.
1873 NamePtr->setLinkage(GlobalValue::PrivateLinkage);
1874 // Collect the referenced names to be used by emitNameData.
1875 ReferencedNames.push_back(x: NamePtr);
1876}
1877
1878void InstrLowerer::emitVNodes() {
1879 if (!ValueProfileStaticAlloc)
1880 return;
1881
1882 // For now only support this on platforms that do
1883 // not require runtime registration to discover
1884 // named section start/end.
1885 if (needsRuntimeRegistrationOfSectionRange(TT))
1886 return;
1887
1888 size_t TotalNS = 0;
1889 for (auto &PD : ProfileDataMap) {
1890 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1891 TotalNS += PD.second.NumValueSites[Kind];
1892 }
1893
1894 if (!TotalNS)
1895 return;
1896
1897 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
1898// Heuristic for small programs with very few total value sites.
1899// The default value of vp-counters-per-site is chosen based on
1900// the observation that large apps usually have a low percentage
1901// of value sites that actually have any profile data, and thus
1902// the average number of counters per site is low. For small
1903// apps with very few sites, this may not be true. Bump up the
1904// number of counters in this case.
1905#define INSTR_PROF_MIN_VAL_COUNTS 10
1906 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
1907 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, b: (int)NumCounters * 2);
1908
1909 auto &Ctx = M.getContext();
1910 Type *VNodeTypes[] = {
1911#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
1912#include "llvm/ProfileData/InstrProfData.inc"
1913 };
1914 auto *VNodeTy = StructType::get(Context&: Ctx, Elements: ArrayRef(VNodeTypes));
1915
1916 ArrayType *VNodesTy = ArrayType::get(ElementType: VNodeTy, NumElements: NumCounters);
1917 auto *VNodesVar = new GlobalVariable(
1918 M, VNodesTy, false, GlobalValue::PrivateLinkage,
1919 Constant::getNullValue(Ty: VNodesTy), getInstrProfVNodesVarName());
1920 setGlobalVariableLargeSection(TargetTriple: TT, GV&: *VNodesVar);
1921 VNodesVar->setSection(
1922 getInstrProfSectionName(IPSK: IPSK_vnodes, OF: TT.getObjectFormat()));
1923 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(Ty: VNodesTy));
1924 // VNodesVar is used by runtime but not referenced via relocation by other
1925 // sections. Conservatively make it linker retained.
1926 UsedVars.push_back(x: VNodesVar);
1927}
1928
1929void InstrLowerer::emitNameData() {
1930 if (ReferencedNames.empty())
1931 return;
1932
1933 std::string CompressedNameStr;
1934 if (Error E = collectPGOFuncNameStrings(NameVars: ReferencedNames, Result&: CompressedNameStr,
1935 doCompression: DoInstrProfNameCompression)) {
1936 report_fatal_error(reason: Twine(toString(E: std::move(E))), gen_crash_diag: false);
1937 }
1938
1939 auto &Ctx = M.getContext();
1940 auto *NamesVal =
1941 ConstantDataArray::getString(Context&: Ctx, Initializer: StringRef(CompressedNameStr), AddNull: false);
1942 NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
1943 GlobalValue::PrivateLinkage, NamesVal,
1944 getInstrProfNamesVarName());
1945 if (isGPUProfTarget(M)) {
1946 NamesVar->setLinkage(GlobalValue::ExternalLinkage);
1947 NamesVar->setVisibility(GlobalValue::ProtectedVisibility);
1948 }
1949
1950 NamesSize = CompressedNameStr.size();
1951 setGlobalVariableLargeSection(TargetTriple: TT, GV&: *NamesVar);
1952 NamesVar->setSection(
1953 ProfileCorrelate == InstrProfCorrelator::BINARY
1954 ? getInstrProfSectionName(IPSK: IPSK_covname, OF: TT.getObjectFormat())
1955 : getInstrProfSectionName(IPSK: IPSK_name, OF: TT.getObjectFormat()));
1956 // On COFF, it's important to reduce the alignment down to 1 to prevent the
1957 // linker from inserting padding before the start of the names section or
1958 // between names entries.
1959 NamesVar->setAlignment(Align(1));
1960 // NamesVar is used by runtime but not referenced via relocation by other
1961 // sections. Conservatively make it linker retained.
1962 UsedVars.push_back(x: NamesVar);
1963
1964 for (auto *NamePtr : ReferencedNames)
1965 NamePtr->eraseFromParent();
1966}
1967
1968void InstrLowerer::emitVTableNames() {
1969 if (!EnableVTableValueProfiling || ReferencedVTables.empty())
1970 return;
1971
1972 // Collect the PGO names of referenced vtables and compress them.
1973 std::string CompressedVTableNames;
1974 if (Error E = collectVTableStrings(VTables: ReferencedVTables, Result&: CompressedVTableNames,
1975 doCompression: DoInstrProfNameCompression)) {
1976 report_fatal_error(reason: Twine(toString(E: std::move(E))), gen_crash_diag: false);
1977 }
1978
1979 auto &Ctx = M.getContext();
1980 auto *VTableNamesVal = ConstantDataArray::getString(
1981 Context&: Ctx, Initializer: StringRef(CompressedVTableNames), AddNull: false /* AddNull */);
1982 GlobalVariable *VTableNamesVar =
1983 new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
1984 GlobalValue::PrivateLinkage, VTableNamesVal,
1985 getInstrProfVTableNamesVarName());
1986 VTableNamesVar->setSection(
1987 getInstrProfSectionName(IPSK: IPSK_vname, OF: TT.getObjectFormat()));
1988 VTableNamesVar->setAlignment(Align(1));
1989 // Make VTableNames linker retained.
1990 UsedVars.push_back(x: VTableNamesVar);
1991}
1992
1993void InstrLowerer::emitRegistration() {
1994 if (!needsRuntimeRegistrationOfSectionRange(TT))
1995 return;
1996
1997 // Construct the function.
1998 auto *VoidTy = Type::getVoidTy(C&: M.getContext());
1999 auto *VoidPtrTy = PointerType::getUnqual(C&: M.getContext());
2000 auto *Int64Ty = Type::getInt64Ty(C&: M.getContext());
2001 auto *RegisterFTy = FunctionType::get(Result: VoidTy, isVarArg: false);
2002 auto *RegisterF = Function::Create(Ty: RegisterFTy, Linkage: GlobalValue::InternalLinkage,
2003 N: getInstrProfRegFuncsName(), M);
2004 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2005 if (Options.NoRedZone)
2006 RegisterF->addFnAttr(Kind: Attribute::NoRedZone);
2007
2008 auto *RuntimeRegisterTy = FunctionType::get(Result: VoidTy, Params: VoidPtrTy, isVarArg: false);
2009 auto *RuntimeRegisterF =
2010 Function::Create(Ty: RuntimeRegisterTy, Linkage: GlobalVariable::ExternalLinkage,
2011 N: getInstrProfRegFuncName(), M);
2012
2013 IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: RegisterF));
2014 for (Value *Data : CompilerUsedVars)
2015 if (!isa<Function>(Val: Data))
2016 // Check for addrspace cast when profiling GPU
2017 IRB.CreateCall(Callee: RuntimeRegisterF,
2018 Args: IRB.CreatePointerBitCastOrAddrSpaceCast(V: Data, DestTy: VoidPtrTy));
2019 for (Value *Data : UsedVars)
2020 if (Data != NamesVar && !isa<Function>(Val: Data))
2021 IRB.CreateCall(Callee: RuntimeRegisterF,
2022 Args: IRB.CreatePointerBitCastOrAddrSpaceCast(V: Data, DestTy: VoidPtrTy));
2023
2024 if (NamesVar) {
2025 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
2026 auto *NamesRegisterTy =
2027 FunctionType::get(Result: VoidTy, Params: ArrayRef(ParamTypes), isVarArg: false);
2028 auto *NamesRegisterF =
2029 Function::Create(Ty: NamesRegisterTy, Linkage: GlobalVariable::ExternalLinkage,
2030 N: getInstrProfNamesRegFuncName(), M);
2031 IRB.CreateCall(Callee: NamesRegisterF, Args: {IRB.CreatePointerBitCastOrAddrSpaceCast(
2032 V: NamesVar, DestTy: VoidPtrTy),
2033 IRB.getInt64(C: NamesSize)});
2034 }
2035
2036 IRB.CreateRetVoid();
2037}
2038
2039bool InstrLowerer::emitRuntimeHook() {
2040 // We expect the linker to be invoked with -u<hook_var> flag for Linux
2041 // in which case there is no need to emit the external variable.
2042 if (TT.isOSLinux() || TT.isOSAIX())
2043 return false;
2044
2045 // If the module's provided its own runtime, we don't need to do anything.
2046 if (M.getGlobalVariable(Name: getInstrProfRuntimeHookVarName()))
2047 return false;
2048
2049 // Declare an external variable that will pull in the runtime initialization.
2050 auto *Int32Ty = Type::getInt32Ty(C&: M.getContext());
2051 auto *Var =
2052 new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
2053 nullptr, getInstrProfRuntimeHookVarName());
2054 if (isGPUProfTarget(M))
2055 Var->setVisibility(GlobalValue::ProtectedVisibility);
2056 else
2057 Var->setVisibility(GlobalValue::HiddenVisibility);
2058
2059 if (TT.isOSBinFormatELF() && !TT.isPS()) {
2060 // Mark the user variable as used so that it isn't stripped out.
2061 CompilerUsedVars.push_back(x: Var);
2062 } else {
2063 // Make a function that uses it.
2064 auto *User = Function::Create(Ty: FunctionType::get(Result: Int32Ty, isVarArg: false),
2065 Linkage: GlobalValue::LinkOnceODRLinkage,
2066 N: getInstrProfRuntimeHookVarUseFuncName(), M);
2067 User->addFnAttr(Kind: Attribute::NoInline);
2068 if (Options.NoRedZone)
2069 User->addFnAttr(Kind: Attribute::NoRedZone);
2070 User->setVisibility(GlobalValue::HiddenVisibility);
2071 if (TT.supportsCOMDAT())
2072 User->setComdat(M.getOrInsertComdat(Name: User->getName()));
2073 // Explicitly mark this function as cold since it is never called.
2074 User->setEntryCount(Count: 0);
2075
2076 IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: User));
2077 auto *Load = IRB.CreateLoad(Ty: Int32Ty, Ptr: Var);
2078 IRB.CreateRet(V: Load);
2079
2080 // Mark the function as used so that it isn't stripped out.
2081 CompilerUsedVars.push_back(x: User);
2082 }
2083 return true;
2084}
2085
2086void InstrLowerer::emitUses() {
2087 // The metadata sections are parallel arrays. Optimizers (e.g.
2088 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2089 // we conservatively retain all unconditionally in the compiler.
2090 //
2091 // On ELF and Mach-O, the linker can guarantee the associated sections will be
2092 // retained or discarded as a unit, so llvm.compiler.used is sufficient.
2093 // Similarly on COFF, if prof data is not referenced by code we use one comdat
2094 // and ensure this GC property as well. Otherwise, we have to conservatively
2095 // make all of the sections retained by the linker.
2096 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2097 (TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2098 appendToCompilerUsed(M, Values: CompilerUsedVars);
2099 else
2100 appendToUsed(M, Values: CompilerUsedVars);
2101
2102 // We do not add proper references from used metadata sections to NamesVar and
2103 // VNodesVar, so we have to be conservative and place them in llvm.used
2104 // regardless of the target,
2105 appendToUsed(M, Values: UsedVars);
2106}
2107
2108void InstrLowerer::emitInitialization() {
2109 // Create ProfileFileName variable. Don't don't this for the
2110 // context-sensitive instrumentation lowering: This lowering is after
2111 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
2112 // have already create the variable before LTO/ThinLTO linking.
2113 if (!IsCS)
2114 createProfileFileNameVar(M, InstrProfileOutput: Options.InstrProfileOutput);
2115 Function *RegisterF = M.getFunction(Name: getInstrProfRegFuncsName());
2116 if (!RegisterF)
2117 return;
2118
2119 // Create the initialization function.
2120 auto *VoidTy = Type::getVoidTy(C&: M.getContext());
2121 auto *F = Function::Create(Ty: FunctionType::get(Result: VoidTy, isVarArg: false),
2122 Linkage: GlobalValue::InternalLinkage,
2123 N: getInstrProfInitFuncName(), M);
2124 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2125 F->addFnAttr(Kind: Attribute::NoInline);
2126 if (Options.NoRedZone)
2127 F->addFnAttr(Kind: Attribute::NoRedZone);
2128
2129 // Add the basic block and the necessary calls.
2130 IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: F));
2131 IRB.CreateCall(Callee: RegisterF, Args: {});
2132 IRB.CreateRetVoid();
2133
2134 appendToGlobalCtors(M, F, Priority: 0);
2135}
2136
2137namespace llvm {
2138// Create the variable for profile sampling.
2139void createProfileSamplingVar(Module &M) {
2140 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
2141 IntegerType *SamplingVarTy;
2142 Constant *ValueZero;
2143 if (getSampledInstrumentationConfig().UseShort) {
2144 SamplingVarTy = Type::getInt16Ty(C&: M.getContext());
2145 ValueZero = Constant::getIntegerValue(Ty: SamplingVarTy, V: APInt(16, 0));
2146 } else {
2147 SamplingVarTy = Type::getInt32Ty(C&: M.getContext());
2148 ValueZero = Constant::getIntegerValue(Ty: SamplingVarTy, V: APInt(32, 0));
2149 }
2150 auto SamplingVar = new GlobalVariable(
2151 M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2152 SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2153 SamplingVar->setThreadLocal(true);
2154 Triple TT(M.getTargetTriple());
2155 if (TT.supportsCOMDAT()) {
2156 SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2157 SamplingVar->setComdat(M.getOrInsertComdat(Name: VarName));
2158 }
2159 appendToCompilerUsed(M, Values: SamplingVar);
2160}
2161} // namespace llvm
2162