1//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10// It also builds the data structures and initialization code needed for
11// updating execution counts and emitting the profile at runtime.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/ADT/Twine.h"
21#include "llvm/Analysis/BlockFrequencyInfo.h"
22#include "llvm/Analysis/BranchProbabilityInfo.h"
23#include "llvm/Analysis/CFG.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetLibraryInfo.h"
26#include "llvm/IR/Attributes.h"
27#include "llvm/IR/BasicBlock.h"
28#include "llvm/IR/CFG.h"
29#include "llvm/IR/Constant.h"
30#include "llvm/IR/Constants.h"
31#include "llvm/IR/DIBuilder.h"
32#include "llvm/IR/DerivedTypes.h"
33#include "llvm/IR/DiagnosticInfo.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/GlobalValue.h"
37#include "llvm/IR/GlobalVariable.h"
38#include "llvm/IR/IRBuilder.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Module.h"
44#include "llvm/IR/Type.h"
45#include "llvm/Pass.h"
46#include "llvm/ProfileData/InstrProf.h"
47#include "llvm/ProfileData/InstrProfCorrelator.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Compiler.h"
51#include "llvm/Support/Error.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/TargetParser/Triple.h"
54#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
55#include "llvm/Transforms/Utils/BasicBlockUtils.h"
56#include "llvm/Transforms/Utils/Instrumentation.h"
57#include "llvm/Transforms/Utils/ModuleUtils.h"
58#include "llvm/Transforms/Utils/SSAUpdater.h"
59#include <algorithm>
60#include <cassert>
61#include <cstdint>
62#include <string>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "instrprof"
67
namespace llvm {
// Command line option to enable vtable value profiling. Defined in
// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
extern cl::opt<bool> EnableVTableValueProfiling;
// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
// -profile-correlate=debug-info.
cl::opt<bool> DebugInfoCorrelate(
    "debug-info-correlate",
    cl::desc("Use debug info to correlate profiles. (Deprecated, use "
             "-profile-correlate=debug-info)"),
    cl::init(Val: false));

// Selects how (if at all) raw profiles are correlated back to the binary:
// not at all, via debug info, or via the binary itself.
LLVM_ABI cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
    "profile-correlate",
    cl::desc("Use debug info or binary file to correlate profiles."),
    cl::init(Val: InstrProfCorrelator::NONE),
    cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
                          "No profile correlation"),
               clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
                          "Use debug info to correlate"),
               clEnumValN(InstrProfCorrelator::BINARY, "binary",
                          "Use binary to correlate")));
} // namespace llvm
91
namespace {

cl::opt<bool> DoHashBasedCounterSplit(
    "hash-based-counter-split",
    cl::desc("Rename counter variable of a comdat function based on cfg hash"),
    cl::init(Val: true));

cl::opt<bool>
    RuntimeCounterRelocation("runtime-counter-relocation",
                             cl::desc("Enable relocating counters at runtime."),
                             cl::init(Val: false));

cl::opt<bool> ValueProfileStaticAlloc(
    "vp-static-alloc",
    cl::desc("Do static counter allocation for value profiler"),
    cl::init(Val: true));

cl::opt<double> NumCountersPerValueSite(
    "vp-counters-per-site",
    cl::desc("The average number of profile counters allocated "
             "per value profiling site."),
    // This is set to a very small value because in real programs, only
    // a very small percentage of value sites have non-zero targets, e.g, 1/30.
    // For those sites with non-zero profile, the average number of targets
    // is usually smaller than 2.
    cl::init(Val: 1.0));

cl::opt<bool> AtomicCounterUpdateAll(
    "instrprof-atomic-counter-update-all",
    cl::desc("Make all profile counter updates atomic (for testing only)"),
    cl::init(Val: false));

cl::opt<bool> AtomicCounterUpdatePromoted(
    "atomic-counter-update-promoted",
    cl::desc("Do counter update using atomic fetch add "
             " for promoted counters only"),
    cl::init(Val: false));

cl::opt<bool> AtomicFirstCounter(
    "atomic-first-counter",
    cl::desc("Use atomic fetch add for first counter in a function (usually "
             "the entry counter)"),
    cl::init(Val: false));

cl::opt<bool> ConditionalCounterUpdate(
    "conditional-counter-update",
    cl::desc("Do conditional counter updates in single byte counters mode)"),
    cl::init(Val: false));

// If the option is not specified, the default behavior about whether
// counter promotion is done depends on how instrumentation lowering
// pipeline is setup, i.e., the value of this option alone does not
// determine whether the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
cl::opt<bool> DoCounterPromotion("do-counter-promotion",
                                 cl::desc("Do counter register promotion"),
                                 cl::init(Val: false));
cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
    "max-counter-promotions-per-loop", cl::init(Val: 20),
    cl::desc("Max number counter promotions per loop to avoid"
             " increasing register pressure too much"));

// A debug option
cl::opt<int>
    MaxNumOfPromotions("max-counter-promotions", cl::init(Val: -1),
                       cl::desc("Max number of allowed counter promotions"));

cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
    "speculative-counter-promotion-max-exiting", cl::init(Val: 3),
    cl::desc("The max number of exiting blocks of a loop to allow "
             " speculative counter promotion"));

cl::opt<bool> SpeculativeCounterPromotionToLoop(
    "speculative-counter-promotion-to-loop",
    cl::desc("When the option is false, if the target block is in a loop, "
             "the promotion will be disallowed unless the promoted counter "
             " update can be further/iteratively promoted into an acyclic "
             " region."));

cl::opt<bool> IterativeCounterPromotion(
    "iterative-counter-promotion", cl::init(Val: true),
    cl::desc("Allow counter promotion across the whole loop nest."));

cl::opt<bool> SkipRetExitBlock(
    "skip-ret-exit-block", cl::init(Val: true),
    cl::desc("Suppress counter promotion if exit blocks contain ret."));

// Sampled instrumentation: see the comment block above doSampling() for the
// three sampling code-generation flavors these two options select.
static cl::opt<bool> SampledInstr("sampled-instrumentation", cl::ZeroOrMore,
                                  cl::init(Val: false),
                                  cl::desc("Do PGO instrumentation sampling"));

static cl::opt<unsigned> SampledInstrPeriod(
    "sampled-instr-period",
    cl::desc("Set the profile instrumentation sample period. A sample period "
             "of 0 is invalid. For each sample period, a fixed number of "
             "consecutive samples will be recorded. The number is controlled "
             "by 'sampled-instr-burst-duration' flag. The default sample "
             "period of 65536 is optimized for generating efficient code that "
             "leverages unsigned short integer wrapping in overflow, but this "
             "is disabled under simple sampling (burst duration = 1)."),
    cl::init(USHRT_MAX + 1));

static cl::opt<unsigned> SampledInstrBurstDuration(
    "sampled-instr-burst-duration",
    cl::desc("Set the profile instrumentation burst duration, which can range "
             "from 1 to the value of 'sampled-instr-period' (0 is invalid). "
             "This number of samples will be recorded for each "
             "'sampled-instr-period' count update. Setting to 1 enables simple "
             "sampling, in which case it is recommended to set "
             "'sampled-instr-period' to a prime number."),
    cl::init(Val: 200));
203
// Derived settings for sampled instrumentation; see
// getSampledInstrumentationConfig() for how the fields are computed from
// the command-line options above.
struct SampledInstrumentationConfig {
  // Number of consecutive counter updates recorded in each period.
  unsigned BurstDuration;
  // Length of one sampling period, in counter-update events.
  unsigned Period;
  // True when the sampling variable is 16-bit (Period fits in unsigned
  // short, or fast sampling relies on 16-bit wrap-around).
  bool UseShort;
  // BurstDuration == 1: simple sampling, no burst guard is emitted.
  bool IsSimpleSampling;
  // Burst sampling with Period == 65536: the period check is omitted and
  // the 16-bit sampling variable wraps to zero on overflow.
  bool IsFastSampling;
};
211
212static SampledInstrumentationConfig getSampledInstrumentationConfig() {
213 SampledInstrumentationConfig config;
214 config.BurstDuration = SampledInstrBurstDuration.getValue();
215 config.Period = SampledInstrPeriod.getValue();
216 if (config.BurstDuration > config.Period)
217 report_fatal_error(
218 reason: "SampledBurstDuration must be less than or equal to SampledPeriod");
219 if (config.Period == 0 || config.BurstDuration == 0)
220 report_fatal_error(
221 reason: "SampledPeriod and SampledBurstDuration must be greater than 0");
222 config.IsSimpleSampling = (config.BurstDuration == 1);
223 // If (BurstDuration == 1 && Period == 65536), generate the simple sampling
224 // style code.
225 config.IsFastSampling =
226 (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1);
227 config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling;
228 return config;
229}
230
231using LoadStorePair = std::pair<Instruction *, Instruction *>;
232
233static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
234 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(Val: M.getModuleFlag(Key: Flag));
235 if (!MD)
236 return 0;
237
238 // If the flag is a ConstantAsMetadata, it should be an integer representable
239 // in 64-bits.
240 return cast<ConstantInt>(Val: MD->getValue())->getZExtValue();
241}
242
243static bool enablesValueProfiling(const Module &M) {
244 return isIRPGOFlagSet(M: &M) ||
245 getIntModuleFlagOrZero(M, Flag: "EnableValueProfiling") != 0;
246}
247
248// Conservatively returns true if value profiling is enabled.
249static bool profDataReferencedByCode(const Module &M) {
250 return enablesValueProfiling(M);
251}
252
// Lowers instrprof_* intrinsics in a module: replaces them with concrete
// counter/bitmap updates and emits the global data structures (counters,
// compressed names, per-function data records, registration and
// initialization code) that the profile runtime expects.
class InstrLowerer final {
public:
  InstrLowerer(Module &M, const InstrProfOptions &Options,
               std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
               bool IsCS)
      : M(M), Options(Options), TT(M.getTargetTriple()), IsCS(IsCS),
        GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}

  // Run the lowering over the whole module; returns true if anything changed.
  bool lower();

private:
  Module &M;
  const InstrProfOptions Options;
  const Triple TT;
  // Is this lowering for the context-sensitive instrumentation.
  const bool IsCS;

  std::function<const TargetLibraryInfo &(Function &F)> GetTLI;

  // True if generated code may reference the profile data directly; computed
  // from profDataReferencedByCode() (i.e. when value profiling is enabled).
  const bool DataReferencedByCode;

  // Profiling globals created per instrumented function.
  struct PerFunctionProfileData {
    // Number of instrumented value sites, indexed by value profiling kind.
    uint32_t NumValueSites[IPVK_Last + 1] = {};
    GlobalVariable *RegionCounters = nullptr;
    GlobalVariable *DataVar = nullptr;
    GlobalVariable *RegionBitmaps = nullptr;
    // Size of the MC/DC test-vector bitmap, in bytes.
    uint32_t NumBitmapBytes = 0;

    PerFunctionProfileData() = default;
  };
  // Keyed by the function's name variable (a GlobalVariable).
  DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
  // Key is virtual table variable, value is 'VTableProfData' in the form of
  // GlobalVariable.
  DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
  /// If runtime relocation is enabled, this maps functions to the load
  /// instruction that produces the profile relocation bias.
  DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
  std::vector<GlobalValue *> CompilerUsedVars;
  std::vector<GlobalValue *> UsedVars;
  std::vector<GlobalVariable *> ReferencedNames;
  // The list of virtual table variables of which the VTableProfData is
  // collected.
  std::vector<GlobalVariable *> ReferencedVTables;
  GlobalVariable *NamesVar = nullptr;
  size_t NamesSize = 0;

  // vector of counter load/store pairs to be register promoted.
  std::vector<LoadStorePair> PromotionCandidates;

  // Running total of promoted counters, bounded by -max-counter-promotions.
  int64_t TotalCountersPromoted = 0;

  /// Lower instrumentation intrinsics in the function. Returns true if there
  /// any lowering.
  bool lowerIntrinsics(Function *F);

  /// Register-promote counter loads and stores in loops.
  void promoteCounterLoadStores(Function *F);

  /// Returns true if relocating counters at runtime is enabled.
  bool isRuntimeCounterRelocationEnabled() const;

  /// Returns true if profile counter update register promotion is enabled.
  bool isCounterPromotionEnabled() const;

  /// Return true if profile sampling is enabled.
  bool isSamplingEnabled() const;

  /// Count the number of instrumented value sites for the function.
  void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);

  /// Replace instrprof.value.profile with a call to runtime library.
  void lowerValueProfileInst(InstrProfValueProfileInst *Ins);

  /// Replace instrprof.cover with a store instruction to the coverage byte.
  void lowerCover(InstrProfCoverInst *Inc);

  /// Replace instrprof.timestamp with a call to
  /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
  void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);

  /// Replace instrprof.increment with an increment of the appropriate value.
  void lowerIncrement(InstrProfIncrementInst *Inc);

  /// Force emitting of name vars for unused functions.
  void lowerCoverageData(GlobalVariable *CoverageNamesVar);

  /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
  /// using the index represented by the a temp value into a bitmap.
  void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);

  /// Get the Bias value for data to access mmap-ed area.
  /// Create it if it hasn't been seen.
  GlobalVariable *getOrCreateBiasVar(StringRef VarName);

  /// Compute the address of the counter value that this profiling instruction
  /// acts on.
  Value *getCounterAddress(InstrProfCntrInstBase *I);

  /// Lower the incremental instructions under profile sampling predicates.
  void doSampling(Instruction *I);

  /// Get the region counters for an increment, creating them if necessary.
  ///
  /// If the counter array doesn't yet exist, the profile data variables
  /// referring to them will also be created.
  GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);

  /// Create the region counters.
  GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
                                       StringRef Name,
                                       GlobalValue::LinkageTypes Linkage);

  /// Compute the address of the test vector bitmap that this profiling
  /// instruction acts on.
  Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);

  /// Get the region bitmaps for an increment, creating them if necessary.
  ///
  /// If the bitmap array doesn't yet exist, the profile data variables
  /// referring to them will also be created.
  GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);

  /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
  /// an MC/DC Decision region. The number of bytes required is indicated by
  /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
  /// as part of setupProfileSection() and is conceptually very similar to
  /// what is done for profile data counters in createRegionCounters().
  GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
                                      StringRef Name,
                                      GlobalValue::LinkageTypes Linkage);

  /// Set Comdat property of GV, if required.
  void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);

  /// Setup the sections into which counters and bitmaps are allocated.
  GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
                                      InstrProfSectKind IPSK);

  /// Create INSTR_PROF_DATA variable for counters and bitmaps.
  void createDataVariable(InstrProfCntrInstBase *Inc);

  /// Get the counters for virtual table values, creating them if necessary.
  void getOrCreateVTableProfData(GlobalVariable *GV);

  /// Emit the section with compressed function names.
  void emitNameData();

  /// Emit the section with compressed vtable names.
  void emitVTableNames();

  /// Emit value nodes section for value profiling.
  void emitVNodes();

  /// Emit runtime registration functions for each profile data variable.
  void emitRegistration();

  /// Emit the necessary plumbing to pull in the runtime initialization.
  /// Returns true if a change was made.
  bool emitRuntimeHook();

  /// Add uses of our data variables and runtime hook.
  void emitUses();

  /// Create a static initializer for our data, on platforms that need it,
  /// and for any profile output file that was specified.
  void emitInitialization();
};
420
///
/// A helper class to promote one counter RMW operation in the loop
/// into register update.
///
/// RMW update for the counter will be sunk out of the loop after
/// the transformation.
///
class PGOCounterPromoterHelper : public LoadAndStorePromoter {
public:
  PGOCounterPromoterHelper(
      Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
      BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
      ArrayRef<Instruction *> InsertPts,
      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
      LoopInfo &LI)
      : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
        InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
    assert(isa<LoadInst>(L));
    assert(isa<StoreInst>(S));
    // Seed the SSA updater: on loop entry (preheader) the promoted counter
    // register starts at Init.
    SSA.AddAvailableValue(BB: PH, V: Init);
  }

  // After promotion, materialize the deferred counter update on every loop
  // exit: counter-in-memory += live-in register value.
  void doExtraRewritesBeforeFinalDeletion() override {
    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = ExitBlocks[i];
      Instruction *InsertPos = InsertPts[i];
      // Get LiveIn value into the ExitBlock. If there are multiple
      // predecessors, the value is defined by a PHI node in this
      // block.
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(BB: ExitBlock);
      Value *Addr = cast<StoreInst>(Val: Store)->getPointerOperand();
      Type *Ty = LiveInValue->getType();
      IRBuilder<> Builder(InsertPos);
      if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Val: Addr)) {
        // If isRuntimeCounterRelocationEnabled() is true then the address of
        // the store instruction is computed with two instructions in
        // InstrProfiling::getCounterAddress(). We need to copy those
        // instructions to this block to compute Addr correctly.
        // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
        // %Addr = inttoptr i64 %BiasAdd to i64*
        // NOTE(review): dyn_cast may yield null, and the assert below
        // dereferences OrigBiasInst unconditionally in +Asserts builds --
        // presumably the inttoptr operand is always the bias add; confirm.
        auto *OrigBiasInst = dyn_cast<BinaryOperator>(Val: AddrInst->getOperand(i_nocapture: 0));
        assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
        Value *BiasInst = Builder.Insert(I: OrigBiasInst->clone());
        Addr = Builder.CreateIntToPtr(V: BiasInst,
                                      DestTy: PointerType::getUnqual(C&: Ty->getContext()));
      }
      if (AtomicCounterUpdatePromoted)
        // atomic update currently can only be promoted across the current
        // loop, not the whole loop nest.
        Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Addr, Val: LiveInValue,
                                Align: MaybeAlign(),
                                Ordering: AtomicOrdering::SequentiallyConsistent);
      else {
        LoadInst *OldVal = Builder.CreateLoad(Ty, Ptr: Addr, Name: "pgocount.promoted");
        auto *NewVal = Builder.CreateAdd(LHS: OldVal, RHS: LiveInValue);
        auto *NewStore = Builder.CreateStore(Val: NewVal, Ptr: Addr);

        // Now update the parent loop's candidate list: the load/store pair
        // just created in the exit block may itself be promotable out of the
        // enclosing loop under iterative promotion.
        if (IterativeCounterPromotion) {
          auto *TargetLoop = LI.getLoopFor(BB: ExitBlock);
          if (TargetLoop)
            LoopToCandidates[TargetLoop].emplace_back(Args&: OldVal, Args&: NewStore);
        }
      }
    }
  }

private:
  Instruction *Store;
  ArrayRef<BasicBlock *> ExitBlocks;
  // Insertion points, parallel to ExitBlocks.
  ArrayRef<Instruction *> InsertPts;
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
  LoopInfo &LI;
};
495
/// A helper class to do register promotion for all profile counter
/// updates in a loop.
///
class PGOCounterPromoter {
public:
  PGOCounterPromoter(
      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
      Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
      : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {

    // Skip collection of ExitBlocks and InsertPts for loops that will not be
    // able to have counters promoted.
    SmallVector<BasicBlock *, 8> LoopExitBlocks;
    SmallPtrSet<BasicBlock *, 8> BlockSet;

    L.getExitBlocks(ExitBlocks&: LoopExitBlocks);
    if (!isPromotionPossible(LP: &L, LoopExitBlocks))
      return;

    // Dedupe exit blocks, and skip any exit reached only through a pre-split
    // coroutine suspend edge (inserting there would be unsound).
    for (BasicBlock *ExitBlock : LoopExitBlocks) {
      if (BlockSet.insert(Ptr: ExitBlock).second &&
          llvm::none_of(Range: predecessors(BB: ExitBlock), P: [&](const BasicBlock *Pred) {
            return llvm::isPresplitCoroSuspendExitEdge(Src: *Pred, Dest: *ExitBlock);
          })) {
        ExitBlocks.push_back(Elt: ExitBlock);
        InsertPts.push_back(Elt: &*ExitBlock->getFirstInsertionPt());
      }
    }
  }

  // Promote as many counter load/store candidates of loop L as the limits
  // allow; \p NumPromoted accumulates the global promotion count. Returns
  // true if any counter was promoted.
  bool run(int64_t *NumPromoted) {
    // Skip 'infinite' loops:
    if (ExitBlocks.size() == 0)
      return false;

    // Skip if any of the ExitBlocks contains a ret instruction.
    // This is to prevent dumping of incomplete profile -- if the
    // loop is a long running loop and dump is called in the middle
    // of the loop, the result profile is incomplete.
    // FIXME: add other heuristics to detect long running loops.
    if (SkipRetExitBlock) {
      for (auto *BB : ExitBlocks)
        if (isa<ReturnInst>(Val: BB->getTerminator()))
          return false;
    }

    unsigned MaxProm = getMaxNumOfPromotionsInLoop(LP: &L);
    if (MaxProm == 0)
      return false;

    unsigned Promoted = 0;
    for (auto &Cand : LoopToCandidates[&L]) {

      SmallVector<PHINode *, 4> NewPHIs;
      SSAUpdater SSA(&NewPHIs);
      Value *InitVal = ConstantInt::get(Ty: Cand.first->getType(), V: 0);

      // If BFI is set, we will use it to guide the promotions.
      if (BFI) {
        auto *BB = Cand.first->getParent();
        auto InstrCount = BFI->getBlockProfileCount(BB);
        if (!InstrCount)
          continue;
        auto PreheaderCount = BFI->getBlockProfileCount(BB: L.getLoopPreheader());
        // If the average loop trip count is not greater than 1.5, we skip
        // promotion.
        if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
          continue;
      }

      PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
                                        L.getLoopPreheader(), ExitBlocks,
                                        InsertPts, LoopToCandidates, LI);
      Promoter.run(Insts: SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
      Promoted++;
      // NOTE(review): when the per-loop cap is hit here, the promotion that
      // just happened is not reflected in *NumPromoted, so the global count
      // can undercount by one per capped loop -- confirm if intentional.
      if (Promoted >= MaxProm)
        break;

      (*NumPromoted)++;
      if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
        break;
    }

    LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
                      << L.getLoopDepth() << ")\n");
    return Promoted != 0;
  }

private:
  // NOTE(review): \p LP is unused; the check operates on member loop L.
  // This helper also appears to have no caller in this file.
  bool allowSpeculativeCounterPromotion(Loop *LP) {
    SmallVector<BasicBlock *, 8> ExitingBlocks;
    L.getExitingBlocks(ExitingBlocks);
    // Not considered speculative.
    if (ExitingBlocks.size() == 1)
      return true;
    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
      return false;
    return true;
  }

  // Check whether the loop satisfies the basic conditions needed to perform
  // Counter Promotions.
  bool
  isPromotionPossible(Loop *LP,
                      const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
    // We can't insert into a catchswitch.
    if (llvm::any_of(Range: LoopExitBlocks, P: [](BasicBlock *Exit) {
          return isa<CatchSwitchInst>(Val: Exit->getTerminator());
        }))
      return false;

    if (!LP->hasDedicatedExits())
      return false;

    // A preheader is needed to host the promoted counter's initial value.
    BasicBlock *PH = LP->getLoopPreheader();
    if (!PH)
      return false;

    return true;
  }

  // Returns the max number of Counter Promotions for LP.
  unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
    SmallVector<BasicBlock *, 8> LoopExitBlocks;
    LP->getExitBlocks(ExitBlocks&: LoopExitBlocks);
    if (!isPromotionPossible(LP, LoopExitBlocks))
      return 0;

    SmallVector<BasicBlock *, 8> ExitingBlocks;
    LP->getExitingBlocks(ExitingBlocks);

    // If BFI is set, we do more aggressive promotions based on BFI.
    if (BFI)
      return (unsigned)-1;

    // Not considered speculative.
    if (ExitingBlocks.size() == 1)
      return MaxNumOfPromotionsPerLoop;

    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
      return 0;

    // Whether the target block is in a loop does not matter:
    if (SpeculativeCounterPromotionToLoop)
      return MaxNumOfPromotionsPerLoop;

    // Now check the target block: reserve budget for the promotions already
    // pending in each target loop (recursing over the loop nest).
    unsigned MaxProm = MaxNumOfPromotionsPerLoop;
    for (auto *TargetBlock : LoopExitBlocks) {
      auto *TargetLoop = LI.getLoopFor(BB: TargetBlock);
      if (!TargetLoop)
        continue;
      unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(LP: TargetLoop);
      unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
      MaxProm =
          std::min(a: MaxProm, b: std::max(a: MaxPromForTarget, b: PendingCandsInTarget) -
                                PendingCandsInTarget);
    }
    return MaxProm;
  }

  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
  SmallVector<BasicBlock *, 8> ExitBlocks;
  // Insertion points, parallel to ExitBlocks.
  SmallVector<Instruction *, 8> InsertPts;
  Loop &L;
  LoopInfo &LI;
  BlockFrequencyInfo *BFI;
};
664
// The flavor of value-profiling runtime call to emit.
enum class ValueProfilingCallType {
  // Individual values are tracked. Currently used for indirect call target
  // profiling.
  Default,

  // MemOp: the memop size value profiling.
  MemOp
};

} // end anonymous namespace
675
676PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
677 ModuleAnalysisManager &AM) {
678 FunctionAnalysisManager &FAM =
679 AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
680 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
681 return FAM.getResult<TargetLibraryAnalysis>(IR&: F);
682 };
683 InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
684 if (!Lowerer.lower())
685 return PreservedAnalyses::all();
686
687 return PreservedAnalyses::none();
688}
689
690//
691// Perform instrumentation sampling.
692//
// There are 3 flavors of sampling:
694// (1) Full burst sampling: We transform:
695// Increment_Instruction;
696// to:
697// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
698// Increment_Instruction;
699// }
700// __llvm_profile_sampling__ += 1;
701// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
702// __llvm_profile_sampling__ = 0;
703// }
704//
705// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
706// counters (value-instrumentation and edge instrumentation).
707//
708// (2) Fast burst sampling:
709// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
710// wrap around to zero when overflows. In this case, the second check is
711// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
712// set to 65536 (64K). The code after:
713// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
714// Increment_Instruction;
715// }
716// __llvm_profile_sampling__ += 1;
717//
718// (3) Simple sampling:
719// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
720// __llvm_profile_sampling__ += 1;
721// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
722// __llvm_profile_sampling__ = 0;
723// Increment_Instruction;
724// }
725//
726// Note that, the code snippet after the transformation can still be counter
727// promoted. However, with sampling enabled, counter updates are expected to
728// be infrequent, making the benefits of counter promotion negligible.
729// Moreover, counter promotion can potentially cause issues in server
730// applications, particularly when the counters are dumped without a clean
731// exit. To mitigate this risk, counter promotion is disabled by default when
732// sampling is enabled. This behavior can be overridden using the internal
733// option.
// Wrap the profiling instruction I in the sampling control flow described in
// the comment block above. No-op when sampling is disabled.
void InstrLowerer::doSampling(Instruction *I) {
  if (!isSamplingEnabled())
    return;

  SampledInstrumentationConfig config = getSampledInstrumentationConfig();
  // Materialize constant C at the width of the sampling variable (16-bit
  // when UseShort, 32-bit otherwise).
  auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) {
    if (config.UseShort)
      return Builder.getInt16(C);
    else
      return Builder.getInt32(C);
  };

  IntegerType *SamplingVarTy;
  if (config.UseShort)
    SamplingVarTy = Type::getInt16Ty(C&: M.getContext());
  else
    SamplingVarTy = Type::getInt32Ty(C&: M.getContext());
  // The thread-local sampling counter global must already exist by the time
  // lowering runs.
  auto *SamplingVar =
      M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
  assert(SamplingVar && "SamplingVar not set properly");

  // Create the condition for checking the burst duration.
  Instruction *SamplingVarIncr;
  Value *NewSamplingVarVal;
  MDBuilder MDB(I->getContext());
  MDNode *BranchWeight; // Assigned on every path before it is used below.
  IRBuilder<> CondBuilder(I);
  auto *LoadSamplingVar = CondBuilder.CreateLoad(Ty: SamplingVarTy, Ptr: SamplingVar);
  if (config.IsSimpleSampling) {
    // For the simple sampling, just create the load and increments.
    IRBuilder<> IncBuilder(I);
    NewSamplingVarVal =
        IncBuilder.CreateAdd(LHS: LoadSamplingVar, RHS: GetConstant(IncBuilder, 1));
    SamplingVarIncr = IncBuilder.CreateStore(Val: NewSamplingVarVal, Ptr: SamplingVar);
  } else {
    // For the burst-sampling, create the conditional update.
    auto *DurationCond = CondBuilder.CreateICmpULE(
        LHS: LoadSamplingVar, RHS: GetConstant(CondBuilder, config.BurstDuration - 1));
    BranchWeight = MDB.createBranchWeights(
        TrueWeight: config.BurstDuration, FalseWeight: config.Period - config.BurstDuration);
    Instruction *ThenTerm = SplitBlockAndInsertIfThen(
        Cond: DurationCond, SplitBefore: I, /* Unreachable */ false, BranchWeights: BranchWeight);
    IRBuilder<> IncBuilder(I);
    NewSamplingVarVal =
        IncBuilder.CreateAdd(LHS: LoadSamplingVar, RHS: GetConstant(IncBuilder, 1));
    SamplingVarIncr = IncBuilder.CreateStore(Val: NewSamplingVarVal, Ptr: SamplingVar);
    // Guard the original counter update with the burst-duration condition.
    I->moveBefore(InsertPos: ThenTerm->getIterator());
  }

  // Fast sampling relies on unsigned wrap-around instead of an explicit
  // period check, so we are done.
  if (config.IsFastSampling)
    return;

  // Create the condition for checking the period.
  Instruction *ThenTerm, *ElseTerm;
  IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
  auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
      LHS: NewSamplingVarVal, RHS: GetConstant(PeriodCondBuilder, config.Period));
  BranchWeight = MDB.createBranchWeights(TrueWeight: 1, FalseWeight: config.Period - 1);
  SplitBlockAndInsertIfThenElse(Cond: PeriodCond, SplitBefore: SamplingVarIncr, ThenTerm: &ThenTerm,
                                ElseTerm: &ElseTerm, BranchWeights: BranchWeight);

  // For the simple sampling, the counter update happens in sampling var reset.
  if (config.IsSimpleSampling)
    I->moveBefore(InsertPos: ThenTerm->getIterator());

  IRBuilder<> ResetBuilder(ThenTerm);
  ResetBuilder.CreateStore(Val: GetConstant(ResetBuilder, 0), Ptr: SamplingVar);
  SamplingVarIncr->moveBefore(InsertPos: ElseTerm->getIterator());
}
803
// Lower every instrprof intrinsic in F, applying sampling first when it is
// enabled, then promote the resulting counter loads/stores. Returns true if
// any intrinsic was lowered.
bool InstrLowerer::lowerIntrinsics(Function *F) {
  bool MadeChange = false;
  PromotionCandidates.clear();
  SmallVector<InstrProfInstBase *, 8> InstrProfInsts;

  // To ensure compatibility with sampling, we save the intrinsics into
  // a buffer to prevent potential breakage of the iterator (as the
  // intrinsics will be moved to a different BB).
  for (BasicBlock &BB : *F) {
    for (Instruction &Instr : llvm::make_early_inc_range(Range&: BB)) {
      if (auto *IP = dyn_cast<InstrProfInstBase>(Val: &Instr))
        InstrProfInsts.push_back(Elt: IP);
    }
  }

  // Dispatch each buffered intrinsic to its lowering routine. Keep the
  // dyn_cast order: InstrProfIncrementInstStep is tested before
  // InstrProfIncrementInst -- presumably the former subclasses the latter,
  // so reordering would misclassify step increments.
  for (auto *Instr : InstrProfInsts) {
    doSampling(I: Instr);
    if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Val: Instr)) {
      lowerIncrement(Inc: IPIS);
      MadeChange = true;
    } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Val: Instr)) {
      lowerIncrement(Inc: IPI);
      MadeChange = true;
    } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Val: Instr)) {
      lowerTimestamp(TimestampInstruction: IPC);
      MadeChange = true;
    } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Val: Instr)) {
      lowerCover(Inc: IPC);
      MadeChange = true;
    } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Val: Instr)) {
      lowerValueProfileInst(Ins: IPVP);
      MadeChange = true;
    } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Val: Instr)) {
      // Bitmap-parameters intrinsics carry metadata only; just drop them.
      IPMP->eraseFromParent();
      MadeChange = true;
    } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Val: Instr)) {
      lowerMCDCTestVectorBitmapUpdate(Ins: IPBU);
      MadeChange = true;
    }
  }

  if (!MadeChange)
    return false;

  promoteCounterLoadStores(F);
  return true;
}
851
852bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
853 // Mach-O don't support weak external references.
854 if (TT.isOSBinFormatMachO())
855 return false;
856
857 if (RuntimeCounterRelocation.getNumOccurrences() > 0)
858 return RuntimeCounterRelocation;
859
860 // Fuchsia uses runtime counter relocation by default.
861 return TT.isOSFuchsia();
862}
863
864bool InstrLowerer::isSamplingEnabled() const {
865 if (SampledInstr.getNumOccurrences() > 0)
866 return SampledInstr;
867 return Options.Sampling;
868}
869
870bool InstrLowerer::isCounterPromotionEnabled() const {
871 if (DoCounterPromotion.getNumOccurrences() > 0)
872 return DoCounterPromotion;
873
874 return Options.DoCounterPromotion;
875}
876
/// Hoist the counter load/store pairs recorded in PromotionCandidates out of
/// their enclosing loops, so counters are updated once per loop exit rather
/// than once per iteration.
void InstrLowerer::promoteCounterLoadStores(Function *F) {
  if (!isCounterPromotionEnabled())
    return;

  DominatorTree DT(*F);
  LoopInfo LI(DT);
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;

  // Optionally build BFI (which needs BPI) so the promoter can weigh
  // profitability by block frequency.
  std::unique_ptr<BlockFrequencyInfo> BFI;
  if (Options.UseBFIInPromotion) {
    std::unique_ptr<BranchProbabilityInfo> BPI;
    BPI.reset(p: new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
    BFI.reset(p: new BlockFrequencyInfo(*F, *BPI, LI));
  }

  // Bucket each candidate by the innermost loop containing its load;
  // candidates outside any loop cannot be promoted and are skipped.
  for (const auto &LoadStore : PromotionCandidates) {
    auto *CounterLoad = LoadStore.first;
    auto *CounterStore = LoadStore.second;
    BasicBlock *BB = CounterLoad->getParent();
    Loop *ParentLoop = LI.getLoopFor(BB);
    if (!ParentLoop)
      continue;
    LoopPromotionCandidates[ParentLoop].emplace_back(Args&: CounterLoad, Args&: CounterStore);
  }

  SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();

  // Do a post-order traversal of the loops so that counter updates can be
  // iteratively hoisted outside the loop nest.
  for (auto *Loop : llvm::reverse(C&: Loops)) {
    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
    Promoter.run(NumPromoted: &TotalCountersPromoted);
  }
}
911
912static bool needsRuntimeHookUnconditionally(const Triple &TT) {
913 // On Fuchsia, we only need runtime hook if any counters are present.
914 if (TT.isOSFuchsia())
915 return false;
916
917 return true;
918}
919
920/// Check if the module contains uses of any profiling intrinsics.
921static bool containsProfilingIntrinsics(Module &M) {
922 auto containsIntrinsic = [&](int ID) {
923 if (auto *F = Intrinsic::getDeclarationIfExists(M: &M, id: ID))
924 return !F->use_empty();
925 return false;
926 };
927 return containsIntrinsic(Intrinsic::instrprof_cover) ||
928 containsIntrinsic(Intrinsic::instrprof_increment) ||
929 containsIntrinsic(Intrinsic::instrprof_increment_step) ||
930 containsIntrinsic(Intrinsic::instrprof_timestamp) ||
931 containsIntrinsic(Intrinsic::instrprof_value_profile);
932}
933
/// Main entry point: lower all profiling intrinsics in the module and emit
/// the supporting data (name data, value-profile nodes, registration, uses,
/// initialization, and the runtime hook).
/// \returns true if the module was changed.
bool InstrLowerer::lower() {
  bool MadeChange = false;
  bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
  if (NeedsRuntimeHook)
    MadeChange = emitRuntimeHook();

  // The sampling variable is created once per module; it is skipped for
  // context-sensitive instrumentation.
  if (!IsCS && isSamplingEnabled())
    createProfileSamplingVar(M);

  bool ContainsProfiling = containsProfilingIntrinsics(M);
  GlobalVariable *CoverageNamesVar =
      M.getNamedGlobal(Name: getCoverageUnusedNamesVarName());
  // Improve compile time by avoiding linear scans when there is no work.
  if (!ContainsProfiling && !CoverageNamesVar)
    return MadeChange;

  // We did not know how many value sites there would be inside
  // the instrumented function. This is counting the number of instrumented
  // target value sites to enter it as field in the profile data variable.
  for (Function &F : M) {
    InstrProfCntrInstBase *FirstProfInst = nullptr;
    for (BasicBlock &BB : F) {
      for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
        if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Val&: I))
          computeNumValueSiteCounts(Ins: Ind);
        else {
          // Remember the first counter intrinsic; it seeds counter/data
          // variable creation for the whole function below.
          if (FirstProfInst == nullptr &&
              (isa<InstrProfIncrementInst>(Val: I) || isa<InstrProfCoverInst>(Val: I)))
            FirstProfInst = dyn_cast<InstrProfCntrInstBase>(Val&: I);
          // If the MCDCBitmapParameters intrinsic is seen, create the bitmaps.
          if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(Val&: I))
            static_cast<void>(getOrCreateRegionBitmaps(Inc: Params));
        }
      }
    }

    // Use a profile intrinsic to create the region counters and data variable.
    // Also create the data variable based on the MCDCParams.
    if (FirstProfInst != nullptr) {
      static_cast<void>(getOrCreateRegionCounters(Inc: FirstProfInst));
    }
  }

  if (EnableVTableValueProfiling)
    for (GlobalVariable &GV : M.globals())
      // Global variables with type metadata are virtual table variables.
      if (GV.hasMetadata(KindID: LLVMContext::MD_type))
        getOrCreateVTableProfData(GV: &GV);

  for (Function &F : M)
    MadeChange |= lowerIntrinsics(F: &F);

  if (CoverageNamesVar) {
    lowerCoverageData(CoverageNamesVar);
    MadeChange = true;
  }

  if (!MadeChange)
    return false;

  emitVNodes();
  emitNameData();
  emitVTableNames();

  // Emit runtime hook for the cases where the target does not unconditionally
  // require pulling in profile runtime, and coverage is enabled on code that is
  // not eliminated by the front-end, e.g. unused functions with internal
  // linkage.
  if (!NeedsRuntimeHook && ContainsProfiling)
    emitRuntimeHook();

  emitRegistration();
  emitUses();
  emitInitialization();
  return true;
}
1010
/// Return (inserting a declaration if needed) the value-profiling runtime
/// entry point for \p CallType. The parameter types are generated from
/// InstrProfData.inc so they stay in sync with the runtime's definition.
static FunctionCallee getOrInsertValueProfilingCall(
    Module &M, const TargetLibraryInfo &TLI,
    ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
  // NOTE: Ctx looks unused, but the ParamLLVMType expansions pulled in from
  // InstrProfData.inc below reference it.
  LLVMContext &Ctx = M.getContext();
  auto *ReturnTy = Type::getVoidTy(C&: M.getContext());

  AttributeList AL;
  // Mirror the target's i32 extension attribute on parameter 2 (the counter
  // index) so the declaration matches what call sites will pass.
  if (auto AK = TLI.getExtAttrForI32Param(Signed: false))
    AL = AL.addParamAttribute(C&: M.getContext(), ArgNo: 2, Kind: AK);

  assert((CallType == ValueProfilingCallType::Default ||
          CallType == ValueProfilingCallType::MemOp) &&
         "Must be Default or MemOp");
  Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *ValueProfilingCallTy =
      FunctionType::get(Result: ReturnTy, Params: ArrayRef(ParamTypes), isVarArg: false);
  StringRef FuncName = CallType == ValueProfilingCallType::Default
                           ? getInstrProfValueProfFuncName()
                           : getInstrProfValueProfMemOpFuncName();
  return M.getOrInsertFunction(Name: FuncName, T: ValueProfilingCallTy, AttributeList: AL);
}
1035
1036void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
1037 GlobalVariable *Name = Ind->getName();
1038 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1039 uint64_t Index = Ind->getIndex()->getZExtValue();
1040 auto &PD = ProfileDataMap[Name];
1041 PD.NumValueSites[ValueKind] =
1042 std::max(a: PD.NumValueSites[ValueKind], b: (uint32_t)(Index + 1));
1043}
1044
1045void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
1046 // TODO: Value profiling heavily depends on the data section which is omitted
1047 // in lightweight mode. We need to move the value profile pointer to the
1048 // Counter struct to get this working.
1049 assert(
1050 !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE &&
1051 "Value profiling is not yet supported with lightweight instrumentation");
1052 GlobalVariable *Name = Ind->getName();
1053 auto It = ProfileDataMap.find(Val: Name);
1054 assert(It != ProfileDataMap.end() && It->second.DataVar &&
1055 "value profiling detected in function with no counter incerement");
1056
1057 GlobalVariable *DataVar = It->second.DataVar;
1058 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1059 uint64_t Index = Ind->getIndex()->getZExtValue();
1060 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
1061 Index += It->second.NumValueSites[Kind];
1062
1063 IRBuilder<> Builder(Ind);
1064 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
1065 llvm::InstrProfValueKind::IPVK_MemOPSize);
1066 CallInst *Call = nullptr;
1067 auto *TLI = &GetTLI(*Ind->getFunction());
1068 auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1069 C: DataVar, Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0));
1070
1071 // To support value profiling calls within Windows exception handlers, funclet
1072 // information contained within operand bundles needs to be copied over to
1073 // the library call. This is required for the IR to be processed by the
1074 // WinEHPrepare pass.
1075 SmallVector<OperandBundleDef, 1> OpBundles;
1076 Ind->getOperandBundlesAsDefs(Defs&: OpBundles);
1077 if (!IsMemOpSize) {
1078 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1079 Builder.getInt32(C: Index)};
1080 Call = Builder.CreateCall(Callee: getOrInsertValueProfilingCall(M, TLI: *TLI), Args,
1081 OpBundles);
1082 } else {
1083 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1084 Builder.getInt32(C: Index)};
1085 Call = Builder.CreateCall(
1086 Callee: getOrInsertValueProfilingCall(M, TLI: *TLI, CallType: ValueProfilingCallType::MemOp),
1087 Args, OpBundles);
1088 }
1089 if (auto AK = TLI->getExtAttrForI32Param(Signed: false))
1090 Call->addParamAttr(ArgNo: 2, Kind: AK);
1091 Ind->replaceAllUsesWith(V: Call);
1092 Ind->eraseFromParent();
1093}
1094
/// Return the global bias variable named \p VarName, creating it on first
/// use as a zero-initialized, hidden, linkonce_odr i64 (placed in a COMDAT
/// when the target supports it).
GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
  GlobalVariable *Bias = M.getGlobalVariable(Name: VarName);
  if (Bias)
    return Bias;

  Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());

  // Compiler must define this variable when runtime counter relocation
  // is being used. Runtime has a weak external reference that is used
  // to check whether that's the case or not.
  Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
                            Constant::getNullValue(Ty: Int64Ty), VarName);
  Bias->setVisibility(GlobalVariable::HiddenVisibility);
  // A definition that's weak (linkonce_odr) without being in a COMDAT
  // section wouldn't lead to link errors, but it would lead to a dead
  // data word from every TU but one. Putting it in COMDAT ensures there
  // will be exactly one data slot in the link.
  if (TT.supportsCOMDAT())
    Bias->setComdat(M.getOrInsertComdat(Name: VarName));

  return Bias;
}
1117
/// Compute the address of the counter slot for intrinsic \p I. When runtime
/// counter relocation is enabled, the raw address is additionally offset by
/// a bias loaded from the bias global (one cached load per function).
Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
  auto *Counters = getOrCreateRegionCounters(Inc: I);
  IRBuilder<> Builder(I);

  // Timestamp slots are accessed as 64-bit quantities, so force 8-byte
  // alignment on the counter array.
  if (isa<InstrProfTimestampInst>(Val: I))
    Counters->setAlignment(Align(8));

  auto *Addr = Builder.CreateConstInBoundsGEP2_32(
      Ty: Counters->getValueType(), Ptr: Counters, Idx0: 0, Idx1: I->getIndex()->getZExtValue());

  if (!isRuntimeCounterRelocationEnabled())
    return Addr;

  Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
  Function *Fn = I->getParent()->getParent();
  // Reuse a single bias load per function, emitted in the entry block.
  LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
  if (!BiasLI) {
    IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
    auto *Bias = getOrCreateBiasVar(VarName: getInstrProfCounterBiasVarName());
    BiasLI = EntryBuilder.CreateLoad(Ty: Int64Ty, Ptr: Bias, Name: "profc_bias");
    // Bias doesn't change after startup.
    BiasLI->setMetadata(KindID: LLVMContext::MD_invariant_load,
                        Node: MDNode::get(Context&: M.getContext(), MDs: {}));
  }
  // Addr + Bias, computed in the integer domain and cast back to a pointer.
  auto *Add = Builder.CreateAdd(LHS: Builder.CreatePtrToInt(V: Addr, DestTy: Int64Ty), RHS: BiasLI);
  return Builder.CreateIntToPtr(V: Add, DestTy: Addr->getType());
}
1145
/// Compute the base address of the MC/DC bitmap region for \p I, adjusted by
/// the bitmap bias when runtime counter relocation is enabled.
Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
  auto *Bitmaps = getOrCreateRegionBitmaps(Inc: I);
  if (!isRuntimeCounterRelocationEnabled())
    return Bitmaps;

  // Put BiasLI onto the entry block.
  Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
  Function *Fn = I->getFunction();
  IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
  auto *Bias = getOrCreateBiasVar(VarName: getInstrProfBitmapBiasVarName());
  auto *BiasLI = EntryBuilder.CreateLoad(Ty: Int64Ty, Ptr: Bias, Name: "profbm_bias");
  // Assume BiasLI invariant (in the function at least)
  BiasLI->setMetadata(KindID: LLVMContext::MD_invariant_load,
                      Node: MDNode::get(Context&: M.getContext(), MDs: {}));

  // Add Bias to Bitmaps and put it before the intrinsic.
  IRBuilder<> Builder(I);
  return Builder.CreatePtrAdd(Ptr: Bitmaps, Offset: BiasLI, Name: "profbm_addr");
}
1165
/// Lower an instrprof.cover intrinsic into a store of zero to its coverage
/// byte (zero means "covered"). With ConditionalCounterUpdate, the store is
/// guarded by a check that the byte is still nonzero, avoiding redundant
/// writes on hot paths.
void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
  auto *Addr = getCounterAddress(I: CoverInstruction);
  IRBuilder<> Builder(CoverInstruction);
  if (ConditionalCounterUpdate) {
    Instruction *SplitBefore = CoverInstruction->getNextNode();
    auto &Ctx = CoverInstruction->getParent()->getContext();
    auto *Int8Ty = llvm::Type::getInt8Ty(C&: Ctx);
    Value *Load = Builder.CreateLoad(Ty: Int8Ty, Ptr: Addr, Name: "pgocount");
    Value *Cmp = Builder.CreateIsNotNull(Arg: Load, Name: "pgocount.ifnonzero");
    Instruction *ThenBranch =
        SplitBlockAndInsertIfThen(Cond: Cmp, SplitBefore, Unreachable: false);
    // Emit the store only in the then-block (byte was still nonzero).
    Builder.SetInsertPoint(ThenBranch);
  }

  // We store zero to represent that this block is covered.
  Builder.CreateStore(Val: Builder.getInt8(C: 0), Ptr: Addr);
  CoverInstruction->eraseFromParent();
}
1184
/// Lower an instrprof.timestamp intrinsic into a call to the runtime's
/// set-timestamp helper, passing the address of the function's first counter
/// slot (timestamps always occupy slot zero).
void InstrLowerer::lowerTimestamp(
    InstrProfTimestampInst *TimestampInstruction) {
  assert(TimestampInstruction->getIndex()->isZeroValue() &&
         "timestamp probes are always the first probe for a function");
  auto &Ctx = M.getContext();
  auto *TimestampAddr = getCounterAddress(I: TimestampInstruction);
  IRBuilder<> Builder(TimestampInstruction);
  auto *CalleeTy =
      FunctionType::get(Result: Type::getVoidTy(C&: Ctx), Params: TimestampAddr->getType(), isVarArg: false);
  auto Callee = M.getOrInsertFunction(
      INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), T: CalleeTy);
  Builder.CreateCall(Callee, Args: {TimestampAddr});
  TimestampInstruction->eraseFromParent();
}
1199
/// Lower an instrprof.increment(.step) intrinsic into either an atomic add
/// or a plain load/add/store on its counter slot. Non-atomic load/store
/// pairs are recorded as candidates for later loop promotion.
void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
  auto *Addr = getCounterAddress(I: Inc);

  IRBuilder<> Builder(Inc);
  // Atomic when requested for all counters, or for counter 0 only when
  // AtomicFirstCounter is set.
  if (Options.Atomic || AtomicCounterUpdateAll ||
      (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
    Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Addr, Val: Inc->getStep(),
                            Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
  } else {
    Value *IncStep = Inc->getStep();
    Value *Load = Builder.CreateLoad(Ty: IncStep->getType(), Ptr: Addr, Name: "pgocount");
    auto *Count = Builder.CreateAdd(LHS: Load, RHS: Inc->getStep());
    auto *Store = Builder.CreateStore(Val: Count, Ptr: Addr);
    if (isCounterPromotionEnabled())
      PromotionCandidates.emplace_back(args: cast<Instruction>(Val: Load), args&: Store);
  }
  Inc->eraseFromParent();
}
1218
/// Lower the coverage "unused names" holder variable: demote each referenced
/// name global to private linkage, remember it for name-data emission, then
/// erase the holder itself.
void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
  ConstantArray *Names =
      cast<ConstantArray>(Val: CoverageNamesVar->getInitializer());
  for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
    Constant *NC = Names->getOperand(i_nocapture: I);
    Value *V = NC->stripPointerCasts();
    assert(isa<GlobalVariable>(V) && "Missing reference to function name");
    GlobalVariable *Name = cast<GlobalVariable>(Val: V);

    Name->setLinkage(GlobalValue::PrivateLinkage);
    ReferencedNames.push_back(x: Name);
    // Drop the constant expression's references so no stale constant user of
    // the name survives the holder's erasure below.
    if (isa<ConstantExpr>(Val: NC))
      NC->dropAllReferences();
  }
  CoverageNamesVar->eraseFromParent();
}
1235
/// Lower an MC/DC test-vector bitmap update: combine the condition-bitmap
/// temporary with the update's bitmap index to get a global bit index, then
/// set that bit in the profile bitmap — either with a plain read-or-write,
/// or, under atomic updates, with a test-then-atomic-or so the RMW only runs
/// when the bit is not yet set.
void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
    InstrProfMCDCTVBitmapUpdate *Update) {
  auto &Ctx = M.getContext();
  IRBuilder<> Builder(Update);
  auto *Int8Ty = Type::getInt8Ty(C&: Ctx);
  auto *Int32Ty = Type::getInt32Ty(C&: Ctx);
  auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
  auto *BitmapAddr = getBitmapAddress(I: Update);

  // Load Temp Val + BitmapIdx.
  // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
  auto *Temp = Builder.CreateAdd(
      LHS: Builder.CreateLoad(Ty: Int32Ty, Ptr: MCDCCondBitmapAddr, Name: "mcdc.temp"),
      RHS: Update->getBitmapIndex());

  // Calculate byte offset using div8.
  // %1 = lshr i32 %mcdc.temp, 3
  auto *BitmapByteOffset = Builder.CreateLShr(LHS: Temp, RHS: 0x3);

  // Add byte offset to section base byte address.
  // %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
  auto *BitmapByteAddr =
      Builder.CreateInBoundsPtrAdd(Ptr: BitmapAddr, Offset: BitmapByteOffset);

  // Calculate bit offset into bitmap byte by using div8 remainder (AND 0x7)
  // %5 = and i32 %mcdc.temp, 7
  // %6 = trunc i32 %5 to i8
  auto *BitToSet = Builder.CreateTrunc(V: Builder.CreateAnd(LHS: Temp, RHS: 0x7), DestTy: Int8Ty);

  // Shift bit offset left to form a bitmap.
  // %7 = shl i8 1, %6
  auto *ShiftedVal = Builder.CreateShl(LHS: Builder.getInt8(C: 0x1), RHS: BitToSet);

  // Load profile bitmap byte.
  // %mcdc.bits = load i8, ptr %4, align 1
  auto *Bitmap = Builder.CreateLoad(Ty: Int8Ty, Ptr: BitmapByteAddr, Name: "mcdc.bits");

  if (Options.Atomic || AtomicCounterUpdateAll) {
    // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
    // Note, just-loaded Bitmap might not be up-to-date. Use it just for
    // early testing.
    auto *Masked = Builder.CreateAnd(LHS: Bitmap, RHS: ShiftedVal);
    auto *ShouldStore = Builder.CreateICmpNE(LHS: Masked, RHS: ShiftedVal);

    // Assume updating will be rare.
    auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
    Instruction *ThenBranch =
        SplitBlockAndInsertIfThen(Cond: ShouldStore, SplitBefore: Update, Unreachable: false, BranchWeights: Unlikely);

    // Execute if (unlikely(ShouldStore)).
    Builder.SetInsertPoint(ThenBranch);
    Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: BitmapByteAddr, Val: ShiftedVal,
                            Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
  } else {
    // Perform logical OR of profile bitmap byte and shifted bit offset.
    // %8 = or i8 %mcdc.bits, %7
    auto *Result = Builder.CreateOr(LHS: Bitmap, RHS: ShiftedVal);

    // Store the updated profile bitmap byte.
    // store i8 %8, ptr %3, align 1
    Builder.CreateStore(Val: Result, Ptr: BitmapByteAddr);
  }

  Update->eraseFromParent();
}
1301
/// Get the name of a profiling variable for a particular function.
/// \p Prefix is prepended to the function's PGO name. \p Renamed is set to
/// true when hash-based counter splitting applies and the name carries (or
/// already carried) a function-hash suffix.
static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
                              bool &Renamed) {
  StringRef NamePrefix = getInstrProfNameVarPrefix();
  StringRef Name = Inc->getName()->getName().substr(Start: NamePrefix.size());
  Function *F = Inc->getParent()->getParent();
  Module *M = F->getParent();
  // Hash-suffix renaming only applies to IR-level PGO on renamable comdat
  // functions, and only when hash-based counter splitting is enabled.
  if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
      !canRenameComdatFunc(F: *F)) {
    Renamed = false;
    return (Prefix + Name).str();
  }
  Renamed = true;
  uint64_t FuncHash = Inc->getHash()->getZExtValue();
  SmallVector<char, 24> HashPostfix;
  // Avoid doubling the suffix if the name already ends with ".<hash>".
  if (Name.ends_with(Suffix: (Twine(".") + Twine(FuncHash)).toStringRef(Out&: HashPostfix)))
    return (Prefix + Name).str();
  return (Prefix + Name + "." + Twine(FuncHash)).str();
}
1321
/// Decide whether \p F's address should be recorded in its profile data.
static inline bool shouldRecordFunctionAddr(Function *F) {
  // Only record function addresses if IR PGO is enabled or if clang value
  // profiling is enabled. Recording function addresses greatly increases object
  // file size, because it prevents the inliner from deleting functions that
  // have been inlined everywhere.
  if (!profDataReferencedByCode(M: *F->getParent()))
    return false;

  // Check the linkage
  bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
  if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
      !HasAvailableExternallyLinkage)
    return true;

  // A function marked 'alwaysinline' with available_externally linkage can't
  // have its address taken. Doing so would create an undefined external ref to
  // the function, which would fail to link.
  if (HasAvailableExternallyLinkage &&
      F->hasFnAttribute(Kind: Attribute::AlwaysInline))
    return false;

  // Prohibit function address recording if the function is both internal and
  // COMDAT. This avoids the profile data variable referencing internal symbols
  // in COMDAT.
  if (F->hasLocalLinkage() && F->hasComdat())
    return false;

  // Check uses of this function for other than direct calls or invokes to it.
  // Inline virtual functions have linkOnceODR linkage. When a key method
  // exists, the vtable will only be emitted in the TU where the key method
  // is defined. In a TU where vtable is not available, the function won't
  // be 'addresstaken'. If its address is not recorded here, the profile data
  // with missing address may be picked by the linker leading to missing
  // indirect call target info.
  return F->hasAddressTaken() || F->hasLinkOnceLinkage();
}
1358
/// Decide whether profile data must reference \p Fn through its public
/// symbol rather than a private alias (see getFuncAddrForProfData).
static inline bool shouldUsePublicSymbol(Function *Fn) {
  // It isn't legal to make an alias of this function at all
  if (Fn->isDeclarationForLinker())
    return true;

  // Symbols with local linkage can just use the symbol directly without
  // introducing relocations
  if (Fn->hasLocalLinkage())
    return true;

  // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
  // unfavorable interaction between the new alias and the alias renaming done
  // in LowerTypeTests under ThinLTO. For comdat functions that would normally
  // be deduplicated, but the renaming scheme ends up preventing renaming, since
  // it creates unique names for each alias, resulting in duplicated symbols. In
  // the future, we should update the CFI related passes to migrate these
  // aliases to the same module as the jump-table they refer to will be defined.
  if (Fn->hasMetadata(KindID: LLVMContext::MD_type))
    return true;

  // For comdat functions, an alias would need the same linkage as the original
  // function and hidden visibility. There is no point in adding an alias with
  // identical linkage and visibility to avoid introducing symbolic relocations.
  if (Fn->hasComdat() &&
      (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility))
    return true;

  // It's OK to use an alias.
  return false;
}
1389
/// Compute the function-address field for \p Fn's __llvm_profd record:
/// null when address recording is disabled, the public symbol when an alias
/// is not usable, otherwise a private (or hidden, for COMDAT) alias that
/// avoids symbolic relocations.
static inline Constant *getFuncAddrForProfData(Function *Fn) {
  auto *Int8PtrTy = PointerType::getUnqual(C&: Fn->getContext());
  // Store a nullptr in __llvm_profd, if we shouldn't use a real address
  if (!shouldRecordFunctionAddr(F: Fn))
    return ConstantPointerNull::get(T: Int8PtrTy);

  // If we can't use an alias, we must use the public symbol, even though this
  // may require a symbolic relocation.
  if (shouldUsePublicSymbol(Fn))
    return Fn;

  // When possible use a private alias to avoid symbolic relocations.
  auto *GA = GlobalAlias::create(Linkage: GlobalValue::LinkageTypes::PrivateLinkage,
                                 Name: Fn->getName() + ".local", Aliasee: Fn);

  // When the instrumented function is a COMDAT function, we cannot use a
  // private alias. If we did, we would create reference to a local label in
  // this function's section. If this version of the function isn't selected by
  // the linker, then the metadata would introduce a reference to a discarded
  // section. So, for COMDAT functions, we need to adjust the linkage of the
  // alias. Using hidden visibility avoids a dynamic relocation and an entry in
  // the dynamic symbol table.
  //
  // Note that this handles COMDAT functions with visibility other than Hidden,
  // since that case is covered in shouldUsePublicSymbol()
  if (Fn->hasComdat()) {
    GA->setLinkage(Fn->getLinkage());
    GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);
  }

  // appendToCompilerUsed(*Fn->getParent(), {GA});

  return GA;
}
1424
1425static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
1426 // compiler-rt uses linker support to get data/counters/name start/end for
1427 // ELF, COFF, Mach-O, XCOFF, and Wasm.
1428 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1429 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF() ||
1430 TT.isOSBinFormatWasm())
1431 return false;
1432
1433 return true;
1434}
1435
/// Place the lowered profiling variable \p GV into a comdat group when its
/// associated global object \p GO needs one — or unconditionally on ELF,
/// where a nodeduplicate group enables -z start-stop-gc to discard unused
/// profiling data.
void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
                                  StringRef CounterGroupName) {
  // Place lowered global variables in a comdat group if the associated function
  // or global variable is a COMDAT. This will make sure that only one copy of
  // global variable (e.g. function counters) of the COMDAT function will be
  // emitted after linking.
  bool NeedComdat = needsComdatForCounter(GV: *GO, M);
  bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());

  if (!UseComdat)
    return;

  // Keep in mind that this pass may run before the inliner, so we need to
  // create a new comdat group (for counters, profiling data, etc). If we use
  // the comdat of the parent function, that will result in relocations against
  // discarded sections.
  //
  // If the data variable is referenced by code, non-counter variables (notably
  // profiling data) and counters have to be in different comdats for COFF
  // because the Visual C++ linker will report duplicate symbol errors if there
  // are multiple external symbols with the same name marked
  // IMAGE_COMDAT_SELECT_ASSOCIATIVE.
  StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
                            ? GV->getName()
                            : CounterGroupName;
  Comdat *C = M.getOrInsertComdat(Name: GroupName);

  if (!NeedComdat) {
    // Object file format must be ELF since `UseComdat && !NeedComdat` is true.
    //
    // For ELF, when not using COMDAT, put counters, data and values into a
    // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
    // allows -z start-stop-gc to discard the entire group when the function is
    // discarded.
    C->setSelectionKind(Comdat::NoDeduplicate);
  }
  GV->setComdat(C);
  // COFF doesn't allow the comdat group leader to have private linkage, so
  // upgrade private linkage to internal linkage to produce a symbol table
  // entry.
  if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
    GV->setLinkage(GlobalValue::InternalLinkage);
}
1479
1480static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
1481 if (!profDataReferencedByCode(M: *GV->getParent()))
1482 return false;
1483
1484 if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
1485 !GV->hasAvailableExternallyLinkage())
1486 return true;
1487
1488 // This avoids the profile data from referencing internal symbols in
1489 // COMDAT.
1490 if (GV->hasLocalLinkage() && GV->hasComdat())
1491 return false;
1492
1493 return true;
1494}
1495
1496// FIXME: Introduce an internal alias like what's done for functions to reduce
1497// the number of relocation entries.
1498static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
1499 // Store a nullptr in __profvt_ if a real address shouldn't be used.
1500 if (!shouldRecordVTableAddr(GV))
1501 return ConstantPointerNull::get(T: PointerType::getUnqual(C&: GV->getContext()));
1502
1503 return GV;
1504}
1505
/// Create (once per vtable) the __profvt_* data variable describing \p GV,
/// with layout generated from InstrProfData.inc, and register it for
/// emission. Internal/llvm-reserved globals and declarations are skipped.
void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
  assert(!DebugInfoCorrelate &&
         "Value profiling is not supported with lightweight instrumentation");
  if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
    return;

  // Skip llvm internal global variable or __prof variables.
  if (GV->getName().starts_with(Prefix: "llvm.") ||
      GV->getName().starts_with(Prefix: "__llvm") ||
      GV->getName().starts_with(Prefix: "__prof"))
    return;

  // VTableProfData already created
  auto It = VTableDataMap.find(Val: GV);
  if (It != VTableDataMap.end() && It->second)
    return;

  GlobalValue::LinkageTypes Linkage = GV->getLinkage();
  GlobalValue::VisibilityTypes Visibility = GV->getVisibility();

  // This is to keep consistent with per-function profile data
  // for correctness.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::InternalLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  LLVMContext &Ctx = M.getContext();
  // Field types come from the shared InstrProfData.inc definition so they
  // stay in sync with the runtime and profile readers.
  Type *DataTypes[] = {
#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
#undef INSTR_PROF_VTABLE_DATA
  };

  auto *DataTy = StructType::get(Context&: Ctx, Elements: ArrayRef(DataTypes));

  // Used by INSTR_PROF_VTABLE_DATA MACRO
  Constant *VTableAddr = getVTableAddrForProfData(GV);
  const std::string PGOVTableName = getPGOName(V: *GV);
  // Record the length of the vtable. This is needed since vtable pointers
  // loaded from C++ objects might be from the middle of a vtable definition.
  uint32_t VTableSizeVal =
      M.getDataLayout().getTypeAllocSize(Ty: GV->getValueType());

  // The Init expressions below reference VTableAddr, PGOVTableName, and
  // VTableSizeVal defined above.
  Constant *DataVals[] = {
#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
#undef INSTR_PROF_VTABLE_DATA
  };

  auto *Data =
      new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,
                         ConstantStruct::get(T: DataTy, V: DataVals),
                         getInstrProfVTableVarPrefix() + PGOVTableName);

  Data->setVisibility(Visibility);
  Data->setSection(getInstrProfSectionName(IPSK: IPSK_vtab, OF: TT.getObjectFormat()));
  Data->setAlignment(Align(8));

  maybeSetComdat(GV: Data, GO: GV, CounterGroupName: Data->getName());

  VTableDataMap[GV] = Data;

  ReferencedVTables.push_back(x: GV);

  // VTable <Hash, Addr> is used by runtime but not referenced by other
  // sections. Conservatively mark it linker retained.
  UsedVars.push_back(x: Data);
}
1575
// Creates the counter or bitmap global for the function containing \p Inc and
// moves it into the proper profile section. \p IPSK selects the flavor:
// IPSK_cnts (region counters) or IPSK_bitmap (MC/DC bitmaps); any other kind
// is a programming error. The new global initially mirrors the linkage and
// visibility of the function's PGO name global, with platform-specific
// adjustments applied below.
GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
                                                  InstrProfSectKind IPSK) {
  GlobalVariable *NamePtr = Inc->getName();

  // Match the linkage and visibility of the name global.
  Function *Fn = Inc->getParent()->getParent();
  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();

  // Use internal rather than private linkage so the counter variable shows up
  // in the symbol table when using debug info for correlation.
  if ((DebugInfoCorrelate ||
       ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) &&
      TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
    Linkage = GlobalValue::InternalLinkage;

  // Due to the limitation of binder as of 2021/09/28, the duplicate weak
  // symbols in the same csect won't be discarded. When there are duplicate weak
  // symbols, we can NOT guarantee that the relocations get resolved to the
  // intended weak symbol, so we can not ensure the correctness of the relative
  // CounterPtr, so we have to use private linkage for counter and data symbols.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }
  // Move the name variable to the right section.
  bool Renamed;
  GlobalVariable *Ptr;
  StringRef VarPrefix;
  std::string VarName;
  if (IPSK == IPSK_cnts) {
    VarPrefix = getInstrProfCountersVarPrefix();
    VarName = getVarName(Inc, Prefix: VarPrefix, Renamed);
    // NOTE(review): dyn_cast may yield null if Inc is not a counter
    // intrinsic; createRegionCounters is handed the result directly.
    InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Val: Inc);
    Ptr = createRegionCounters(Inc: CntrIncrement, Name: VarName, Linkage);
  } else if (IPSK == IPSK_bitmap) {
    VarPrefix = getInstrProfBitmapVarPrefix();
    VarName = getVarName(Inc, Prefix: VarPrefix, Renamed);
    InstrProfMCDCBitmapInstBase *BitmapUpdate =
        dyn_cast<InstrProfMCDCBitmapInstBase>(Val: Inc);
    Ptr = createRegionBitmaps(Inc: BitmapUpdate, Name: VarName, Linkage);
  } else {
    llvm_unreachable("Profile Section must be for Counters or Bitmaps");
  }

  Ptr->setVisibility(Visibility);
  // Put the counters and bitmaps in their own sections so linkers can
  // remove unneeded sections.
  Ptr->setSection(getInstrProfSectionName(IPSK, OF: TT.getObjectFormat()));
  Ptr->setLinkage(Linkage);
  maybeSetComdat(GV: Ptr, GO: Fn, CounterGroupName: VarName);
  return Ptr;
}
1629
1630GlobalVariable *
1631InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1632 StringRef Name,
1633 GlobalValue::LinkageTypes Linkage) {
1634 uint64_t NumBytes = Inc->getNumBitmapBytes();
1635 auto *BitmapTy = ArrayType::get(ElementType: Type::getInt8Ty(C&: M.getContext()), NumElements: NumBytes);
1636 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1637 Constant::getNullValue(Ty: BitmapTy), Name);
1638 GV->setAlignment(Align(1));
1639 return GV;
1640}
1641
1642GlobalVariable *
1643InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1644 GlobalVariable *NamePtr = Inc->getName();
1645 auto &PD = ProfileDataMap[NamePtr];
1646 if (PD.RegionBitmaps)
1647 return PD.RegionBitmaps;
1648
1649 // If RegionBitmaps doesn't already exist, create it by first setting up
1650 // the corresponding profile section.
1651 auto *BitmapPtr = setupProfileSection(Inc, IPSK: IPSK_bitmap);
1652 PD.RegionBitmaps = BitmapPtr;
1653 PD.NumBitmapBytes = Inc->getNumBitmapBytes();
1654 return PD.RegionBitmaps;
1655}
1656
1657GlobalVariable *
1658InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1659 GlobalValue::LinkageTypes Linkage) {
1660 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1661 auto &Ctx = M.getContext();
1662 GlobalVariable *GV;
1663 if (isa<InstrProfCoverInst>(Val: Inc)) {
1664 auto *CounterTy = Type::getInt8Ty(C&: Ctx);
1665 auto *CounterArrTy = ArrayType::get(ElementType: CounterTy, NumElements: NumCounters);
1666 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
1667 std::vector<Constant *> InitialValues(NumCounters,
1668 Constant::getAllOnesValue(Ty: CounterTy));
1669 GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1670 ConstantArray::get(T: CounterArrTy, V: InitialValues),
1671 Name);
1672 GV->setAlignment(Align(1));
1673 } else {
1674 auto *CounterTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NumCounters);
1675 GV = new GlobalVariable(M, CounterTy, false, Linkage,
1676 Constant::getNullValue(Ty: CounterTy), Name);
1677 GV->setAlignment(Align(8));
1678 }
1679 return GV;
1680}
1681
// Returns the counter global for the function containing \p Inc, creating it
// on first request. When profile correlation uses debug info, a
// DIGlobalVariableExpression is attached to the counter carrying the
// function name, CFG hash, and counter count as annotations — the data that
// would otherwise live in the data variable. Finally ensures the data
// variable itself exists (a no-op under debug-info correlation).
GlobalVariable *
InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
  GlobalVariable *NamePtr = Inc->getName();
  auto &PD = ProfileDataMap[NamePtr];
  if (PD.RegionCounters)
    return PD.RegionCounters;

  // If RegionCounters doesn't already exist, create it by first setting up
  // the corresponding profile section.
  auto *CounterPtr = setupProfileSection(Inc, IPSK: IPSK_cnts);
  PD.RegionCounters = CounterPtr;

  if (DebugInfoCorrelate ||
      ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
    LLVMContext &Ctx = M.getContext();
    Function *Fn = Inc->getParent()->getParent();
    // Only functions with debug info get annotations; others are skipped.
    if (auto *SP = Fn->getSubprogram()) {
      DIBuilder DB(M, true, SP->getUnit());
      // Each annotation is a (attribute-name, value) MDString/metadata pair.
      Metadata *FunctionNameAnnotation[] = {
          MDString::get(Context&: Ctx, Str: InstrProfCorrelator::FunctionNameAttributeName),
          MDString::get(Context&: Ctx, Str: getPGOFuncNameVarInitializer(NameVar: NamePtr)),
      };
      Metadata *CFGHashAnnotation[] = {
          MDString::get(Context&: Ctx, Str: InstrProfCorrelator::CFGHashAttributeName),
          ConstantAsMetadata::get(C: Inc->getHash()),
      };
      Metadata *NumCountersAnnotation[] = {
          MDString::get(Context&: Ctx, Str: InstrProfCorrelator::NumCountersAttributeName),
          ConstantAsMetadata::get(C: Inc->getNumCounters()),
      };
      auto Annotations = DB.getOrCreateArray(Elements: {
          MDNode::get(Context&: Ctx, MDs: FunctionNameAnnotation),
          MDNode::get(Context&: Ctx, MDs: CFGHashAnnotation),
          MDNode::get(Context&: Ctx, MDs: NumCountersAnnotation),
      });
      // Describe the counter as a global variable of an unspecified type so
      // correlation tooling can find it through the debug info.
      auto *DICounter = DB.createGlobalVariableExpression(
          Context: SP, Name: CounterPtr->getName(), /*LinkageName=*/StringRef(), File: SP->getFile(),
          /*LineNo=*/0, Ty: DB.createUnspecifiedType(Name: "Profile Data Type"),
          IsLocalToUnit: CounterPtr->hasLocalLinkage(), /*IsDefined=*/isDefined: true, /*Expr=*/nullptr,
          /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
          Annotations);
      CounterPtr->addDebugInfo(GV: DICounter);
      DB.finalize();
    }

    // Mark the counter variable as used so that it isn't optimized out.
    CompilerUsedVars.push_back(x: PD.RegionCounters);
  }

  // Create the data variable (if it doesn't already exist).
  createDataVariable(Inc);

  return PD.RegionCounters;
}
1736
// Builds the per-function profile data record (__llvm_prf_data) for the
// function containing \p Inc and wires it to the counter/bitmap globals with
// either relative (label-difference) or absolute references. The record's
// struct layout and initializer are both generated from the canonical
// INSTR_PROF_DATA entries in InstrProfData.inc.
void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
  // When debug information is correlated to profile data, a data variable
  // is not needed.
  if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
    return;

  GlobalVariable *NamePtr = Inc->getName();
  auto &PD = ProfileDataMap[NamePtr];

  // Return if data variable was already created.
  if (PD.DataVar)
    return;

  LLVMContext &Ctx = M.getContext();

  Function *Fn = Inc->getParent()->getParent();
  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();

  // Due to the limitation of binder as of 2021/09/28, the duplicate weak
  // symbols in the same csect won't be discarded. When there are duplicate weak
  // symbols, we can NOT guarantee that the relocations get resolved to the
  // intended weak symbol, so we can not ensure the correctness of the relative
  // CounterPtr, so we have to use private linkage for counter and data symbols.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  bool NeedComdat = needsComdatForCounter(GV: *Fn, M);
  bool Renamed;

  // The Data Variable section is anchored to profile counters.
  std::string CntsVarName =
      getVarName(Inc, Prefix: getInstrProfCountersVarPrefix(), Renamed);
  std::string DataVarName =
      getVarName(Inc, Prefix: getInstrProfDataVarPrefix(), Renamed);

  auto *Int8PtrTy = PointerType::getUnqual(C&: Ctx);
  // Allocate statically the array of pointers to value profile nodes for
  // the current function.
  Constant *ValuesPtrExpr = ConstantPointerNull::get(T: Int8PtrTy);
  // NS = total number of value-profiling sites across all value kinds.
  uint64_t NS = 0;
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    NS += PD.NumValueSites[Kind];
  if (NS > 0 && ValueProfileStaticAlloc &&
      !needsRuntimeRegistrationOfSectionRange(TT)) {
    ArrayType *ValuesTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NS);
    auto *ValuesVar = new GlobalVariable(
        M, ValuesTy, false, Linkage, Constant::getNullValue(Ty: ValuesTy),
        getVarName(Inc, Prefix: getInstrProfValuesVarPrefix(), Renamed));
    ValuesVar->setVisibility(Visibility);
    setGlobalVariableLargeSection(TargetTriple: TT, GV&: *ValuesVar);
    ValuesVar->setSection(
        getInstrProfSectionName(IPSK: IPSK_vals, OF: TT.getObjectFormat()));
    ValuesVar->setAlignment(Align(8));
    maybeSetComdat(GV: ValuesVar, GO: Fn, CounterGroupName: CntsVarName);
    ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
        C: ValuesVar, Ty: PointerType::get(C&: Fn->getContext(), AddressSpace: 0));
  }

  uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
  auto *CounterPtr = PD.RegionCounters;

  uint64_t NumBitmapBytes = PD.NumBitmapBytes;

  // Create data variable.
  // The locals declared from here down (FunctionAddr, RelativeCounterPtr,
  // Int16ArrayVals, ...) are referenced by name from the macro-generated
  // Init expressions below — do not rename or remove them.
  auto *IntPtrTy = M.getDataLayout().getIntPtrType(C&: M.getContext());
  auto *Int16Ty = Type::getInt16Ty(C&: Ctx);
  auto *Int16ArrayTy = ArrayType::get(ElementType: Int16Ty, NumElements: IPVK_Last + 1);
  Type *DataTypes[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *DataTy = StructType::get(Context&: Ctx, Elements: ArrayRef(DataTypes));

  Constant *FunctionAddr = getFuncAddrForProfData(Fn);

  // Per-kind value-site counts, stored as an i16 array in the data record.
  Constant *Int16ArrayVals[IPVK_Last + 1];
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    Int16ArrayVals[Kind] = ConstantInt::get(Ty: Int16Ty, V: PD.NumValueSites[Kind]);

  if (isGPUProfTarget(M)) {
    Linkage = GlobalValue::ExternalLinkage;
    Visibility = GlobalValue::ProtectedVisibility;
  }
  // If the data variable is not referenced by code (if we don't emit
  // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
  // data variable live under linker GC, the data variable can be private. This
  // optimization applies to ELF.
  //
  // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
  // to be false.
  //
  // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
  // that other copies must have the same CFG and cannot have value profiling.
  // If no hash suffix, other profd copies may be referenced by code.
  else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
           (TT.isOSBinFormatELF() ||
            (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }
  // The initializer is set later, after Data exists: the relative pointers
  // below are label differences against Data itself.
  auto *Data =
      new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
  Constant *RelativeCounterPtr;
  GlobalVariable *BitmapPtr = PD.RegionBitmaps;
  Constant *RelativeBitmapPtr = ConstantInt::get(Ty: IntPtrTy, V: 0);
  InstrProfSectKind DataSectionKind;
  // With binary profile correlation, profile data is not loaded into memory.
  // profile data must reference profile counter with an absolute relocation.
  if (ProfileCorrelate == InstrProfCorrelator::BINARY) {
    DataSectionKind = IPSK_covdata;
    RelativeCounterPtr = ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy);
    if (BitmapPtr != nullptr)
      RelativeBitmapPtr = ConstantExpr::getPtrToInt(C: BitmapPtr, Ty: IntPtrTy);
  } else {
    // Reference the counter variable with a label difference (link-time
    // constant).
    DataSectionKind = IPSK_data;
    RelativeCounterPtr =
        ConstantExpr::getSub(C1: ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy),
                             C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
    if (BitmapPtr != nullptr)
      RelativeBitmapPtr =
          ConstantExpr::getSub(C1: ConstantExpr::getPtrToInt(C: BitmapPtr, Ty: IntPtrTy),
                               C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
  }

  Constant *DataVals[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  Data->setInitializer(ConstantStruct::get(T: DataTy, V: DataVals));

  Data->setVisibility(Visibility);
  Data->setSection(
      getInstrProfSectionName(IPSK: DataSectionKind, OF: TT.getObjectFormat()));
  Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
  maybeSetComdat(GV: Data, GO: Fn, CounterGroupName: CntsVarName);

  PD.DataVar = Data;

  // Mark the data variable as used so that it isn't stripped out.
  CompilerUsedVars.push_back(x: Data);
  // Now that the linkage set by the FE has been passed to the data and counter
  // variables, reset Name variable's linkage and visibility to private so that
  // it can be removed later by the compiler.
  NamePtr->setLinkage(GlobalValue::PrivateLinkage);
  // Collect the referenced names to be used by emitNameData.
  ReferencedNames.push_back(x: NamePtr);
}
1889
// Statically allocates the module-wide array of value-profile nodes used by
// value profiling. Only emitted when static allocation is enabled and the
// target can discover profile-section bounds without runtime registration.
void InstrLowerer::emitVNodes() {
  if (!ValueProfileStaticAlloc)
    return;

  // For now only support this on platforms that do
  // not require runtime registration to discover
  // named section start/end.
  if (needsRuntimeRegistrationOfSectionRange(TT))
    return;

  // Sum value sites across every instrumented function and value kind.
  size_t TotalNS = 0;
  for (auto &PD : ProfileDataMap) {
    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
      TotalNS += PD.second.NumValueSites[Kind];
  }

  // Nothing to allocate if no function has any value-profiling site.
  if (!TotalNS)
    return;

  uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
// Heuristic for small programs with very few total value sites.
// The default value of vp-counters-per-site is chosen based on
// the observation that large apps usually have a low percentage
// of value sites that actually have any profile data, and thus
// the average number of counters per site is low. For small
// apps with very few sites, this may not be true. Bump up the
// number of counters in this case.
#define INSTR_PROF_MIN_VAL_COUNTS 10
  if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
    NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, b: (int)NumCounters * 2);

  auto &Ctx = M.getContext();
  // The node struct layout is generated from the canonical
  // INSTR_PROF_VALUE_NODE records in InstrProfData.inc.
  Type *VNodeTypes[] = {
#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *VNodeTy = StructType::get(Context&: Ctx, Elements: ArrayRef(VNodeTypes));

  ArrayType *VNodesTy = ArrayType::get(ElementType: VNodeTy, NumElements: NumCounters);
  auto *VNodesVar = new GlobalVariable(
      M, VNodesTy, false, GlobalValue::PrivateLinkage,
      Constant::getNullValue(Ty: VNodesTy), getInstrProfVNodesVarName());
  setGlobalVariableLargeSection(TargetTriple: TT, GV&: *VNodesVar);
  VNodesVar->setSection(
      getInstrProfSectionName(IPSK: IPSK_vnodes, OF: TT.getObjectFormat()));
  VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(Ty: VNodesTy));
  // VNodesVar is used by runtime but not referenced via relocation by other
  // sections. Conservatively make it linker retained.
  UsedVars.push_back(x: VNodesVar);
}
1940
// Concatenates the PGO names of all referenced functions into a single
// (optionally compressed) private blob global, records its size in
// NamesSize, and then erases the individual name globals — so this must run
// after every consumer of ReferencedNames.
void InstrLowerer::emitNameData() {
  if (ReferencedNames.empty())
    return;

  std::string CompressedNameStr;
  if (Error E = collectPGOFuncNameStrings(NameVars: ReferencedNames, Result&: CompressedNameStr,
                                          doCompression: DoInstrProfNameCompression)) {
    report_fatal_error(reason: Twine(toString(E: std::move(E))), gen_crash_diag: false);
  }

  auto &Ctx = M.getContext();
  auto *NamesVal =
      ConstantDataArray::getString(Context&: Ctx, Initializer: StringRef(CompressedNameStr), AddNull: false);
  NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
                                GlobalValue::PrivateLinkage, NamesVal,
                                getInstrProfNamesVarName());

  NamesSize = CompressedNameStr.size();
  setGlobalVariableLargeSection(TargetTriple: TT, GV&: *NamesVar);
  // Under binary correlation the names live in the coverage-names section;
  // otherwise they go to the regular names section.
  NamesVar->setSection(
      ProfileCorrelate == InstrProfCorrelator::BINARY
          ? getInstrProfSectionName(IPSK: IPSK_covname, OF: TT.getObjectFormat())
          : getInstrProfSectionName(IPSK: IPSK_name, OF: TT.getObjectFormat()));
  // On COFF, it's important to reduce the alignment down to 1 to prevent the
  // linker from inserting padding before the start of the names section or
  // between names entries.
  NamesVar->setAlignment(Align(1));
  // NamesVar is used by runtime but not referenced via relocation by other
  // sections. Conservatively make it linker retained.
  UsedVars.push_back(x: NamesVar);

  // The individual name globals are folded into the blob and no longer
  // needed.
  for (auto *NamePtr : ReferencedNames)
    NamePtr->eraseFromParent();
}
1975
1976void InstrLowerer::emitVTableNames() {
1977 if (!EnableVTableValueProfiling || ReferencedVTables.empty())
1978 return;
1979
1980 // Collect the PGO names of referenced vtables and compress them.
1981 std::string CompressedVTableNames;
1982 if (Error E = collectVTableStrings(VTables: ReferencedVTables, Result&: CompressedVTableNames,
1983 doCompression: DoInstrProfNameCompression)) {
1984 report_fatal_error(reason: Twine(toString(E: std::move(E))), gen_crash_diag: false);
1985 }
1986
1987 auto &Ctx = M.getContext();
1988 auto *VTableNamesVal = ConstantDataArray::getString(
1989 Context&: Ctx, Initializer: StringRef(CompressedVTableNames), AddNull: false /* AddNull */);
1990 GlobalVariable *VTableNamesVar =
1991 new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
1992 GlobalValue::PrivateLinkage, VTableNamesVal,
1993 getInstrProfVTableNamesVarName());
1994 VTableNamesVar->setSection(
1995 getInstrProfSectionName(IPSK: IPSK_vname, OF: TT.getObjectFormat()));
1996 VTableNamesVar->setAlignment(Align(1));
1997 // Make VTableNames linker retained.
1998 UsedVars.push_back(x: VTableNamesVar);
1999}
2000
// Emits the internal registration function for targets that need runtime
// registration of profiling globals (i.e. targets without linker-provided
// section start/end symbols). It calls the runtime register hook on every
// profiling global, and registers the names blob separately together with
// its byte size.
void InstrLowerer::emitRegistration() {
  if (!needsRuntimeRegistrationOfSectionRange(TT))
    return;

  // Construct the function.
  auto *VoidTy = Type::getVoidTy(C&: M.getContext());
  auto *VoidPtrTy = PointerType::getUnqual(C&: M.getContext());
  auto *Int64Ty = Type::getInt64Ty(C&: M.getContext());
  auto *RegisterFTy = FunctionType::get(Result: VoidTy, isVarArg: false);
  auto *RegisterF = Function::Create(Ty: RegisterFTy, Linkage: GlobalValue::InternalLinkage,
                                     N: getInstrProfRegFuncsName(), M);
  RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  if (Options.NoRedZone)
    RegisterF->addFnAttr(Kind: Attribute::NoRedZone);

  // External declaration of the per-global runtime register hook.
  auto *RuntimeRegisterTy = FunctionType::get(Result: VoidTy, Params: VoidPtrTy, isVarArg: false);
  auto *RuntimeRegisterF =
      Function::Create(Ty: RuntimeRegisterTy, Linkage: GlobalVariable::ExternalLinkage,
                       N: getInstrProfRegFuncName(), M);

  IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: RegisterF));
  for (Value *Data : CompilerUsedVars)
    if (!isa<Function>(Val: Data))
      // Check for addrspace cast when profiling GPU
      IRB.CreateCall(Callee: RuntimeRegisterF,
                     Args: IRB.CreatePointerBitCastOrAddrSpaceCast(V: Data, DestTy: VoidPtrTy));
  for (Value *Data : UsedVars)
    // NamesVar is excluded here: it is registered below with its size.
    if (Data != NamesVar && !isa<Function>(Val: Data))
      IRB.CreateCall(Callee: RuntimeRegisterF,
                     Args: IRB.CreatePointerBitCastOrAddrSpaceCast(V: Data, DestTy: VoidPtrTy));

  if (NamesVar) {
    Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
    auto *NamesRegisterTy =
        FunctionType::get(Result: VoidTy, Params: ArrayRef(ParamTypes), isVarArg: false);
    auto *NamesRegisterF =
        Function::Create(Ty: NamesRegisterTy, Linkage: GlobalVariable::ExternalLinkage,
                         N: getInstrProfNamesRegFuncName(), M);
    IRB.CreateCall(Callee: NamesRegisterF, Args: {IRB.CreatePointerBitCastOrAddrSpaceCast(
                                        V: NamesVar, DestTy: VoidPtrTy),
                                    IRB.getInt64(C: NamesSize)});
  }

  IRB.CreateRetVoid();
}
2046
// Declares the external "runtime hook" variable whose reference forces the
// profiling runtime to be linked in. Returns true if the hook was emitted,
// false when it is unnecessary (Linux/AIX use the -u<hook_var> linker flag
// instead) or the module already declares its own.
bool InstrLowerer::emitRuntimeHook() {
  // We expect the linker to be invoked with -u<hook_var> flag for Linux
  // in which case there is no need to emit the external variable.
  if (TT.isOSLinux() || TT.isOSAIX())
    return false;

  // If the module's provided its own runtime, we don't need to do anything.
  if (M.getGlobalVariable(Name: getInstrProfRuntimeHookVarName()))
    return false;

  // Declare an external variable that will pull in the runtime initialization.
  auto *Int32Ty = Type::getInt32Ty(C&: M.getContext());
  auto *Var =
      new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
                         nullptr, getInstrProfRuntimeHookVarName());
  if (isGPUProfTarget(M))
    Var->setVisibility(GlobalValue::ProtectedVisibility);
  else
    Var->setVisibility(GlobalValue::HiddenVisibility);

  if (TT.isOSBinFormatELF() && !TT.isPS()) {
    // Mark the user variable as used so that it isn't stripped out.
    CompilerUsedVars.push_back(x: Var);
  } else {
    // Make a function that uses it. On non-ELF targets llvm.compiler.used is
    // not enough to keep the reference alive, so emit a tiny no-inline
    // function that loads the hook and return it.
    auto *User = Function::Create(Ty: FunctionType::get(Result: Int32Ty, isVarArg: false),
                                  Linkage: GlobalValue::LinkOnceODRLinkage,
                                  N: getInstrProfRuntimeHookVarUseFuncName(), M);
    User->addFnAttr(Kind: Attribute::NoInline);
    if (Options.NoRedZone)
      User->addFnAttr(Kind: Attribute::NoRedZone);
    User->setVisibility(GlobalValue::HiddenVisibility);
    // Deduplicate the use function across translation units where possible.
    if (TT.supportsCOMDAT())
      User->setComdat(M.getOrInsertComdat(Name: User->getName()));

    IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: User));
    auto *Load = IRB.CreateLoad(Ty: Int32Ty, Ptr: Var);
    IRB.CreateRet(V: Load);

    // Mark the function as used so that it isn't stripped out.
    CompilerUsedVars.push_back(x: User);
  }
  return true;
}
2091
2092void InstrLowerer::emitUses() {
2093 // The metadata sections are parallel arrays. Optimizers (e.g.
2094 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2095 // we conservatively retain all unconditionally in the compiler.
2096 //
2097 // On ELF and Mach-O, the linker can guarantee the associated sections will be
2098 // retained or discarded as a unit, so llvm.compiler.used is sufficient.
2099 // Similarly on COFF, if prof data is not referenced by code we use one comdat
2100 // and ensure this GC property as well. Otherwise, we have to conservatively
2101 // make all of the sections retained by the linker.
2102 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2103 (TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2104 appendToCompilerUsed(M, Values: CompilerUsedVars);
2105 else
2106 appendToUsed(M, Values: CompilerUsedVars);
2107
2108 // We do not add proper references from used metadata sections to NamesVar and
2109 // VNodesVar, so we have to be conservative and place them in llvm.used
2110 // regardless of the target,
2111 appendToUsed(M, Values: UsedVars);
2112}
2113
// Emits the internal profiling init function (registered as a global ctor)
// that calls the registration function, and — for non-context-sensitive
// lowering — creates the profile file name variable. No-op when no
// registration function was emitted.
void InstrLowerer::emitInitialization() {
  // Create ProfileFileName variable. Don't do this for the
  // context-sensitive instrumentation lowering: This lowering is after
  // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
  // have already created the variable before LTO/ThinLTO linking.
  if (!IsCS)
    createProfileFileNameVar(M, InstrProfileOutput: Options.InstrProfileOutput);
  Function *RegisterF = M.getFunction(Name: getInstrProfRegFuncsName());
  if (!RegisterF)
    return;

  // Create the initialization function.
  auto *VoidTy = Type::getVoidTy(C&: M.getContext());
  auto *F = Function::Create(Ty: FunctionType::get(Result: VoidTy, isVarArg: false),
                             Linkage: GlobalValue::InternalLinkage,
                             N: getInstrProfInitFuncName(), M);
  F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  F->addFnAttr(Kind: Attribute::NoInline);
  if (Options.NoRedZone)
    F->addFnAttr(Kind: Attribute::NoRedZone);

  // Add the basic block and the necessary calls.
  IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: F));
  IRB.CreateCall(Callee: RegisterF, Args: {});
  IRB.CreateRetVoid();

  // Run at the highest priority so registration happens before other ctors.
  appendToGlobalCtors(M, F, Priority: 0);
}
2142
2143namespace llvm {
2144// Create the variable for profile sampling.
2145void createProfileSamplingVar(Module &M) {
2146 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
2147 IntegerType *SamplingVarTy;
2148 Constant *ValueZero;
2149 if (getSampledInstrumentationConfig().UseShort) {
2150 SamplingVarTy = Type::getInt16Ty(C&: M.getContext());
2151 ValueZero = Constant::getIntegerValue(Ty: SamplingVarTy, V: APInt(16, 0));
2152 } else {
2153 SamplingVarTy = Type::getInt32Ty(C&: M.getContext());
2154 ValueZero = Constant::getIntegerValue(Ty: SamplingVarTy, V: APInt(32, 0));
2155 }
2156 auto SamplingVar = new GlobalVariable(
2157 M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2158 SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2159 SamplingVar->setThreadLocal(true);
2160 Triple TT(M.getTargetTriple());
2161 if (TT.supportsCOMDAT()) {
2162 SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2163 SamplingVar->setComdat(M.getOrInsertComdat(Name: VarName));
2164 }
2165 appendToCompilerUsed(M, Values: SamplingVar);
2166}
2167} // namespace llvm
2168