1//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10// It also builds the data structures and initialization code needed for
11// updating execution counts and emitting the profile at runtime.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/RuntimeLibcalls.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
68
69using namespace llvm;
70
71#define DEBUG_TYPE "instrprof"
72
73namespace llvm {
74// Command line option to enable vtable value profiling. Defined in
75// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
76extern cl::opt<bool> EnableVTableValueProfiling;
77LLVM_ABI cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
78 "profile-correlate",
79 cl::desc("Use debug info or binary file to correlate profiles."),
80 cl::init(Val: InstrProfCorrelator::NONE),
81 cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
82 "No profile correlation"),
83 clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
84 "Use debug info to correlate"),
85 clEnumValN(InstrProfCorrelator::BINARY, "binary",
86 "Use binary to correlate")));
87} // namespace llvm
88
89namespace {
90
91cl::opt<bool> DoHashBasedCounterSplit(
92 "hash-based-counter-split",
93 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
94 cl::init(Val: true));
95
96cl::opt<bool>
97 RuntimeCounterRelocation("runtime-counter-relocation",
98 cl::desc("Enable relocating counters at runtime."),
99 cl::init(Val: false));
100
101cl::opt<bool> ValueProfileStaticAlloc(
102 "vp-static-alloc",
103 cl::desc("Do static counter allocation for value profiler"),
104 cl::init(Val: true));
105
106cl::opt<double> NumCountersPerValueSite(
107 "vp-counters-per-site",
108 cl::desc("The average number of profile counters allocated "
109 "per value profiling site."),
110 // This is set to a very small value because in real programs, only
111 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
112 // For those sites with non-zero profile, the average number of targets
113 // is usually smaller than 2.
114 cl::init(Val: 1.0));
115
116cl::opt<bool> AtomicCounterUpdateAll(
117 "instrprof-atomic-counter-update-all",
118 cl::desc("Make all profile counter updates atomic (for testing only)"),
119 cl::init(Val: false));
120
121cl::opt<bool> VerifyAtomicPromotion(
122 "verify-atomic-counter-promoted",
123 cl::desc("Check that all profile counter updates were made atomic; no-op "
124 "if atomic updates are not requested (-fprofile-update=atomic)"),
125 cl::init(Val: false));
126
127cl::opt<bool> AtomicCounterUpdatePromoted(
128 "atomic-counter-update-promoted",
129 cl::desc("Do counter update using atomic fetch add "
130 " for promoted counters only"),
131 cl::init(Val: false));
132
133cl::opt<bool> AtomicFirstCounter(
134 "atomic-first-counter",
135 cl::desc("Use atomic fetch add for first counter in a function (usually "
136 "the entry counter)"),
137 cl::init(Val: false));
138
139cl::opt<bool> ConditionalCounterUpdate(
140 "conditional-counter-update",
141 cl::desc("Do conditional counter updates in single byte counters mode)"),
142 cl::init(Val: false));
143
144// If the option is not specified, the default behavior about whether
145// counter promotion is done depends on how instrumentation lowering
146// pipeline is setup, i.e., the default value of true of this option
147// does not mean the promotion will be done by default. Explicitly
148// setting this option can override the default behavior.
149cl::opt<bool> DoCounterPromotion("do-counter-promotion",
150 cl::desc("Do counter register promotion"),
151 cl::init(Val: false));
152cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
153 "max-counter-promotions-per-loop", cl::init(Val: 20),
154 cl::desc("Max number counter promotions per loop to avoid"
155 " increasing register pressure too much"));
156
157// A debug option
158cl::opt<int>
159 MaxNumOfPromotions("max-counter-promotions", cl::init(Val: -1),
160 cl::desc("Max number of allowed counter promotions"));
161
162cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
163 "speculative-counter-promotion-max-exiting", cl::init(Val: 3),
164 cl::desc("The max number of exiting blocks of a loop to allow "
165 " speculative counter promotion"));
166
167cl::opt<bool> SpeculativeCounterPromotionToLoop(
168 "speculative-counter-promotion-to-loop",
169 cl::desc("When the option is false, if the target block is in a loop, "
170 "the promotion will be disallowed unless the promoted counter "
171 " update can be further/iteratively promoted into an acyclic "
172 " region."));
173
174static cl::opt<unsigned> OffloadPGOSampling(
175 "offload-pgo-sampling",
176 cl::desc("Log2 of the sampling period for offload PGO instrumentation. "
177 "Only 1 in every 2^N blocks is instrumented. "
178 "0 = all blocks, 1 = 50%, 2 = 25%, 3 = 12.5% (default). "
179 "Higher values reduce overhead at the cost of sparser profiles."),
180 cl::init(Val: 3));
181
182cl::opt<bool> IterativeCounterPromotion(
183 "iterative-counter-promotion", cl::init(Val: true),
184 cl::desc("Allow counter promotion across the whole loop nest."));
185
186cl::opt<bool> SkipRetExitBlock(
187 "skip-ret-exit-block", cl::init(Val: true),
188 cl::desc("Suppress counter promotion if exit blocks contain ret."));
189
190static cl::opt<bool> SampledInstr("sampled-instrumentation",
191 cl::desc("Do PGO instrumentation sampling"));
192
193static cl::opt<unsigned> SampledInstrPeriod(
194 "sampled-instr-period",
195 cl::desc("Set the profile instrumentation sample period. A sample period "
196 "of 0 is invalid. For each sample period, a fixed number of "
197 "consecutive samples will be recorded. The number is controlled "
198 "by 'sampled-instr-burst-duration' flag. The default sample "
199 "period of 65536 is optimized for generating efficient code that "
200 "leverages unsigned short integer wrapping in overflow, but this "
201 "is disabled under simple sampling (burst duration = 1)."),
202 cl::init(USHRT_MAX + 1));
203
204static cl::opt<unsigned> SampledInstrBurstDuration(
205 "sampled-instr-burst-duration",
206 cl::desc("Set the profile instrumentation burst duration, which can range "
207 "from 1 to the value of 'sampled-instr-period' (0 is invalid). "
208 "This number of samples will be recorded for each "
209 "'sampled-instr-period' count update. Setting to 1 enables simple "
210 "sampling, in which case it is recommended to set "
211 "'sampled-instr-period' to a prime number."),
212 cl::init(Val: 200));
213
/// Sampling parameters derived from the -sampled-instr-* command line flags.
///
/// Every member has a default initializer so that a default-constructed
/// config never holds indeterminate values.
struct SampledInstrumentationConfig {
  /// Number of consecutive samples recorded per period.
  unsigned BurstDuration = 0;
  /// Length of one sampling period.
  unsigned Period = 0;
  /// Whether the sampling variable is 16 bits wide (otherwise 32 bits).
  bool UseShort = false;
  /// Set when the burst duration is exactly 1.
  bool IsSimpleSampling = false;
  /// Set for burst sampling with the 65536 period, where the period check is
  /// replaced by 16-bit wrap-around.
  bool IsFastSampling = false;
};
221
222static SampledInstrumentationConfig getSampledInstrumentationConfig() {
223 SampledInstrumentationConfig config;
224 config.BurstDuration = SampledInstrBurstDuration.getValue();
225 config.Period = SampledInstrPeriod.getValue();
226 if (config.BurstDuration > config.Period)
227 report_fatal_error(
228 reason: "SampledBurstDuration must be less than or equal to SampledPeriod");
229 if (config.Period == 0 || config.BurstDuration == 0)
230 report_fatal_error(
231 reason: "SampledPeriod and SampledBurstDuration must be greater than 0");
232 config.IsSimpleSampling = (config.BurstDuration == 1);
233 // If (BurstDuration == 1 && Period == 65536), generate the simple sampling
234 // style code.
235 config.IsFastSampling =
236 (!config.IsSimpleSampling && config.Period == USHRT_MAX + 1);
237 config.UseShort = (config.Period <= USHRT_MAX) || config.IsFastSampling;
238 return config;
239}
240
241using LoadStorePair = std::pair<Instruction *, Instruction *>;
242
243static void makeAtomic(Instruction *Load, Instruction *Store) {
244 auto *Addition = dyn_cast<BinaryOperator>(Val: Store->getOperand(i: 0));
245 assert(Addition && Addition->getOpcode() == Instruction::BinaryOps::Add);
246 auto *Addend = Addition->getOperand(i_nocapture: 1);
247
248 IRBuilder<> Builder(Load);
249 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Store->getOperand(i: 1), Val: Addend,
250 Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
251 Store->eraseFromParent();
252 Addition->eraseFromParent();
253 Load->eraseFromParent();
254}
255
256static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
257 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(Val: M.getModuleFlag(Key: Flag));
258 if (!MD)
259 return 0;
260
261 // If the flag is a ConstantAsMetadata, it should be an integer representable
262 // in 64-bits.
263 return cast<ConstantInt>(Val: MD->getValue())->getZExtValue();
264}
265
266static bool enablesValueProfiling(const Module &M) {
267 return isIRPGOFlagSet(M: &M) ||
268 getIntModuleFlagOrZero(M, Flag: "EnableValueProfiling") != 0;
269}
270
271// Conservatively returns true if value profiling is enabled.
272static bool profDataReferencedByCode(const Module &M) {
273 return enablesValueProfiling(M);
274}
275
276class InstrLowerer final {
277public:
278 InstrLowerer(Module &M, const InstrProfOptions &Options,
279 std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
280 bool IsCS)
281 : M(M), Options(Options), TT(M.getTargetTriple()), IsCS(IsCS),
282 GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}
283
284 bool lower();
285
286private:
287 Module &M;
288 const InstrProfOptions Options;
289 const Triple TT;
290 // Is this lowering for the context-sensitive instrumentation.
291 const bool IsCS;
292
293 std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
294
295 const bool DataReferencedByCode;
296
297 struct PerFunctionProfileData {
298 uint32_t NumValueSites[IPVK_Last + 1] = {};
299 GlobalVariable *RegionCounters = nullptr;
300 GlobalVariable *UniformCounters =
301 nullptr; // Per-block uniform-entry counters
302 GlobalVariable *DataVar = nullptr;
303 GlobalVariable *RegionBitmaps = nullptr;
304 uint32_t NumBitmapBytes = 0;
305
306 PerFunctionProfileData() = default;
307 };
308 DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
309 // Key is virtual table variable, value is 'VTableProfData' in the form of
310 // GlobalVariable.
311 DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
312 /// If runtime relocation is enabled, this maps functions to the load
313 /// instruction that produces the profile relocation bias.
314 DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
315 std::vector<GlobalValue *> CompilerUsedVars;
316 std::vector<GlobalValue *> UsedVars;
317 std::vector<GlobalVariable *> ReferencedNames;
318 // The list of virtual table variables of which the VTableProfData is
319 // collected.
320 std::vector<GlobalVariable *> ReferencedVTables;
321 GlobalVariable *NamesVar = nullptr;
322 size_t NamesSize = 0;
323
324 StructType *ProfileDataTy = nullptr;
325
326 // vector of counter load/store pairs to be register promoted.
327 std::vector<LoadStorePair> PromotionCandidates;
328
329 int64_t TotalCountersPromoted = 0;
330
331 // Per-function cache of invariant values for GPU PGO instrumentation.
332 // Computed once at the function entry and reused across all instrumentation
333 // points to avoid redundant IR and help the optimizer.
334 struct GPUPGOInvariants {
335 Value *Matched = nullptr;
336 bool WaveSizeStored = false;
337 };
338 DenseMap<Function *, GPUPGOInvariants> GPUInvariantsCache;
339
340 /// Emit invariant PGO values at the function entry block and cache them.
341 GPUPGOInvariants &getOrCreateGPUInvariants(Function *F);
342
343 /// Lower instrumentation intrinsics in the function. Returns true if there
344 /// any lowering.
345 bool lowerIntrinsics(Function *F);
346
347 /// Register-promote counter loads and stores in loops.
348 void promoteCounterLoadStores(Function *F);
349
350 /// Returns true if relocating counters at runtime is enabled.
351 bool isRuntimeCounterRelocationEnabled() const;
352
353 /// Returns true if profile counter update register promotion is enabled.
354 bool isCounterPromotionEnabled() const;
355
356 /// Returns true if profile counter updates should be atomic.
357 bool isAtomic() const;
358
359 /// Return true if profile sampling is enabled.
360 bool isSamplingEnabled() const;
361
362 /// Count the number of instrumented value sites for the function.
363 void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
364
365 /// Replace instrprof.value.profile with a call to runtime library.
366 void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
367
368 /// Replace instrprof.cover with a store instruction to the coverage byte.
369 void lowerCover(InstrProfCoverInst *Inc);
370
371 /// Replace instrprof.timestamp with a call to
372 /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
373 void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
374
375 /// Replace instrprof.increment with an increment of the appropriate value.
376 void lowerIncrement(InstrProfIncrementInst *Inc);
377
378 /// Force emitting of name vars for unused functions.
379 void lowerCoverageData(GlobalVariable *CoverageNamesVar);
380
381 /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
382 /// using the index represented by the a temp value into a bitmap.
383 void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
384
385 /// Get the Bias value for data to access mmap-ed area.
386 /// Create it if it hasn't been seen.
387 GlobalVariable *getOrCreateBiasVar(StringRef VarName);
388
389 /// Compute the address of the counter value that this profiling instruction
390 /// acts on.
391 Value *getCounterAddress(InstrProfCntrInstBase *I);
392
393 /// Lower the incremental instructions under profile sampling predicates.
394 void doSampling(Instruction *I);
395
396 /// Get the region counters for an increment, creating them if necessary.
397 ///
398 /// If the counter array doesn't yet exist, the profile data variables
399 /// referring to them will also be created.
400 GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
401
402 /// Get the uniform entry counters for GPU divergence tracking.
403 /// These counters track how often blocks are entered with all lanes active.
404 GlobalVariable *getOrCreateUniformCounters(InstrProfCntrInstBase *Inc);
405
406 /// Create the region counters.
407 GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
408 StringRef Name,
409 GlobalValue::LinkageTypes Linkage);
410
411 /// Compute the address of the test vector bitmap that this profiling
412 /// instruction acts on.
413 Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
414
415 /// Get the region bitmaps for an increment, creating them if necessary.
416 ///
417 /// If the bitmap array doesn't yet exist, the profile data variables
418 /// referring to them will also be created.
419 GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
420
421 /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
422 /// an MC/DC Decision region. The number of bytes required is indicated by
423 /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
424 /// as part of setupProfileSection() and is conceptually very similar to
425 /// what is done for profile data counters in createRegionCounters().
426 GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
427 StringRef Name,
428 GlobalValue::LinkageTypes Linkage);
429
430 /// Set Comdat property of GV, if required.
431 void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);
432
433 /// Setup the sections into which counters and bitmaps are allocated.
434 GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
435 InstrProfSectKind IPSK);
436
437 /// Create INSTR_PROF_DATA variable for counters and bitmaps.
438 void createDataVariable(InstrProfCntrInstBase *Inc);
439
440 /// Get the counters for virtual table values, creating them if necessary.
441 void getOrCreateVTableProfData(GlobalVariable *GV);
442
443 /// Emit the section with compressed function names.
444 void emitNameData();
445
446 /// Emit the section with compressed vtable names.
447 void emitVTableNames();
448
449 /// Emit value nodes section for value profiling.
450 void emitVNodes();
451
452 /// Emit runtime registration functions for each profile data variable.
453 void emitRegistration();
454
455 /// Emit the necessary plumbing to pull in the runtime initialization.
456 /// Returns true if a change was made.
457 bool emitRuntimeHook();
458
459 /// Add uses of our data variables and runtime hook.
460 void emitUses();
461
462 /// Create a static initializer for our data, on platforms that need it,
463 /// and for any profile output file that was specified.
464 void emitInitialization();
465
466 /// Return the __llvm_profile_data struct type.
467 StructType *getProfileDataTy();
468};
469
470///
471/// A helper class to promote one counter RMW operation in the loop
472/// into register update.
473///
474/// RWM update for the counter will be sinked out of the loop after
475/// the transformation.
476///
477class PGOCounterPromoterHelper : public LoadAndStorePromoter {
478public:
479 PGOCounterPromoterHelper(
480 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
481 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
482 ArrayRef<Instruction *> InsertPts,
483 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
484 LoopInfo &LI, bool IsAtomic)
485 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
486 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI),
487 IsAtomic(IsAtomic) {
488 assert(isa<LoadInst>(L));
489 assert(isa<StoreInst>(S));
490 SSA.AddAvailableValue(BB: PH, V: Init);
491 }
492
493 void doExtraRewritesBeforeFinalDeletion() override {
494 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
495 BasicBlock *ExitBlock = ExitBlocks[i];
496 Instruction *InsertPos = InsertPts[i];
497 // Get LiveIn value into the ExitBlock. If there are multiple
498 // predecessors, the value is defined by a PHI node in this
499 // block.
500 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(BB: ExitBlock);
501 Value *Addr = cast<StoreInst>(Val: Store)->getPointerOperand();
502 Type *Ty = LiveInValue->getType();
503 IRBuilder<> Builder(InsertPos);
504 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Val: Addr)) {
505 // If isRuntimeCounterRelocationEnabled() is true then the address of
506 // the store instruction is computed with two instructions in
507 // InstrProfiling::getCounterAddress(). We need to copy those
508 // instructions to this block to compute Addr correctly.
509 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
510 // %Addr = inttoptr i64 %BiasAdd to i64*
511 auto *OrigBiasInst = dyn_cast<BinaryOperator>(Val: AddrInst->getOperand(i_nocapture: 0));
512 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
513 Value *BiasInst = Builder.Insert(I: OrigBiasInst->clone());
514 Addr = Builder.CreateIntToPtr(V: BiasInst,
515 DestTy: PointerType::getUnqual(C&: Ty->getContext()));
516 }
517 auto *TargetLoop =
518 IterativeCounterPromotion ? LI.getLoopFor(BB: ExitBlock) : nullptr;
519 // Generate the relaxed atomic RMW if we've asked for it and no more
520 // promotion is possible.
521 if ((IsAtomic && !TargetLoop) || AtomicCounterUpdatePromoted)
522 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Addr, Val: LiveInValue,
523 Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
524 else {
525 LoadInst *OldVal = Builder.CreateLoad(Ty, Ptr: Addr, Name: "pgocount.promoted");
526 auto *NewVal = Builder.CreateAdd(LHS: OldVal, RHS: LiveInValue);
527 auto *NewStore = Builder.CreateStore(Val: NewVal, Ptr: Addr);
528
529 // Now update the parent loop's candidate list:
530 if (TargetLoop)
531 LoopToCandidates[TargetLoop].emplace_back(Args&: OldVal, Args&: NewStore);
532 }
533 }
534 }
535
536private:
537 Instruction *Store;
538 ArrayRef<BasicBlock *> ExitBlocks;
539 ArrayRef<Instruction *> InsertPts;
540 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
541 LoopInfo &LI;
542 const bool IsAtomic;
543};
544
545/// A helper class to do register promotion for all profile counter
546/// updates in a loop.
547///
548class PGOCounterPromoter {
549public:
550 PGOCounterPromoter(
551 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
552 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI, bool IsAtomic)
553 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI),
554 IsAtomic(IsAtomic) {
555
556 // Skip collection of ExitBlocks and InsertPts for loops that will not be
557 // able to have counters promoted.
558 SmallVector<BasicBlock *, 8> LoopExitBlocks;
559 SmallPtrSet<BasicBlock *, 8> BlockSet;
560
561 L.getExitBlocks(ExitBlocks&: LoopExitBlocks);
562 if (!isPromotionPossible(LP: &L, LoopExitBlocks))
563 return;
564
565 for (BasicBlock *ExitBlock : LoopExitBlocks) {
566 if (BlockSet.insert(Ptr: ExitBlock).second &&
567 llvm::none_of(Range: predecessors(BB: ExitBlock), P: [&](const BasicBlock *Pred) {
568 return llvm::isPresplitCoroSuspendExitEdge(Src: *Pred, Dest: *ExitBlock);
569 })) {
570 ExitBlocks.push_back(Elt: ExitBlock);
571 InsertPts.push_back(Elt: &*ExitBlock->getFirstInsertionPt());
572 }
573 }
574 }
575
576 bool run(int64_t *NumPromoted) {
577 bool RC = promoteCandidates(NumPromoted);
578 // In certain case, e.g. with -fprofile-update=atomic, we want to generate
579 // atomic updates of the PGO counters, but also perform promotion of these
580 // updates out of loops to reduce train time. The strategy is:
581 // 1) generate non-atomic load-increment-store sequence of instructions
582 // during lowerIntrinsics phase,
583 // 2) perform the promotion (in promoteCandidates function), then
584 // 3) convert all (promoted and unpromotable) updates to atomicRMW.
585 // This requires that promoted candidates are set to nullptr in the
586 // LoopToCandidates[&L] array by the promoteCandidates() function.
587 if (IsAtomic)
588 for (auto &Cand : LoopToCandidates[&L])
589 if (Cand.first != nullptr && Cand.second != nullptr)
590 makeAtomic(Load: Cand.first, Store: Cand.second);
591 return RC;
592 }
593
594private:
595 bool promoteCandidates(int64_t *NumPromoted) {
596 // Skip 'infinite' loops:
597 if (ExitBlocks.size() == 0)
598 return false;
599
600 // Skip if any of the ExitBlocks contains a ret instruction.
601 // This is to prevent dumping of incomplete profile -- if the
602 // the loop is a long running loop and dump is called in the middle
603 // of the loop, the result profile is incomplete.
604 // FIXME: add other heuristics to detect long running loops.
605 if (SkipRetExitBlock) {
606 for (auto *BB : ExitBlocks)
607 if (isa<ReturnInst>(Val: BB->getTerminator()))
608 return false;
609 }
610
611 unsigned MaxProm = getMaxNumOfPromotionsInLoop(LP: &L);
612 if (MaxProm == 0)
613 return false;
614
615 [[maybe_unused]] auto *Ptr = LoopToCandidates.getPointerIntoBucketsArray();
616 unsigned Promoted = 0;
617 for (auto &Cand : LoopToCandidates[&L]) {
618 SmallVector<PHINode *, 4> NewPHIs;
619 SSAUpdater SSA(&NewPHIs);
620 Value *InitVal = ConstantInt::get(Ty: Cand.first->getType(), V: 0);
621
622 // If BFI is set, we will use it to guide the promotions.
623 if (BFI) {
624 auto *BB = Cand.first->getParent();
625 auto InstrCount = BFI->getBlockProfileCount(BB);
626 if (!InstrCount)
627 continue;
628 auto PreheaderCount = BFI->getBlockProfileCount(BB: L.getLoopPreheader());
629 // If the average loop trip count is not greater than 1.5, we skip
630 // promotion.
631 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
632 continue;
633 }
634
635 PGOCounterPromoterHelper Promoter(
636 Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(),
637 ExitBlocks, InsertPts, LoopToCandidates, LI, IsAtomic);
638 Promoter.run(Insts: SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
639
640 assert(LoopToCandidates.isPointerIntoBucketsArray(Ptr) &&
641 "References into LoopToCandidates might be invalid");
642 Cand = {nullptr, nullptr};
643
644 Promoted++;
645 if (Promoted >= MaxProm)
646 break;
647
648 (*NumPromoted)++;
649 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
650 break;
651 }
652
653 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
654 << L.getLoopDepth() << ")\n");
655 return Promoted != 0;
656 }
657
658private:
659 bool allowSpeculativeCounterPromotion(Loop *LP) {
660 SmallVector<BasicBlock *, 8> ExitingBlocks;
661 L.getExitingBlocks(ExitingBlocks);
662 // Not considierered speculative.
663 if (ExitingBlocks.size() == 1)
664 return true;
665 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
666 return false;
667 return true;
668 }
669
670 // Check whether the loop satisfies the basic conditions needed to perform
671 // Counter Promotions.
672 bool
673 isPromotionPossible(Loop *LP,
674 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
675 // We can't insert into a catchswitch.
676 if (llvm::any_of(Range: LoopExitBlocks, P: [](BasicBlock *Exit) {
677 return isa<CatchSwitchInst>(Val: Exit->getTerminator());
678 }))
679 return false;
680
681 if (!LP->hasDedicatedExits())
682 return false;
683
684 BasicBlock *PH = LP->getLoopPreheader();
685 if (!PH)
686 return false;
687
688 return true;
689 }
690
691 // Returns the max number of Counter Promotions for LP.
692 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
693 SmallVector<BasicBlock *, 8> LoopExitBlocks;
694 LP->getExitBlocks(ExitBlocks&: LoopExitBlocks);
695 if (!isPromotionPossible(LP, LoopExitBlocks))
696 return 0;
697
698 SmallVector<BasicBlock *, 8> ExitingBlocks;
699 LP->getExitingBlocks(ExitingBlocks);
700
701 // If BFI is set, we do more aggressive promotions based on BFI.
702 if (BFI)
703 return (unsigned)-1;
704
705 // Not considierered speculative.
706 if (ExitingBlocks.size() == 1)
707 return MaxNumOfPromotionsPerLoop;
708
709 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
710 return 0;
711
712 // Whether the target block is in a loop does not matter:
713 if (SpeculativeCounterPromotionToLoop)
714 return MaxNumOfPromotionsPerLoop;
715
716 // Now check the target block:
717 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
718 for (auto *TargetBlock : LoopExitBlocks) {
719 auto *TargetLoop = LI.getLoopFor(BB: TargetBlock);
720 if (!TargetLoop)
721 continue;
722 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(LP: TargetLoop);
723 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
724 MaxProm =
725 std::min(a: MaxProm, b: std::max(a: MaxPromForTarget, b: PendingCandsInTarget) -
726 PendingCandsInTarget);
727 }
728 return MaxProm;
729 }
730
731 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
732 SmallVector<BasicBlock *, 8> ExitBlocks;
733 SmallVector<Instruction *, 8> InsertPts;
734 Loop &L;
735 LoopInfo &LI;
736 BlockFrequencyInfo *BFI;
737 const bool IsAtomic; // Whether to convert counter updates to atomics.
738};
739
/// Kind of value profiling call.
enum class ValueProfilingCallType {
  /// Individual values are tracked. Currently used for indirect call target
  /// profiling.
  Default,

  /// The memop size value profiling.
  MemOp
};
748
749} // end anonymous namespace
750
751PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
752 ModuleAnalysisManager &AM) {
753 FunctionAnalysisManager &FAM =
754 AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
755 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
756 return FAM.getResult<TargetLibraryAnalysis>(IR&: F);
757 };
758 InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
759 if (!Lowerer.lower())
760 return PreservedAnalyses::all();
761
762 return PreservedAnalyses::none();
763}
764
765//
766// Perform instrumentation sampling.
767//
// There are 3 flavors of sampling:
769// (1) Full burst sampling: We transform:
770// Increment_Instruction;
771// to:
772// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
773// Increment_Instruction;
774// }
775// __llvm_profile_sampling__ += 1;
776// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
777// __llvm_profile_sampling__ = 0;
778// }
779//
780// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
781// counters (value-instrumentation and edge instrumentation).
782//
783// (2) Fast burst sampling:
// The "__llvm_profile_sampling__" variable has an unsigned type, meaning it
// will wrap around to zero when it overflows. In this case, the second check
// is unnecessary, so we won't generate check2 when the SampledInstrPeriod is
// set to 65536 (64K). The code after:
788// if (__llvm_profile_sampling__ <= SampledInstrBurstDuration - 1) {
789// Increment_Instruction;
790// }
791// __llvm_profile_sampling__ += 1;
792//
793// (3) Simple sampling:
794// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
795// __llvm_profile_sampling__ += 1;
796// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
797// __llvm_profile_sampling__ = 0;
798// Increment_Instruction;
799// }
800//
801// Note that, the code snippet after the transformation can still be counter
802// promoted. However, with sampling enabled, counter updates are expected to
803// be infrequent, making the benefits of counter promotion negligible.
804// Moreover, counter promotion can potentially cause issues in server
805// applications, particularly when the counters are dumped without a clean
806// exit. To mitigate this risk, counter promotion is disabled by default when
807// sampling is enabled. This behavior can be overridden using the internal
808// option.
809void InstrLowerer::doSampling(Instruction *I) {
810 if (!isSamplingEnabled())
811 return;
812
813 SampledInstrumentationConfig config = getSampledInstrumentationConfig();
814 auto GetConstant = [&config](IRBuilder<> &Builder, uint32_t C) {
815 if (config.UseShort)
816 return Builder.getInt16(C);
817 else
818 return Builder.getInt32(C);
819 };
820
821 IntegerType *SamplingVarTy;
822 if (config.UseShort)
823 SamplingVarTy = Type::getInt16Ty(C&: M.getContext());
824 else
825 SamplingVarTy = Type::getInt32Ty(C&: M.getContext());
826 auto *SamplingVar =
827 M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
828 assert(SamplingVar && "SamplingVar not set properly");
829
830 // Create the condition for checking the burst duration.
831 Instruction *SamplingVarIncr;
832 Value *NewSamplingVarVal;
833 MDBuilder MDB(I->getContext());
834 MDNode *BranchWeight;
835 IRBuilder<> CondBuilder(I);
836 auto *LoadSamplingVar = CondBuilder.CreateLoad(Ty: SamplingVarTy, Ptr: SamplingVar);
837 if (config.IsSimpleSampling) {
838 // For the simple sampling, just create the load and increments.
839 IRBuilder<> IncBuilder(I);
840 NewSamplingVarVal =
841 IncBuilder.CreateAdd(LHS: LoadSamplingVar, RHS: GetConstant(IncBuilder, 1));
842 SamplingVarIncr = IncBuilder.CreateStore(Val: NewSamplingVarVal, Ptr: SamplingVar);
843 } else {
844 // For the burst-sampling, create the conditional update.
845 auto *DurationCond = CondBuilder.CreateICmpULE(
846 LHS: LoadSamplingVar, RHS: GetConstant(CondBuilder, config.BurstDuration - 1));
847 BranchWeight = MDB.createBranchWeights(
848 TrueWeight: config.BurstDuration, FalseWeight: config.Period - config.BurstDuration);
849 Instruction *ThenTerm = SplitBlockAndInsertIfThen(
850 Cond: DurationCond, SplitBefore: I, /* Unreachable */ false, BranchWeights: BranchWeight);
851 IRBuilder<> IncBuilder(I);
852 NewSamplingVarVal =
853 IncBuilder.CreateAdd(LHS: LoadSamplingVar, RHS: GetConstant(IncBuilder, 1));
854 SamplingVarIncr = IncBuilder.CreateStore(Val: NewSamplingVarVal, Ptr: SamplingVar);
855 I->moveBefore(InsertPos: ThenTerm->getIterator());
856 }
857
858 if (config.IsFastSampling)
859 return;
860
861 // Create the condition for checking the period.
862 Instruction *ThenTerm, *ElseTerm;
863 IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
864 auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
865 LHS: NewSamplingVarVal, RHS: GetConstant(PeriodCondBuilder, config.Period));
866 BranchWeight = MDB.createBranchWeights(TrueWeight: 1, FalseWeight: config.Period - 1);
867 SplitBlockAndInsertIfThenElse(Cond: PeriodCond, SplitBefore: SamplingVarIncr, ThenTerm: &ThenTerm,
868 ElseTerm: &ElseTerm, BranchWeights: BranchWeight);
869
870 // For the simple sampling, the counter update happens in sampling var reset.
871 if (config.IsSimpleSampling)
872 I->moveBefore(InsertPos: ThenTerm->getIterator());
873
874 IRBuilder<> ResetBuilder(ThenTerm);
875 ResetBuilder.CreateStore(Val: GetConstant(ResetBuilder, 0), Ptr: SamplingVar);
876 SamplingVarIncr->moveBefore(InsertPos: ElseTerm->getIterator());
877}
878
879bool InstrLowerer::lowerIntrinsics(Function *F) {
880 bool MadeChange = false;
881 PromotionCandidates.clear();
882 SmallVector<InstrProfInstBase *, 8> InstrProfInsts;
883
884 // To ensure compatibility with sampling, we save the intrinsics into
885 // a buffer to prevent potential breakage of the iterator (as the
886 // intrinsics will be moved to a different BB).
887 for (BasicBlock &BB : *F) {
888 for (Instruction &Instr : llvm::make_early_inc_range(Range&: BB)) {
889 if (auto *IP = dyn_cast<InstrProfInstBase>(Val: &Instr))
890 InstrProfInsts.push_back(Elt: IP);
891 }
892 }
893
894 for (auto *Instr : InstrProfInsts) {
895 doSampling(I: Instr);
896 if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Val: Instr)) {
897 lowerIncrement(Inc: IPIS);
898 MadeChange = true;
899 } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Val: Instr)) {
900 lowerIncrement(Inc: IPI);
901 MadeChange = true;
902 } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Val: Instr)) {
903 lowerTimestamp(TimestampInstruction: IPC);
904 MadeChange = true;
905 } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Val: Instr)) {
906 lowerCover(Inc: IPC);
907 MadeChange = true;
908 } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Val: Instr)) {
909 lowerValueProfileInst(Ins: IPVP);
910 MadeChange = true;
911 } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Val: Instr)) {
912 IPMP->eraseFromParent();
913 MadeChange = true;
914 } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Val: Instr)) {
915 lowerMCDCTestVectorBitmapUpdate(Ins: IPBU);
916 MadeChange = true;
917 }
918 }
919
920 if (!MadeChange)
921 return false;
922
923 promoteCounterLoadStores(F);
924 return true;
925}
926
927bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
928 // Mach-O don't support weak external references.
929 if (TT.isOSBinFormatMachO())
930 return false;
931
932 if (RuntimeCounterRelocation.getNumOccurrences() > 0)
933 return RuntimeCounterRelocation;
934
935 // Fuchsia uses runtime counter relocation by default.
936 return TT.isOSFuchsia();
937}
938
939bool InstrLowerer::isSamplingEnabled() const {
940 if (SampledInstr.getNumOccurrences() > 0)
941 return SampledInstr;
942 return Options.Sampling;
943}
944
945bool InstrLowerer::isCounterPromotionEnabled() const {
946 if (DoCounterPromotion.getNumOccurrences() > 0)
947 return DoCounterPromotion;
948 return Options.DoCounterPromotion;
949}
950
951bool InstrLowerer::isAtomic() const {
952 return Options.Atomic || AtomicCounterUpdateAll;
953}
954
955static void doAtomicCheck(Function *F) {
956 for (const llvm::Instruction &I : llvm::instructions(F)) {
957 const Value *Addr = nullptr;
958 if (const LoadInst *LI = dyn_cast<LoadInst>(Val: &I))
959 Addr = LI->getOperand(i_nocapture: 0);
960 else if (const StoreInst *LI = dyn_cast<StoreInst>(Val: &I))
961 Addr = LI->getOperand(i_nocapture: 1);
962
963 if (Addr && Addr->stripInBoundsOffsets()->getName().starts_with(
964 Prefix: getInstrProfCountersVarPrefix())) {
965 LLVM_DEBUG(dbgs() << "Missed candidate: "; I.dump());
966 report_fatal_error(reason: "Candidate load/store not converted to atomic");
967 }
968 }
969}
970
971void InstrLowerer::promoteCounterLoadStores(Function *F) {
972 if (!isCounterPromotionEnabled())
973 return;
974
975 DominatorTree DT(*F);
976 LoopInfo LI(DT);
977 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
978
979 std::unique_ptr<BlockFrequencyInfo> BFI;
980 if (Options.UseBFIInPromotion) {
981 std::unique_ptr<BranchProbabilityInfo> BPI;
982 BPI.reset(p: new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
983 BFI.reset(p: new BlockFrequencyInfo(*F, *BPI, LI));
984 }
985
986 for (const auto &LoadStore : PromotionCandidates) {
987 auto *CounterLoad = LoadStore.first;
988 auto *CounterStore = LoadStore.second;
989 BasicBlock *BB = CounterLoad->getParent();
990 Loop *ParentLoop = LI.getLoopFor(BB);
991 if (!ParentLoop) {
992 if (isAtomic())
993 makeAtomic(Load: CounterLoad, Store: CounterStore);
994 continue;
995 }
996 LoopPromotionCandidates[ParentLoop].emplace_back(Args&: CounterLoad, Args&: CounterStore);
997 }
998
999 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
1000
1001 // Do a post-order traversal of the loops so that counter updates can be
1002 // iteratively hoisted outside the loop nest.
1003 for (auto *Loop : llvm::reverse(C&: Loops)) {
1004 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get(),
1005 isAtomic());
1006 Promoter.run(NumPromoted: &TotalCountersPromoted);
1007 }
1008
1009 if (isAtomic() && VerifyAtomicPromotion)
1010 doAtomicCheck(F);
1011}
1012
1013static bool needsRuntimeHookUnconditionally(const Triple &TT) {
1014 // On Fuchsia, we only need runtime hook if any counters are present.
1015 if (TT.isOSFuchsia())
1016 return false;
1017
1018 return true;
1019}
1020
1021/// Check if the module contains uses of any profiling intrinsics.
1022static bool containsProfilingIntrinsics(Module &M) {
1023 auto containsIntrinsic = [&](int ID) {
1024 if (auto *F = Intrinsic::getDeclarationIfExists(M: &M, id: ID))
1025 return !F->use_empty();
1026 return false;
1027 };
1028 return containsIntrinsic(Intrinsic::instrprof_cover) ||
1029 containsIntrinsic(Intrinsic::instrprof_increment) ||
1030 containsIntrinsic(Intrinsic::instrprof_increment_step) ||
1031 containsIntrinsic(Intrinsic::instrprof_timestamp) ||
1032 containsIntrinsic(Intrinsic::instrprof_value_profile);
1033}
1034
1035bool InstrLowerer::lower() {
1036 bool MadeChange = false;
1037 bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
1038 if (NeedsRuntimeHook)
1039 MadeChange = emitRuntimeHook();
1040
1041 if (!IsCS && isSamplingEnabled())
1042 createProfileSamplingVar(M);
1043
1044 bool ContainsProfiling = containsProfilingIntrinsics(M);
1045 GlobalVariable *CoverageNamesVar =
1046 M.getNamedGlobal(Name: getCoverageUnusedNamesVarName());
1047 // Improve compile time by avoiding linear scans when there is no work.
1048 if (!ContainsProfiling && !CoverageNamesVar)
1049 return MadeChange;
1050
1051 // We did not know how many value sites there would be inside
1052 // the instrumented function. This is counting the number of instrumented
1053 // target value sites to enter it as field in the profile data variable.
1054 for (Function &F : M) {
1055 InstrProfCntrInstBase *FirstProfInst = nullptr;
1056 for (BasicBlock &BB : F) {
1057 for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
1058 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Val&: I))
1059 computeNumValueSiteCounts(Ins: Ind);
1060 else {
1061 if (FirstProfInst == nullptr &&
1062 (isa<InstrProfIncrementInst>(Val: I) || isa<InstrProfCoverInst>(Val: I)))
1063 FirstProfInst = dyn_cast<InstrProfCntrInstBase>(Val&: I);
1064 // If the MCDCBitmapParameters intrinsic seen, create the bitmaps.
1065 if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(Val&: I))
1066 static_cast<void>(getOrCreateRegionBitmaps(Inc: Params));
1067 }
1068 }
1069 }
1070
1071 // Use a profile intrinsic to create the region counters and data variable.
1072 // Also create the data variable based on the MCDCParams.
1073 if (FirstProfInst != nullptr) {
1074 static_cast<void>(getOrCreateRegionCounters(Inc: FirstProfInst));
1075 }
1076 }
1077
1078 if (EnableVTableValueProfiling)
1079 for (GlobalVariable &GV : M.globals())
1080 // Global variables with type metadata are virtual table variables.
1081 if (GV.hasMetadata(KindID: LLVMContext::MD_type))
1082 getOrCreateVTableProfData(GV: &GV);
1083
1084 for (Function &F : M)
1085 MadeChange |= lowerIntrinsics(F: &F);
1086
1087 if (CoverageNamesVar) {
1088 lowerCoverageData(CoverageNamesVar);
1089 MadeChange = true;
1090 }
1091
1092 if (!MadeChange)
1093 return false;
1094
1095 emitVNodes();
1096 emitNameData();
1097 emitVTableNames();
1098
1099 // Emit runtime hook for the cases where the target does not unconditionally
1100 // require pulling in profile runtime, and coverage is enabled on code that is
1101 // not eliminated by the front-end, e.g. unused functions with internal
1102 // linkage.
1103 if (!NeedsRuntimeHook && ContainsProfiling)
1104 emitRuntimeHook();
1105
1106 emitRegistration();
1107 emitUses();
1108 emitInitialization();
1109 return true;
1110}
1111
1112static FunctionCallee getOrInsertValueProfilingCall(
1113 Module &M, const TargetLibraryInfo &TLI,
1114 ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
1115 LLVMContext &Ctx = M.getContext();
1116 auto *ReturnTy = Type::getVoidTy(C&: M.getContext());
1117
1118 AttributeList AL;
1119 if (auto AK = TLI.getExtAttrForI32Param(Signed: false))
1120 AL = AL.addParamAttribute(C&: M.getContext(), ArgNo: 2, Kind: AK);
1121
1122 assert((CallType == ValueProfilingCallType::Default ||
1123 CallType == ValueProfilingCallType::MemOp) &&
1124 "Must be Default or MemOp");
1125 Type *ParamTypes[] = {
1126#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
1127#include "llvm/ProfileData/InstrProfData.inc"
1128 };
1129 auto *ValueProfilingCallTy =
1130 FunctionType::get(Result: ReturnTy, Params: ArrayRef(ParamTypes), isVarArg: false);
1131 StringRef FuncName = CallType == ValueProfilingCallType::Default
1132 ? getInstrProfValueProfFuncName()
1133 : getInstrProfValueProfMemOpFuncName();
1134 return M.getOrInsertFunction(Name: FuncName, T: ValueProfilingCallTy, AttributeList: AL);
1135}
1136
1137void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
1138 GlobalVariable *Name = Ind->getName();
1139 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1140 uint64_t Index = Ind->getIndex()->getZExtValue();
1141 auto &PD = ProfileDataMap[Name];
1142 PD.NumValueSites[ValueKind] =
1143 std::max(a: PD.NumValueSites[ValueKind], b: (uint32_t)(Index + 1));
1144}
1145
1146void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
1147 // TODO: Value profiling heavily depends on the data section which is omitted
1148 // in lightweight mode. We need to move the value profile pointer to the
1149 // Counter struct to get this working.
1150 assert(
1151 ProfileCorrelate == InstrProfCorrelator::NONE &&
1152 "Value profiling is not yet supported with lightweight instrumentation");
1153 GlobalVariable *Name = Ind->getName();
1154 auto It = ProfileDataMap.find(Val: Name);
1155 assert(It != ProfileDataMap.end() && It->second.DataVar &&
1156 "value profiling detected in function with no counter increment");
1157
1158 GlobalVariable *DataVar = It->second.DataVar;
1159 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1160 uint64_t Index = Ind->getIndex()->getZExtValue();
1161 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
1162 Index += It->second.NumValueSites[Kind];
1163
1164 IRBuilder<> Builder(Ind);
1165 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
1166 llvm::InstrProfValueKind::IPVK_MemOPSize);
1167 CallInst *Call = nullptr;
1168 auto *TLI = &GetTLI(*Ind->getFunction());
1169 auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1170 C: DataVar, Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0));
1171
1172 // To support value profiling calls within Windows exception handlers, funclet
1173 // information contained within operand bundles needs to be copied over to
1174 // the library call. This is required for the IR to be processed by the
1175 // WinEHPrepare pass.
1176 SmallVector<OperandBundleDef, 1> OpBundles;
1177 Ind->getOperandBundlesAsDefs(Defs&: OpBundles);
1178 if (!IsMemOpSize) {
1179 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1180 Builder.getInt32(C: Index)};
1181 Call = Builder.CreateCall(Callee: getOrInsertValueProfilingCall(M, TLI: *TLI), Args,
1182 OpBundles);
1183 } else {
1184 Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
1185 Builder.getInt32(C: Index)};
1186 Call = Builder.CreateCall(
1187 Callee: getOrInsertValueProfilingCall(M, TLI: *TLI, CallType: ValueProfilingCallType::MemOp),
1188 Args, OpBundles);
1189 }
1190 if (auto AK = TLI->getExtAttrForI32Param(Signed: false))
1191 Call->addParamAttr(ArgNo: 2, Kind: AK);
1192 Ind->replaceAllUsesWith(V: Call);
1193 Ind->eraseFromParent();
1194}
1195
1196GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
1197 GlobalVariable *Bias = M.getGlobalVariable(Name: VarName);
1198 if (Bias)
1199 return Bias;
1200
1201 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
1202
1203 // Compiler must define this variable when runtime counter relocation
1204 // is being used. Runtime has a weak external reference that is used
1205 // to check whether that's the case or not.
1206 Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
1207 Constant::getNullValue(Ty: Int64Ty), VarName);
1208 Bias->setVisibility(GlobalVariable::HiddenVisibility);
1209 // A definition that's weak (linkonce_odr) without being in a COMDAT
1210 // section wouldn't lead to link errors, but it would lead to a dead
1211 // data word from every TU but one. Putting it in COMDAT ensures there
1212 // will be exactly one data slot in the link.
1213 if (TT.supportsCOMDAT())
1214 Bias->setComdat(M.getOrInsertComdat(Name: VarName));
1215
1216 return Bias;
1217}
1218
1219Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
1220 auto *Counters = getOrCreateRegionCounters(Inc: I);
1221 IRBuilder<> Builder(I);
1222
1223 if (isa<InstrProfTimestampInst>(Val: I))
1224 Counters->setAlignment(Align(8));
1225
1226 auto *Addr = Builder.CreateConstInBoundsGEP2_32(
1227 Ty: Counters->getValueType(), Ptr: Counters, Idx0: 0, Idx1: I->getIndex()->getZExtValue());
1228
1229 if (!isRuntimeCounterRelocationEnabled())
1230 return Addr;
1231
1232 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
1233 Function *Fn = I->getParent()->getParent();
1234 LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
1235 if (!BiasLI) {
1236 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1237 auto *Bias = getOrCreateBiasVar(VarName: getInstrProfCounterBiasVarName());
1238 BiasLI = EntryBuilder.CreateLoad(Ty: Int64Ty, Ptr: Bias, Name: "profc_bias");
1239 // Bias doesn't change after startup.
1240 BiasLI->setMetadata(KindID: LLVMContext::MD_invariant_load,
1241 Node: MDNode::get(Context&: M.getContext(), MDs: {}));
1242 }
1243 auto *Add = Builder.CreateAdd(LHS: Builder.CreatePtrToInt(V: Addr, DestTy: Int64Ty), RHS: BiasLI);
1244 return Builder.CreateIntToPtr(V: Add, DestTy: Addr->getType());
1245}
1246
1247Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
1248 auto *Bitmaps = getOrCreateRegionBitmaps(Inc: I);
1249 if (!isRuntimeCounterRelocationEnabled())
1250 return Bitmaps;
1251
1252 // Put BiasLI onto the entry block.
1253 Type *Int64Ty = Type::getInt64Ty(C&: M.getContext());
1254 Function *Fn = I->getFunction();
1255 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1256 auto *Bias = getOrCreateBiasVar(VarName: getInstrProfBitmapBiasVarName());
1257 auto *BiasLI = EntryBuilder.CreateLoad(Ty: Int64Ty, Ptr: Bias, Name: "profbm_bias");
1258 // Assume BiasLI invariant (in the function at least)
1259 BiasLI->setMetadata(KindID: LLVMContext::MD_invariant_load,
1260 Node: MDNode::get(Context&: M.getContext(), MDs: {}));
1261
1262 // Add Bias to Bitmaps and put it before the intrinsic.
1263 IRBuilder<> Builder(I);
1264 return Builder.CreatePtrAdd(Ptr: Bitmaps, Offset: BiasLI, Name: "profbm_addr");
1265}
1266
1267void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
1268 auto *Addr = getCounterAddress(I: CoverInstruction);
1269 IRBuilder<> Builder(CoverInstruction);
1270 if (ConditionalCounterUpdate) {
1271 Instruction *SplitBefore = CoverInstruction->getNextNode();
1272 auto &Ctx = CoverInstruction->getParent()->getContext();
1273 auto *Int8Ty = llvm::Type::getInt8Ty(C&: Ctx);
1274 Value *Load = Builder.CreateLoad(Ty: Int8Ty, Ptr: Addr, Name: "pgocount");
1275 Value *Cmp = Builder.CreateIsNotNull(Arg: Load, Name: "pgocount.ifnonzero");
1276 Instruction *ThenBranch =
1277 SplitBlockAndInsertIfThen(Cond: Cmp, SplitBefore, Unreachable: false);
1278 Builder.SetInsertPoint(ThenBranch);
1279 }
1280
1281 // We store zero to represent that this block is covered.
1282 Builder.CreateStore(Val: Builder.getInt8(C: 0), Ptr: Addr);
1283 CoverInstruction->eraseFromParent();
1284}
1285
1286void InstrLowerer::lowerTimestamp(
1287 InstrProfTimestampInst *TimestampInstruction) {
1288 assert(TimestampInstruction->getIndex()->isNullValue() &&
1289 "timestamp probes are always the first probe for a function");
1290 auto &Ctx = M.getContext();
1291 auto *TimestampAddr = getCounterAddress(I: TimestampInstruction);
1292 IRBuilder<> Builder(TimestampInstruction);
1293 auto *CalleeTy =
1294 FunctionType::get(Result: Type::getVoidTy(C&: Ctx), Params: TimestampAddr->getType(), isVarArg: false);
1295 auto Callee = M.getOrInsertFunction(
1296 INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), T: CalleeTy);
1297 Builder.CreateCall(Callee, Args: {TimestampAddr});
1298 TimestampInstruction->eraseFromParent();
1299}
1300
1301InstrLowerer::GPUPGOInvariants &
1302InstrLowerer::getOrCreateGPUInvariants(Function *F) {
1303 auto It = GPUInvariantsCache.find(Val: F);
1304 if (It != GPUInvariantsCache.end())
1305 return It->second;
1306
1307 LLVMContext &Context = M.getContext();
1308 auto *Int32Ty = Type::getInt32Ty(C&: Context);
1309
1310 BasicBlock &EntryBB = F->getEntryBlock();
1311 IRBuilder<> Builder(&*EntryBB.getFirstInsertionPt());
1312
1313 Value *Matched = ConstantInt::getTrue(Context);
1314 if (OffloadPGOSampling > 0) {
1315 FunctionCallee IsSampledFn =
1316 M.getOrInsertFunction(Name: RTLIB::RuntimeLibcallsInfo::getLibcallImplName(
1317 CallImpl: RTLIB::impl___llvm_profile_sampling_gpu),
1318 RetTy: Int32Ty, Args: Int32Ty);
1319 Value *SampledInt = Builder.CreateCall(
1320 Callee: IsSampledFn, Args: {ConstantInt::get(Ty: Int32Ty, V: OffloadPGOSampling)},
1321 Name: "pgo.sampled");
1322 Matched = Builder.CreateICmpNE(LHS: SampledInt, RHS: ConstantInt::get(Ty: Int32Ty, V: 0),
1323 Name: "pgo.matched");
1324 }
1325
1326 auto &Inv = GPUInvariantsCache[F];
1327 Inv.Matched = Matched;
1328 return Inv;
1329}
1330
1331void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
1332 IRBuilder<> Builder(Inc);
1333 if (isGPUProfTarget(M)) {
1334 Function *F = Inc->getFunction();
1335 auto &Inv = getOrCreateGPUInvariants(F);
1336
1337 LLVMContext &Context = M.getContext();
1338 auto *Int64Ty = Type::getInt64Ty(C&: Context);
1339 auto *PtrTy = PointerType::getUnqual(C&: Context);
1340
1341 auto *Addr = getCounterAddress(I: Inc);
1342
1343 // Store the device wave/warp size into the profile data struct once per
1344 // function. AMDGPU folds llvm.amdgcn.wavefrontsize to the subtarget's
1345 // constant; other GPUs use their fixed warp size.
1346 if (!Inv.WaveSizeStored) {
1347 Inv.WaveSizeStored = true;
1348 GlobalVariable *NamePtr = Inc->getName();
1349 auto &PD = ProfileDataMap[NamePtr];
1350 if (PD.DataVar) {
1351 IRBuilder<> EntryBuilder(&*F->getEntryBlock().getFirstInsertionPt());
1352 Value *WaveSize16 = nullptr;
1353 // Look the intrinsic up by name so this target-agnostic pass does not
1354 // pull in IntrinsicsAMDGPU.h. AMDGPU folds the intrinsic to the
1355 // subtarget's wavefront size; other GPUs fall back to a 32-lane warp.
1356 if (TT.isAMDGPU()) {
1357 Intrinsic::ID WaveSizeID =
1358 Intrinsic::lookupIntrinsicID(Name: "llvm.amdgcn.wavefrontsize");
1359 if (WaveSizeID != Intrinsic::not_intrinsic) {
1360 Function *WaveSizeFn =
1361 Intrinsic::getOrInsertDeclaration(M: &M, id: WaveSizeID);
1362 Value *WaveSize = EntryBuilder.CreateCall(Callee: WaveSizeFn);
1363 WaveSize16 = EntryBuilder.CreateTrunc(
1364 V: WaveSize, DestTy: Type::getInt16Ty(C&: Context), Name: "wavesize.i16");
1365 }
1366 }
1367 if (!WaveSize16)
1368 WaveSize16 = ConstantInt::get(Ty: Type::getInt16Ty(C&: Context), V: 32);
1369 Value *WaveSizeAddr = EntryBuilder.CreateStructGEP(
1370 Ty: PD.DataVar->getValueType(), Ptr: PD.DataVar, Idx: 9, Name: "profd.wavesize");
1371 EntryBuilder.CreateStore(Val: WaveSize16, Ptr: WaveSizeAddr);
1372 }
1373 }
1374
1375 GlobalVariable *UniformCounters = getOrCreateUniformCounters(Inc);
1376 Value *UniformAddrArg = ConstantPointerNull::get(T: PtrTy);
1377 if (UniformCounters) {
1378 Value *UniformIndices[] = {Builder.getInt32(C: 0), Inc->getIndex()};
1379 Value *UniformAddr = Builder.CreateInBoundsGEP(
1380 Ty: UniformCounters->getValueType(), Ptr: UniformCounters, IdxList: UniformIndices,
1381 Name: "unifctr.addr");
1382 UniformAddrArg =
1383 Builder.CreatePointerBitCastOrAddrSpaceCast(V: UniformAddr, DestTy: PtrTy);
1384 }
1385 Value *CastAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(V: Addr, DestTy: PtrTy);
1386 Value *StepI64 =
1387 Builder.CreateZExtOrTrunc(V: Inc->getStep(), DestTy: Int64Ty, Name: "step.i64");
1388
1389 auto *CalleeTy = FunctionType::get(Result: Type::getVoidTy(C&: Context),
1390 Params: {PtrTy, PtrTy, Int64Ty}, isVarArg: false);
1391 FunctionCallee Callee =
1392 M.getOrInsertFunction(Name: RTLIB::RuntimeLibcallsInfo::getLibcallImplName(
1393 CallImpl: RTLIB::impl___llvm_profile_instrument_gpu),
1394 T: CalleeTy);
1395
1396 if (OffloadPGOSampling > 0) {
1397 BasicBlock *CurBB = Builder.GetInsertBlock();
1398 BasicBlock *ContBB =
1399 CurBB->splitBasicBlock(I: BasicBlock::iterator(Inc), BBName: "po_cont");
1400 BasicBlock *ThenBB = BasicBlock::Create(Context, Name: "po_then", Parent: F);
1401
1402 CurBB->getTerminator()->eraseFromParent();
1403 IRBuilder<> HeadBuilder(CurBB);
1404 HeadBuilder.CreateCondBr(Cond: Inv.Matched, True: ThenBB, False: ContBB);
1405
1406 IRBuilder<> ThenBuilder(ThenBB);
1407 ThenBuilder.CreateCall(Callee, Args: {CastAddr, UniformAddrArg, StepI64});
1408 ThenBuilder.CreateBr(Dest: ContBB);
1409 } else {
1410 Builder.CreateCall(Callee, Args: {CastAddr, UniformAddrArg, StepI64});
1411 }
1412 Inc->eraseFromParent();
1413 return;
1414 }
1415
1416 auto *Addr = getCounterAddress(I: Inc);
1417 // If promotion is enabled then delay generating atomic updates until
1418 // after promotion is done.
1419 if ((!isCounterPromotionEnabled() && isAtomic()) ||
1420 (Inc->getIndex()->isNullValue() && AtomicFirstCounter)) {
1421 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Add, Ptr: Addr, Val: Inc->getStep(),
1422 Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
1423 } else {
1424 Value *IncStep = Inc->getStep();
1425 Value *Load = Builder.CreateLoad(Ty: IncStep->getType(), Ptr: Addr, Name: "pgocount");
1426 auto *Count = Builder.CreateAdd(LHS: Load, RHS: Inc->getStep());
1427 auto *Store = Builder.CreateStore(Val: Count, Ptr: Addr);
1428 if (isCounterPromotionEnabled())
1429 PromotionCandidates.emplace_back(args: cast<Instruction>(Val: Load), args&: Store);
1430 }
1431 Inc->eraseFromParent();
1432}
1433
1434void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
1435 ConstantArray *Names =
1436 cast<ConstantArray>(Val: CoverageNamesVar->getInitializer());
1437 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
1438 Constant *NC = Names->getOperand(i_nocapture: I);
1439 Value *V = NC->stripPointerCasts();
1440 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
1441 GlobalVariable *Name = cast<GlobalVariable>(Val: V);
1442
1443 Name->setLinkage(GlobalValue::PrivateLinkage);
1444 ReferencedNames.push_back(x: Name);
1445 if (isa<ConstantExpr>(Val: NC))
1446 NC->dropAllReferences();
1447 }
1448 CoverageNamesVar->eraseFromParent();
1449}
1450
1451void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
1452 InstrProfMCDCTVBitmapUpdate *Update) {
1453 auto &Ctx = M.getContext();
1454 IRBuilder<> Builder(Update);
1455 auto *Int8Ty = Type::getInt8Ty(C&: Ctx);
1456 auto *Int32Ty = Type::getInt32Ty(C&: Ctx);
1457 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
1458 auto *BitmapAddr = getBitmapAddress(I: Update);
1459
1460 // Load Temp Val + BitmapIdx.
1461 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
1462 auto *Temp = Builder.CreateAdd(
1463 LHS: Builder.CreateLoad(Ty: Int32Ty, Ptr: MCDCCondBitmapAddr, Name: "mcdc.temp"),
1464 RHS: Update->getBitmapIndex());
1465
1466 // Calculate byte offset using div8.
1467 // %1 = lshr i32 %mcdc.temp, 3
1468 auto *BitmapByteOffset = Builder.CreateLShr(LHS: Temp, RHS: 0x3);
1469
1470 // Add byte offset to section base byte address.
1471 // %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
1472 auto *BitmapByteAddr =
1473 Builder.CreateInBoundsPtrAdd(Ptr: BitmapAddr, Offset: BitmapByteOffset);
1474
1475 // Calculate bit offset into bitmap byte by using div8 remainder (AND ~8)
1476 // %5 = and i32 %mcdc.temp, 7
1477 // %6 = trunc i32 %5 to i8
1478 auto *BitToSet = Builder.CreateTrunc(V: Builder.CreateAnd(LHS: Temp, RHS: 0x7), DestTy: Int8Ty);
1479
1480 // Shift bit offset left to form a bitmap.
1481 // %7 = shl i8 1, %6
1482 auto *ShiftedVal = Builder.CreateShl(LHS: Builder.getInt8(C: 0x1), RHS: BitToSet);
1483
1484 // Load profile bitmap byte.
1485 // %mcdc.bits = load i8, ptr %4, align 1
1486 auto *Bitmap = Builder.CreateLoad(Ty: Int8Ty, Ptr: BitmapByteAddr, Name: "mcdc.bits");
1487
1488 if (isAtomic()) {
1489 // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
1490 // Note, just-loaded Bitmap might not be up-to-date. Use it just for
1491 // early testing.
1492 auto *Masked = Builder.CreateAnd(LHS: Bitmap, RHS: ShiftedVal);
1493 auto *ShouldStore = Builder.CreateICmpNE(LHS: Masked, RHS: ShiftedVal);
1494
1495 // Assume updating will be rare.
1496 auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
1497 Instruction *ThenBranch =
1498 SplitBlockAndInsertIfThen(Cond: ShouldStore, SplitBefore: Update, Unreachable: false, BranchWeights: Unlikely);
1499
1500 // Execute if (unlikely(ShouldStore)).
1501 Builder.SetInsertPoint(ThenBranch);
1502 Builder.CreateAtomicRMW(Op: AtomicRMWInst::Or, Ptr: BitmapByteAddr, Val: ShiftedVal,
1503 Align: MaybeAlign(), Ordering: AtomicOrdering::Monotonic);
1504 } else {
1505 // Perform logical OR of profile bitmap byte and shifted bit offset.
1506 // %8 = or i8 %mcdc.bits, %7
1507 auto *Result = Builder.CreateOr(LHS: Bitmap, RHS: ShiftedVal);
1508
1509 // Store the updated profile bitmap byte.
1510 // store i8 %8, ptr %3, align 1
1511 Builder.CreateStore(Val: Result, Ptr: BitmapByteAddr);
1512 }
1513
1514 Update->eraseFromParent();
1515}
1516
1517/// Get the name of a profiling variable for a particular function.
1518static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
1519 bool &Renamed) {
1520 StringRef NamePrefix = getInstrProfNameVarPrefix();
1521 StringRef Name = Inc->getName()->getName().substr(Start: NamePrefix.size());
1522 Function *F = Inc->getParent()->getParent();
1523 Module *M = F->getParent();
1524 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
1525 !canRenameComdatFunc(F: *F)) {
1526 Renamed = false;
1527 return (Prefix + Name).str();
1528 }
1529 Renamed = true;
1530 uint64_t FuncHash = Inc->getHash()->getZExtValue();
1531 SmallVector<char, 24> HashPostfix;
1532 if (Name.ends_with(Suffix: (Twine(".") + Twine(FuncHash)).toStringRef(Out&: HashPostfix)))
1533 return (Prefix + Name).str();
1534 return (Prefix + Name + "." + Twine(FuncHash)).str();
1535}
1536
1537static inline bool shouldRecordFunctionAddr(Function *F) {
1538 // Only record function addresses if IR PGO is enabled or if clang value
1539 // profiling is enabled. Recording function addresses greatly increases object
1540 // file size, because it prevents the inliner from deleting functions that
1541 // have been inlined everywhere.
1542 if (!profDataReferencedByCode(M: *F->getParent()))
1543 return false;
1544
1545 // Check the linkage
1546 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
1547 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
1548 !HasAvailableExternallyLinkage)
1549 return true;
1550
1551 // A function marked 'alwaysinline' with available_externally linkage can't
1552 // have its address taken. Doing so would create an undefined external ref to
1553 // the function, which would fail to link.
1554 if (HasAvailableExternallyLinkage &&
1555 F->hasFnAttribute(Kind: Attribute::AlwaysInline))
1556 return false;
1557
1558 // Prohibit function address recording if the function is both internal and
1559 // COMDAT. This avoids the profile data variable referencing internal symbols
1560 // in COMDAT.
1561 if (F->hasLocalLinkage() && F->hasComdat())
1562 return false;
1563
1564 // Check uses of this function for other than direct calls or invokes to it.
1565 // Inline virtual functions have linkeOnceODR linkage. When a key method
1566 // exists, the vtable will only be emitted in the TU where the key method
1567 // is defined. In a TU where vtable is not available, the function won't
1568 // be 'addresstaken'. If its address is not recorded here, the profile data
1569 // with missing address may be picked by the linker leading to missing
1570 // indirect call target info.
1571 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
1572}
1573
1574static inline bool shouldUsePublicSymbol(Function *Fn) {
1575 // It isn't legal to make an alias of this function at all
1576 if (Fn->isDeclarationForLinker())
1577 return true;
1578
1579 // Symbols with local linkage can just use the symbol directly without
1580 // introducing relocations
1581 if (Fn->hasLocalLinkage())
1582 return true;
1583
1584 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
1585 // unfavorable interaction between the new alias and the alias renaming done
1586 // in LowerTypeTests under ThinLTO. For comdat functions that would normally
1587 // be deduplicated, but the renaming scheme ends up preventing renaming, since
1588 // it creates unique names for each alias, resulting in duplicated symbols. In
1589 // the future, we should update the CFI related passes to migrate these
1590 // aliases to the same module as the jump-table they refer to will be defined.
1591 if (Fn->hasMetadata(KindID: LLVMContext::MD_type))
1592 return true;
1593
1594 // For comdat functions, an alias would need the same linkage as the original
1595 // function and hidden visibility. There is no point in adding an alias with
1596 // identical linkage an visibility to avoid introducing symbolic relocations.
1597 if (Fn->hasComdat() &&
1598 (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility))
1599 return true;
1600
1601 // its OK to use an alias
1602 return false;
1603}
1604
1605static inline Constant *getFuncAddrForProfData(Function *Fn) {
1606 auto *Int8PtrTy = PointerType::getUnqual(C&: Fn->getContext());
1607 // Store a nullptr in __llvm_profd, if we shouldn't use a real address
1608 if (!shouldRecordFunctionAddr(F: Fn))
1609 return ConstantPointerNull::get(T: Int8PtrTy);
1610
1611 // If we can't use an alias, we must use the public symbol, even though this
1612 // may require a symbolic relocation.
1613 if (shouldUsePublicSymbol(Fn))
1614 return Fn;
1615
1616 // For GPU targets, weak functions cannot use private aliases because
1617 // LTO may pick a different TU's copy, leaving the alias undefined
1618 if (isGPUProfTarget(M: *Fn->getParent()) &&
1619 GlobalValue::isWeakForLinker(Linkage: Fn->getLinkage()))
1620 return Fn;
1621
1622 // When possible use a private alias to avoid symbolic relocations.
1623 auto *GA = GlobalAlias::create(Linkage: GlobalValue::LinkageTypes::PrivateLinkage,
1624 Name: Fn->getName() + ".local", Aliasee: Fn);
1625
1626 // When the instrumented function is a COMDAT function, we cannot use a
1627 // private alias. If we did, we would create reference to a local label in
1628 // this function's section. If this version of the function isn't selected by
1629 // the linker, then the metadata would introduce a reference to a discarded
1630 // section. So, for COMDAT functions, we need to adjust the linkage of the
1631 // alias. Using hidden visibility avoids a dynamic relocation and an entry in
1632 // the dynamic symbol table.
1633 //
1634 // Note that this handles COMDAT functions with visibility other than Hidden,
1635 // since that case is covered in shouldUsePublicSymbol()
1636 if (Fn->hasComdat()) {
1637 GA->setLinkage(Fn->getLinkage());
1638 GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);
1639 }
1640
1641 // appendToCompilerUsed(*Fn->getParent(), {GA});
1642
1643 return GA;
1644}
1645
1646static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
1647 // NVPTX is an ELF target but PTX does not expose sections or linker symbols.
1648 if (TT.isNVPTX())
1649 return true;
1650
1651 // compiler-rt uses linker support to get data/counters/name start/end for
1652 // ELF, COFF, Mach-O, XCOFF, and Wasm.
1653 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1654 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF() ||
1655 TT.isOSBinFormatWasm())
1656 return false;
1657
1658 return true;
1659}
1660
1661void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
1662 StringRef CounterGroupName) {
1663 // Place lowered global variables in a comdat group if the associated function
1664 // or global variable is a COMDAT. This will make sure that only one copy of
1665 // global variable (e.g. function counters) of the COMDAT function will be
1666 // emitted after linking.
1667 bool NeedComdat = needsComdatForCounter(GV: *GO, M);
1668 bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
1669
1670 if (!UseComdat)
1671 return;
1672
1673 // Keep in mind that this pass may run before the inliner, so we need to
1674 // create a new comdat group (for counters, profiling data, etc). If we use
1675 // the comdat of the parent function, that will result in relocations against
1676 // discarded sections.
1677 //
1678 // If the data variable is referenced by code, non-counter variables (notably
1679 // profiling data) and counters have to be in different comdats for COFF
1680 // because the Visual C++ linker will report duplicate symbol errors if there
1681 // are multiple external symbols with the same name marked
1682 // IMAGE_COMDAT_SELECT_ASSOCIATIVE.
1683 StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
1684 ? GV->getName()
1685 : CounterGroupName;
1686 Comdat *C = M.getOrInsertComdat(Name: GroupName);
1687
1688 if (!NeedComdat) {
1689 // Object file format must be ELF since `UseComdat && !NeedComdat` is true.
1690 //
1691 // For ELF, when not using COMDAT, put counters, data and values into a
1692 // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
1693 // allows -z start-stop-gc to discard the entire group when the function is
1694 // discarded.
1695 C->setSelectionKind(Comdat::NoDeduplicate);
1696 }
1697 GV->setComdat(C);
1698 // COFF doesn't allow the comdat group leader to have private linkage, so
1699 // upgrade private linkage to internal linkage to produce a symbol table
1700 // entry.
1701 if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
1702 GV->setLinkage(GlobalValue::InternalLinkage);
1703}
1704
1705static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
1706 if (!profDataReferencedByCode(M: *GV->getParent()))
1707 return false;
1708
1709 if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
1710 !GV->hasAvailableExternallyLinkage())
1711 return true;
1712
1713 // This avoids the profile data from referencing internal symbols in
1714 // COMDAT.
1715 if (GV->hasLocalLinkage() && GV->hasComdat())
1716 return false;
1717
1718 return true;
1719}
1720
1721// FIXME: Introduce an internal alias like what's done for functions to reduce
1722// the number of relocation entries.
1723static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
1724 // Store a nullptr in __profvt_ if a real address shouldn't be used.
1725 if (!shouldRecordVTableAddr(GV))
1726 return ConstantPointerNull::get(T: PointerType::getUnqual(C&: GV->getContext()));
1727
1728 return GV;
1729}
1730
1731void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
1732 assert(ProfileCorrelate != InstrProfCorrelator::DEBUG_INFO &&
1733 "Value profiling is not supported with lightweight instrumentation");
1734 if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
1735 return;
1736
1737 // Skip llvm internal global variable or __prof variables.
1738 if (GV->getName().starts_with(Prefix: "llvm.") ||
1739 GV->getName().starts_with(Prefix: "__llvm") ||
1740 GV->getName().starts_with(Prefix: "__prof"))
1741 return;
1742
1743 // VTableProfData already created
1744 auto It = VTableDataMap.find(Val: GV);
1745 if (It != VTableDataMap.end() && It->second)
1746 return;
1747
1748 GlobalValue::LinkageTypes Linkage = GV->getLinkage();
1749 GlobalValue::VisibilityTypes Visibility = GV->getVisibility();
1750
1751 // This is to keep consistent with per-function profile data
1752 // for correctness.
1753 if (TT.isOSBinFormatXCOFF()) {
1754 Linkage = GlobalValue::InternalLinkage;
1755 Visibility = GlobalValue::DefaultVisibility;
1756 }
1757
1758 LLVMContext &Ctx = M.getContext();
1759 Type *DataTypes[] = {
1760#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
1761#include "llvm/ProfileData/InstrProfData.inc"
1762#undef INSTR_PROF_VTABLE_DATA
1763 };
1764
1765 auto *DataTy = StructType::get(Context&: Ctx, Elements: ArrayRef(DataTypes));
1766
1767 // Used by INSTR_PROF_VTABLE_DATA MACRO
1768 Constant *VTableAddr = getVTableAddrForProfData(GV);
1769 const std::string PGOVTableName = getPGOName(V: *GV);
1770 // Record the length of the vtable. This is needed since vtable pointers
1771 // loaded from C++ objects might be from the middle of a vtable definition.
1772 uint32_t VTableSizeVal = GV->getGlobalSize(DL: M.getDataLayout());
1773
1774 Constant *DataVals[] = {
1775#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
1776#include "llvm/ProfileData/InstrProfData.inc"
1777#undef INSTR_PROF_VTABLE_DATA
1778 };
1779
1780 auto *Data =
1781 new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,
1782 ConstantStruct::get(T: DataTy, V: DataVals),
1783 getInstrProfVTableVarPrefix() + PGOVTableName);
1784
1785 Data->setVisibility(Visibility);
1786 Data->setSection(getInstrProfSectionName(IPSK: IPSK_vtab, OF: TT.getObjectFormat()));
1787 Data->setAlignment(Align(8));
1788
1789 maybeSetComdat(GV: Data, GO: GV, CounterGroupName: Data->getName());
1790
1791 VTableDataMap[GV] = Data;
1792
1793 ReferencedVTables.push_back(x: GV);
1794
1795 // VTable <Hash, Addr> is used by runtime but not referenced by other
1796 // sections. Conservatively mark it linker retained.
1797 UsedVars.push_back(x: Data);
1798}
1799
1800GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
1801 InstrProfSectKind IPSK) {
1802 GlobalVariable *NamePtr = Inc->getName();
1803
1804 // Match the linkage and visibility of the name global.
1805 Function *Fn = Inc->getParent()->getParent();
1806 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
1807 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1808
1809 // Use internal rather than private linkage so the counter variable shows up
1810 // in the symbol table when using debug info for correlation.
1811 if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO &&
1812 TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
1813 Linkage = GlobalValue::InternalLinkage;
1814
1815 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1816 // symbols in the same csect won't be discarded. When there are duplicate weak
1817 // symbols, we can NOT guarantee that the relocations get resolved to the
1818 // intended weak symbol, so we can not ensure the correctness of the relative
1819 // CounterPtr, so we have to use private linkage for counter and data symbols.
1820 if (TT.isOSBinFormatXCOFF()) {
1821 Linkage = GlobalValue::PrivateLinkage;
1822 Visibility = GlobalValue::DefaultVisibility;
1823 }
1824 // Move the name variable to the right section.
1825 bool Renamed;
1826 GlobalVariable *Ptr;
1827 StringRef VarPrefix;
1828 std::string VarName;
1829 if (IPSK == IPSK_cnts) {
1830 VarPrefix = getInstrProfCountersVarPrefix();
1831 VarName = getVarName(Inc, Prefix: VarPrefix, Renamed);
1832 InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Val: Inc);
1833 Ptr = createRegionCounters(Inc: CntrIncrement, Name: VarName, Linkage);
1834 } else if (IPSK == IPSK_bitmap) {
1835 VarPrefix = getInstrProfBitmapVarPrefix();
1836 VarName = getVarName(Inc, Prefix: VarPrefix, Renamed);
1837 InstrProfMCDCBitmapInstBase *BitmapUpdate =
1838 dyn_cast<InstrProfMCDCBitmapInstBase>(Val: Inc);
1839 Ptr = createRegionBitmaps(Inc: BitmapUpdate, Name: VarName, Linkage);
1840 } else {
1841 llvm_unreachable("Profile Section must be for Counters or Bitmaps");
1842 }
1843
1844 Ptr->setVisibility(Visibility);
1845 Ptr->setSection(getInstrProfSectionName(IPSK, OF: TT.getObjectFormat()));
1846 Ptr->setLinkage(Linkage);
1847 if (isGPUProfTarget(M) && !Ptr->hasComdat()) {
1848 Ptr->setComdat(M.getOrInsertComdat(Name: VarName));
1849 Ptr->setLinkage(GlobalValue::LinkOnceODRLinkage);
1850 Ptr->setVisibility(GlobalValue::ProtectedVisibility);
1851 } else {
1852 maybeSetComdat(GV: Ptr, GO: Fn, CounterGroupName: VarName);
1853 }
1854 return Ptr;
1855}
1856
1857GlobalVariable *
1858InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1859 StringRef Name,
1860 GlobalValue::LinkageTypes Linkage) {
1861 uint64_t NumBytes = Inc->getNumBitmapBytes();
1862 auto *BitmapTy = ArrayType::get(ElementType: Type::getInt8Ty(C&: M.getContext()), NumElements: NumBytes);
1863 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1864 Constant::getNullValue(Ty: BitmapTy), Name);
1865 GV->setAlignment(Align(1));
1866 return GV;
1867}
1868
1869GlobalVariable *
1870InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1871 GlobalVariable *NamePtr = Inc->getName();
1872 auto &PD = ProfileDataMap[NamePtr];
1873 if (PD.RegionBitmaps)
1874 return PD.RegionBitmaps;
1875
1876 // If RegionBitmaps doesn't already exist, create it by first setting up
1877 // the corresponding profile section.
1878 auto *BitmapPtr = setupProfileSection(Inc, IPSK: IPSK_bitmap);
1879 PD.RegionBitmaps = BitmapPtr;
1880 PD.NumBitmapBytes = Inc->getNumBitmapBytes();
1881 return PD.RegionBitmaps;
1882}
1883
1884GlobalVariable *
1885InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1886 GlobalValue::LinkageTypes Linkage) {
1887 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1888 auto &Ctx = M.getContext();
1889 GlobalVariable *GV;
1890 if (isa<InstrProfCoverInst>(Val: Inc)) {
1891 auto *CounterTy = Type::getInt8Ty(C&: Ctx);
1892 auto *CounterArrTy = ArrayType::get(ElementType: CounterTy, NumElements: NumCounters);
1893 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
1894 std::vector<Constant *> InitialValues(NumCounters,
1895 Constant::getAllOnesValue(Ty: CounterTy));
1896 GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1897 ConstantArray::get(T: CounterArrTy, V: InitialValues),
1898 Name);
1899 GV->setAlignment(Align(1));
1900 } else {
1901 auto *CounterTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NumCounters);
1902 GV = new GlobalVariable(M, CounterTy, false, Linkage,
1903 Constant::getNullValue(Ty: CounterTy), Name);
1904 GV->setAlignment(Align(8));
1905 }
1906 return GV;
1907}
1908
1909GlobalVariable *
1910InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
1911 GlobalVariable *NamePtr = Inc->getName();
1912 auto &PD = ProfileDataMap[NamePtr];
1913 if (PD.RegionCounters)
1914 return PD.RegionCounters;
1915
1916 // If RegionCounters doesn't already exist, create it by first setting up
1917 // the corresponding profile section.
1918 auto *CounterPtr = setupProfileSection(Inc, IPSK: IPSK_cnts);
1919 PD.RegionCounters = CounterPtr;
1920
1921 if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
1922 LLVMContext &Ctx = M.getContext();
1923 Function *Fn = Inc->getParent()->getParent();
1924 if (auto *SP = Fn->getSubprogram()) {
1925 DIBuilder DB(M, true, SP->getUnit());
1926 Metadata *FunctionNameAnnotation[] = {
1927 MDString::get(Context&: Ctx, Str: InstrProfCorrelator::FunctionNameAttributeName),
1928 MDString::get(Context&: Ctx, Str: getPGOFuncNameVarInitializer(NameVar: NamePtr)),
1929 };
1930 Metadata *CFGHashAnnotation[] = {
1931 MDString::get(Context&: Ctx, Str: InstrProfCorrelator::CFGHashAttributeName),
1932 ConstantAsMetadata::get(C: Inc->getHash()),
1933 };
1934 Metadata *NumCountersAnnotation[] = {
1935 MDString::get(Context&: Ctx, Str: InstrProfCorrelator::NumCountersAttributeName),
1936 ConstantAsMetadata::get(C: Inc->getNumCounters()),
1937 };
1938 auto Annotations = DB.getOrCreateArray(Elements: {
1939 MDNode::get(Context&: Ctx, MDs: FunctionNameAnnotation),
1940 MDNode::get(Context&: Ctx, MDs: CFGHashAnnotation),
1941 MDNode::get(Context&: Ctx, MDs: NumCountersAnnotation),
1942 });
1943 auto *DICounter = DB.createGlobalVariableExpression(
1944 Context: SP, Name: CounterPtr->getName(), /*LinkageName=*/StringRef(), File: SP->getFile(),
1945 /*LineNo=*/0, Ty: DB.createUnspecifiedType(Name: "Profile Data Type"),
1946 IsLocalToUnit: CounterPtr->hasLocalLinkage(), /*IsDefined=*/isDefined: true, /*Expr=*/nullptr,
1947 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
1948 Annotations);
1949 CounterPtr->addDebugInfo(GV: DICounter);
1950 DB.finalize();
1951 }
1952
1953 // Mark the counter variable as used so that it isn't optimized out.
1954 CompilerUsedVars.push_back(x: PD.RegionCounters);
1955 }
1956
1957 // Create uniform counters before the data variable so that
1958 // UniformCounterPtr can reference them in createDataVariable().
1959 getOrCreateUniformCounters(Inc);
1960
1961 // Create the data variable (if it doesn't already exist).
1962 createDataVariable(Inc);
1963
1964 return PD.RegionCounters;
1965}
1966
1967GlobalVariable *
1968InstrLowerer::getOrCreateUniformCounters(InstrProfCntrInstBase *Inc) {
1969 // Uniform counters are only meaningful for GPU profile targets.
1970 if (!isGPUProfTarget(M))
1971 return nullptr;
1972
1973 GlobalVariable *NamePtr = Inc->getName();
1974 auto &PD = ProfileDataMap[NamePtr];
1975 if (PD.UniformCounters)
1976 return PD.UniformCounters;
1977
1978 assert(PD.RegionCounters && "region counters must be created first");
1979
1980 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1981
1982 LLVMContext &Ctx = M.getContext();
1983 ArrayType *CounterTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NumCounters);
1984
1985 bool Renamed;
1986 std::string VarName = getVarName(Inc, Prefix: "__llvm_prf_unifcnt_", Renamed);
1987
1988 auto *GV = new GlobalVariable(M, CounterTy, false, NamePtr->getLinkage(),
1989 Constant::getNullValue(Ty: CounterTy), VarName);
1990 GV->setAlignment(Align(8));
1991
1992 GV->setSection(getInstrProfSectionName(IPSK: IPSK_ucnts, OF: TT.getObjectFormat()));
1993
1994 GV->setComdat(M.getOrInsertComdat(Name: VarName));
1995 GV->setLinkage(GlobalValue::LinkOnceODRLinkage);
1996 GV->setVisibility(GlobalValue::ProtectedVisibility);
1997
1998 PD.UniformCounters = GV;
1999 CompilerUsedVars.push_back(x: GV);
2000
2001 return PD.UniformCounters;
2002}
2003
2004void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
2005 // When debug information is correlated to profile data, a data variable
2006 // is not needed.
2007 if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
2008 return;
2009
2010 GlobalVariable *NamePtr = Inc->getName();
2011 auto &PD = ProfileDataMap[NamePtr];
2012
2013 // Return if data variable was already created.
2014 if (PD.DataVar)
2015 return;
2016
2017 LLVMContext &Ctx = M.getContext();
2018
2019 Function *Fn = Inc->getParent()->getParent();
2020 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
2021 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
2022
2023 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
2024 // symbols in the same csect won't be discarded. When there are duplicate weak
2025 // symbols, we can NOT guarantee that the relocations get resolved to the
2026 // intended weak symbol, so we can not ensure the correctness of the relative
2027 // CounterPtr, so we have to use private linkage for counter and data symbols.
2028 if (TT.isOSBinFormatXCOFF()) {
2029 Linkage = GlobalValue::PrivateLinkage;
2030 Visibility = GlobalValue::DefaultVisibility;
2031 }
2032
2033 bool NeedComdat = needsComdatForCounter(GV: *Fn, M);
2034 bool Renamed;
2035
2036 // The Data Variable section is anchored to profile counters.
2037 std::string CntsVarName =
2038 getVarName(Inc, Prefix: getInstrProfCountersVarPrefix(), Renamed);
2039 std::string DataVarName =
2040 getVarName(Inc, Prefix: getInstrProfDataVarPrefix(), Renamed);
2041
2042 auto *Int8PtrTy = PointerType::getUnqual(C&: Ctx);
2043 // Allocate statically the array of pointers to value profile nodes for
2044 // the current function.
2045 Constant *ValuesPtrExpr = ConstantPointerNull::get(T: Int8PtrTy);
2046 uint64_t NS = 0;
2047 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
2048 NS += PD.NumValueSites[Kind];
2049 if (NS > 0 && ValueProfileStaticAlloc &&
2050 !needsRuntimeRegistrationOfSectionRange(TT)) {
2051 ArrayType *ValuesTy = ArrayType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElements: NS);
2052 auto *ValuesVar = new GlobalVariable(
2053 M, ValuesTy, false, Linkage, Constant::getNullValue(Ty: ValuesTy),
2054 getVarName(Inc, Prefix: getInstrProfValuesVarPrefix(), Renamed));
2055 ValuesVar->setVisibility(Visibility);
2056 setGlobalVariableLargeSection(TargetTriple: TT, GV&: *ValuesVar);
2057 ValuesVar->setSection(
2058 getInstrProfSectionName(IPSK: IPSK_vals, OF: TT.getObjectFormat()));
2059 ValuesVar->setAlignment(Align(8));
2060 maybeSetComdat(GV: ValuesVar, GO: Fn, CounterGroupName: CntsVarName);
2061 ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
2062 C: ValuesVar, Ty: PointerType::get(C&: Fn->getContext(), AddressSpace: 0));
2063 }
2064
2065 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
2066
2067 Constant *CounterPtr = PD.RegionCounters;
2068 Constant *UniformCounterPtr = PD.UniformCounters;
2069
2070 uint64_t NumBitmapBytes = PD.NumBitmapBytes;
2071
2072 // Create data variable.
2073 auto *IntPtrTy = M.getDataLayout().getIntPtrType(C&: M.getContext());
2074 auto *Int16Ty = Type::getInt16Ty(C&: Ctx);
2075 auto *Int16ArrayTy = ArrayType::get(ElementType: Int16Ty, NumElements: IPVK_Last + 1);
2076 auto *DataTy = getProfileDataTy();
2077
2078 Constant *FunctionAddr = getFuncAddrForProfData(Fn);
2079
2080 Constant *Int16ArrayVals[IPVK_Last + 1];
2081 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
2082 Int16ArrayVals[Kind] = ConstantInt::get(Ty: Int16Ty, V: PD.NumValueSites[Kind]);
2083
2084 uint16_t OffloadDeviceWaveSizeVal = 0;
2085
2086 if (isGPUProfTarget(M)) {
2087 // For GPU targets, weak functions need weak linkage for their profile data
2088 // aliases to allow linker deduplication across TUs
2089 if (GlobalValue::isWeakForLinker(Linkage: Fn->getLinkage()))
2090 Linkage = Fn->getLinkage();
2091 else
2092 Linkage = GlobalValue::ExternalLinkage;
2093 Visibility = GlobalValue::ProtectedVisibility;
2094 }
2095 // If the data variable is not referenced by code (if we don't emit
2096 // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
2097 // data variable live under linker GC, the data variable can be private. This
2098 // optimization applies to ELF.
2099 //
2100 // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
2101 // to be false.
2102 //
2103 // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
2104 // that other copies must have the same CFG and cannot have value profiling.
2105 // If no hash suffix, other profd copies may be referenced by code.
2106 if (!isGPUProfTarget(M) && NS == 0 &&
2107 !(DataReferencedByCode && NeedComdat && !Renamed) &&
2108 (TT.isOSBinFormatELF() ||
2109 (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
2110 Linkage = GlobalValue::PrivateLinkage;
2111 Visibility = GlobalValue::DefaultVisibility;
2112 }
2113 // GPU-target ELF objects are always ET_DYN, so non-local symbols with
2114 // default visibility are preemptible. The CounterPtr label difference
2115 // emits a REL32 relocation that lld rejects against preemptible targets.
2116 if (TT.isGPU() && TT.isOSBinFormatELF() &&
2117 !GlobalValue::isLocalLinkage(Linkage))
2118 Visibility = GlobalValue::ProtectedVisibility;
2119 auto *Data =
2120 new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
2121
2122 Constant *RelativeCounterPtr;
2123 Constant *RelativeUniformCounterPtr = ConstantInt::get(Ty: IntPtrTy, V: 0);
2124 GlobalVariable *BitmapPtr = PD.RegionBitmaps;
2125 Constant *RelativeBitmapPtr = ConstantInt::get(Ty: IntPtrTy, V: 0);
2126 InstrProfSectKind DataSectionKind;
2127 // With binary profile correlation, profile data is not loaded into memory.
2128 // profile data must reference profile counter with an absolute relocation.
2129 if (ProfileCorrelate == InstrProfCorrelator::BINARY) {
2130 DataSectionKind = IPSK_covdata;
2131 RelativeCounterPtr = ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy);
2132 if (BitmapPtr != nullptr)
2133 RelativeBitmapPtr = ConstantExpr::getPtrToInt(C: BitmapPtr, Ty: IntPtrTy);
2134 if (UniformCounterPtr != nullptr)
2135 RelativeUniformCounterPtr =
2136 ConstantExpr::getPtrToInt(C: UniformCounterPtr, Ty: IntPtrTy);
2137 } else if (TT.isNVPTX()) {
2138 // The NVPTX target cannot handle self-referencing constant expressions in
2139 // global initializers at all. Use absolute pointers and have the runtime
2140 // registration convert them to relative offsets.
2141 DataSectionKind = IPSK_data;
2142 RelativeCounterPtr = ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy);
2143 } else {
2144 // Reference the counter variable with a label difference (link-time
2145 // constant).
2146 DataSectionKind = IPSK_data;
2147 RelativeCounterPtr =
2148 ConstantExpr::getSub(C1: ConstantExpr::getPtrToInt(C: CounterPtr, Ty: IntPtrTy),
2149 C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
2150 if (BitmapPtr != nullptr)
2151 RelativeBitmapPtr =
2152 ConstantExpr::getSub(C1: ConstantExpr::getPtrToInt(C: BitmapPtr, Ty: IntPtrTy),
2153 C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
2154 if (UniformCounterPtr != nullptr)
2155 RelativeUniformCounterPtr = ConstantExpr::getSub(
2156 C1: ConstantExpr::getPtrToInt(C: UniformCounterPtr, Ty: IntPtrTy),
2157 C2: ConstantExpr::getPtrToInt(C: Data, Ty: IntPtrTy));
2158 }
2159
2160 Constant *DataVals[] = {
2161#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
2162#include "llvm/ProfileData/InstrProfData.inc"
2163 };
2164 Data->setInitializer(ConstantStruct::get(T: DataTy, V: DataVals));
2165
2166 Data->setVisibility(Visibility);
2167 Data->setSection(
2168 getInstrProfSectionName(IPSK: DataSectionKind, OF: TT.getObjectFormat()));
2169 Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
2170 if (isGPUProfTarget(M) && !Data->hasComdat()) {
2171 Data->setComdat(M.getOrInsertComdat(Name: CntsVarName));
2172 Data->setLinkage(GlobalValue::LinkOnceODRLinkage);
2173 } else {
2174 maybeSetComdat(GV: Data, GO: Fn, CounterGroupName: CntsVarName);
2175 }
2176
2177 PD.DataVar = Data;
2178
2179 // Mark the data variable as used so that it isn't stripped out.
2180 CompilerUsedVars.push_back(x: Data);
2181 // Now that the linkage set by the FE has been passed to the data and counter
2182 // variables, reset Name variable's linkage and visibility to private so that
2183 // it can be removed later by the compiler.
2184 NamePtr->setLinkage(GlobalValue::PrivateLinkage);
2185 // Collect the referenced names to be used by emitNameData.
2186 ReferencedNames.push_back(x: NamePtr);
2187}
2188
2189void InstrLowerer::emitVNodes() {
2190 if (!ValueProfileStaticAlloc)
2191 return;
2192
2193 // For now only support this on platforms that do
2194 // not require runtime registration to discover
2195 // named section start/end.
2196 if (needsRuntimeRegistrationOfSectionRange(TT))
2197 return;
2198
2199 size_t TotalNS = 0;
2200 for (auto &PD : ProfileDataMap) {
2201 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
2202 TotalNS += PD.second.NumValueSites[Kind];
2203 }
2204
2205 if (!TotalNS)
2206 return;
2207
2208 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
2209// Heuristic for small programs with very few total value sites.
2210// The default value of vp-counters-per-site is chosen based on
2211// the observation that large apps usually have a low percentage
2212// of value sites that actually have any profile data, and thus
2213// the average number of counters per site is low. For small
2214// apps with very few sites, this may not be true. Bump up the
2215// number of counters in this case.
2216#define INSTR_PROF_MIN_VAL_COUNTS 10
2217 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
2218 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, b: (int)NumCounters * 2);
2219
2220 auto &Ctx = M.getContext();
2221 Type *VNodeTypes[] = {
2222#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
2223#include "llvm/ProfileData/InstrProfData.inc"
2224 };
2225 auto *VNodeTy = StructType::get(Context&: Ctx, Elements: ArrayRef(VNodeTypes));
2226
2227 ArrayType *VNodesTy = ArrayType::get(ElementType: VNodeTy, NumElements: NumCounters);
2228 auto *VNodesVar = new GlobalVariable(
2229 M, VNodesTy, false, GlobalValue::PrivateLinkage,
2230 Constant::getNullValue(Ty: VNodesTy), getInstrProfVNodesVarName());
2231 setGlobalVariableLargeSection(TargetTriple: TT, GV&: *VNodesVar);
2232 VNodesVar->setSection(
2233 getInstrProfSectionName(IPSK: IPSK_vnodes, OF: TT.getObjectFormat()));
2234 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(Ty: VNodesTy));
2235 // VNodesVar is used by runtime but not referenced via relocation by other
2236 // sections. Conservatively make it linker retained.
2237 UsedVars.push_back(x: VNodesVar);
2238}
2239
2240// Build the per-TU device-PGO sections struct: section start/stop bounds for
2241// names/counters/data/uniform-counters plus the raw version. Returns null if it
2242// already exists.
2243static GlobalVariable *emitGPUOffloadSectionsStruct(Module &M,
2244 StringRef CUIDPostfix) {
2245 std::string Name = ("__llvm_profile_sections" + CUIDPostfix).str();
2246 if (M.getNamedValue(Name))
2247 return nullptr;
2248
2249 LLVMContext &Ctx = M.getContext();
2250 unsigned AS = M.getDataLayout().getDefaultGlobalsAddressSpace();
2251 auto Extern = [&](StringRef Sym, Type *Ty, bool IsConst,
2252 GlobalValue::VisibilityTypes Vis) {
2253 GlobalVariable *GV = M.getNamedGlobal(Name: Sym);
2254 if (!GV) {
2255 GV = new GlobalVariable(M, Ty, IsConst, GlobalValue::ExternalLinkage,
2256 nullptr, Sym, nullptr,
2257 GlobalValue::NotThreadLocal, AS);
2258 GV->setVisibility(Vis);
2259 }
2260 return GV;
2261 };
2262 // Section bounds are hidden i8 markers; raw_version is an i64 constant.
2263 auto *I8 = Type::getInt8Ty(C&: Ctx);
2264 auto Hidden = GlobalValue::HiddenVisibility;
2265 Constant *Fields[] = {Extern("__start___llvm_prf_names", I8, false, Hidden),
2266 Extern("__stop___llvm_prf_names", I8, false, Hidden),
2267 Extern("__start___llvm_prf_cnts", I8, false, Hidden),
2268 Extern("__stop___llvm_prf_cnts", I8, false, Hidden),
2269 Extern("__start___llvm_prf_data", I8, false, Hidden),
2270 Extern("__stop___llvm_prf_data", I8, false, Hidden),
2271 Extern("__start___llvm_prf_ucnts", I8, false, Hidden),
2272 Extern("__stop___llvm_prf_ucnts", I8, false, Hidden),
2273 Extern("__llvm_profile_raw_version",
2274 Type::getInt64Ty(C&: Ctx), true,
2275 GlobalValue::DefaultVisibility)};
2276 auto *PtrTy = PointerType::get(C&: Ctx, AddressSpace: AS);
2277 auto *STy = StructType::get(
2278 Context&: Ctx, Elements: {PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy, PtrTy});
2279 auto *GV = new GlobalVariable(M, STy, /*isConstant=*/true,
2280 GlobalValue::ExternalLinkage,
2281 ConstantStruct::get(T: STy, V: Fields), Name, nullptr,
2282 GlobalValue::NotThreadLocal, AS);
2283 GV->setVisibility(GlobalValue::ProtectedVisibility);
2284 return GV;
2285}
2286
2287void InstrLowerer::emitNameData() {
2288 if (ReferencedNames.empty())
2289 return;
2290
2291 std::string CompressedNameStr;
2292 if (Error E = collectPGOFuncNameStrings(NameVars: ReferencedNames, Result&: CompressedNameStr,
2293 doCompression: DoInstrProfNameCompression)) {
2294 report_fatal_error(reason: Twine(toString(E: std::move(E))), gen_crash_diag: false);
2295 }
2296
2297 auto &Ctx = M.getContext();
2298 auto *NamesVal =
2299 ConstantDataArray::getString(Context&: Ctx, Initializer: StringRef(CompressedNameStr), AddNull: false);
2300 std::string NamesVarName = std::string(getInstrProfNamesVarName());
2301 GlobalValue::LinkageTypes NamesLinkage = GlobalValue::PrivateLinkage;
2302 GlobalValue::VisibilityTypes NamesVisibility = GlobalValue::DefaultVisibility;
2303 std::string GPUCUIDPostfix;
2304 if (isGPUProfTarget(M)) {
2305 if (auto *GV = M.getNamedGlobal(Name: getInstrProfNamesVarPostfixVarName())) {
2306 if (auto *Init =
2307 dyn_cast_or_null<ConstantDataArray>(Val: GV->getInitializer())) {
2308 if (Init->isCString()) {
2309 GPUCUIDPostfix = Init->getAsCString().str();
2310 NamesVarName += GPUCUIDPostfix;
2311 NamesLinkage = GlobalValue::ExternalLinkage;
2312 NamesVisibility = GlobalValue::ProtectedVisibility;
2313 removeFromUsedLists(
2314 M, ShouldRemove: [GV](Constant *C) { return C->stripPointerCasts() == GV; });
2315 GV->eraseFromParent();
2316 }
2317 }
2318 }
2319 }
2320 NamesVar = new GlobalVariable(M, NamesVal->getType(), true, NamesLinkage,
2321 NamesVal, NamesVarName);
2322 NamesVar->setVisibility(NamesVisibility);
2323
2324 NamesSize = CompressedNameStr.size();
2325 setGlobalVariableLargeSection(TargetTriple: TT, GV&: *NamesVar);
2326 std::string NamesSectionName =
2327 ProfileCorrelate == InstrProfCorrelator::BINARY
2328 ? getInstrProfSectionName(IPSK: IPSK_covname, OF: TT.getObjectFormat())
2329 : getInstrProfSectionName(IPSK: IPSK_name, OF: TT.getObjectFormat());
2330 NamesVar->setSection(NamesSectionName);
2331 // On COFF, it's important to reduce the alignment down to 1 to prevent the
2332 // linker from inserting padding before the start of the names section or
2333 // between names entries.
2334 NamesVar->setAlignment(Align(1));
2335 // NamesVar is used by runtime but not referenced via relocation by other
2336 // sections. Conservatively make it linker retained.
2337 UsedVars.push_back(x: NamesVar);
2338
2339 for (auto *NamePtr : ReferencedNames)
2340 NamePtr->eraseFromParent();
2341
2342 // Emit the device sections struct only when this TU produced profile data, so
2343 // its section start/stop references are backed by a real section.
2344 bool HasData = llvm::any_of(Range&: ProfileDataMap,
2345 P: [](const auto &KV) { return KV.second.DataVar; });
2346 if (!GPUCUIDPostfix.empty() && HasData)
2347 if (GlobalVariable *GV = emitGPUOffloadSectionsStruct(M, CUIDPostfix: GPUCUIDPostfix))
2348 CompilerUsedVars.push_back(x: GV);
2349}
2350
2351void InstrLowerer::emitVTableNames() {
2352 if (!EnableVTableValueProfiling || ReferencedVTables.empty())
2353 return;
2354
2355 // Collect the PGO names of referenced vtables and compress them.
2356 std::string CompressedVTableNames;
2357 if (Error E = collectVTableStrings(VTables: ReferencedVTables, Result&: CompressedVTableNames,
2358 doCompression: DoInstrProfNameCompression)) {
2359 report_fatal_error(reason: Twine(toString(E: std::move(E))), gen_crash_diag: false);
2360 }
2361
2362 auto &Ctx = M.getContext();
2363 auto *VTableNamesVal = ConstantDataArray::getString(
2364 Context&: Ctx, Initializer: StringRef(CompressedVTableNames), AddNull: false /* AddNull */);
2365 GlobalVariable *VTableNamesVar =
2366 new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
2367 GlobalValue::PrivateLinkage, VTableNamesVal,
2368 getInstrProfVTableNamesVarName());
2369 VTableNamesVar->setSection(
2370 getInstrProfSectionName(IPSK: IPSK_vname, OF: TT.getObjectFormat()));
2371 VTableNamesVar->setAlignment(Align(1));
2372 // Make VTableNames linker retained.
2373 UsedVars.push_back(x: VTableNamesVar);
2374}
2375
2376void InstrLowerer::emitRegistration() {
2377 if (!needsRuntimeRegistrationOfSectionRange(TT))
2378 return;
2379
2380 // Construct the function.
2381 auto *VoidTy = Type::getVoidTy(C&: M.getContext());
2382 auto *VoidPtrTy = PointerType::getUnqual(C&: M.getContext());
2383 auto *Int64Ty = Type::getInt64Ty(C&: M.getContext());
2384 auto *RegisterFTy = FunctionType::get(Result: VoidTy, isVarArg: false);
2385 auto *RegisterF = Function::Create(Ty: RegisterFTy, Linkage: GlobalValue::InternalLinkage,
2386 N: getInstrProfRegFuncsName(), M);
2387 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2388 if (Options.NoRedZone)
2389 RegisterF->addFnAttr(Kind: Attribute::NoRedZone);
2390
2391 auto *RuntimeRegisterTy = FunctionType::get(Result: VoidTy, Params: VoidPtrTy, isVarArg: false);
2392 auto *RuntimeRegisterF =
2393 Function::Create(Ty: RuntimeRegisterTy, Linkage: GlobalVariable::ExternalLinkage,
2394 N: getInstrProfRegFuncName(), M);
2395
2396 IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: RegisterF));
2397 for (Value *Data : CompilerUsedVars)
2398 if (!isa<Function>(Val: Data))
2399 // Check for addrspace cast when profiling GPU
2400 IRB.CreateCall(Callee: RuntimeRegisterF,
2401 Args: IRB.CreatePointerBitCastOrAddrSpaceCast(V: Data, DestTy: VoidPtrTy));
2402 for (Value *Data : UsedVars)
2403 if (Data != NamesVar && !isa<Function>(Val: Data))
2404 IRB.CreateCall(Callee: RuntimeRegisterF,
2405 Args: IRB.CreatePointerBitCastOrAddrSpaceCast(V: Data, DestTy: VoidPtrTy));
2406
2407 if (NamesVar) {
2408 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
2409 auto *NamesRegisterTy =
2410 FunctionType::get(Result: VoidTy, Params: ArrayRef(ParamTypes), isVarArg: false);
2411 auto *NamesRegisterF =
2412 Function::Create(Ty: NamesRegisterTy, Linkage: GlobalVariable::ExternalLinkage,
2413 N: getInstrProfNamesRegFuncName(), M);
2414 IRB.CreateCall(Callee: NamesRegisterF, Args: {IRB.CreatePointerBitCastOrAddrSpaceCast(
2415 V: NamesVar, DestTy: VoidPtrTy),
2416 IRB.getInt64(C: NamesSize)});
2417 }
2418
2419 IRB.CreateRetVoid();
2420}
2421
2422bool InstrLowerer::emitRuntimeHook() {
2423 // GPU profiling data is read directly by the host offload runtime. We do not
2424 // need the standard runtime hook.
2425 if (TT.isGPU())
2426 return false;
2427
2428 // We expect the linker to be invoked with -u<hook_var> flag for Linux
2429 // in which case there is no need to emit the external variable.
2430 if (TT.isOSLinux() || TT.isOSAIX())
2431 return false;
2432
2433 // If the module's provided its own runtime, we don't need to do anything.
2434 if (M.getGlobalVariable(Name: getInstrProfRuntimeHookVarName()))
2435 return false;
2436
2437 // Declare an external variable that will pull in the runtime initialization.
2438 auto *Int32Ty = Type::getInt32Ty(C&: M.getContext());
2439 auto *Var =
2440 new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
2441 nullptr, getInstrProfRuntimeHookVarName());
2442 Var->setVisibility(GlobalValue::HiddenVisibility);
2443
2444 if (TT.isOSBinFormatELF() && !TT.isPS()) {
2445 // Mark the user variable as used so that it isn't stripped out.
2446 CompilerUsedVars.push_back(x: Var);
2447 } else {
2448 // Make a function that uses it.
2449 auto *User = Function::Create(Ty: FunctionType::get(Result: Int32Ty, isVarArg: false),
2450 Linkage: GlobalValue::LinkOnceODRLinkage,
2451 N: getInstrProfRuntimeHookVarUseFuncName(), M);
2452 User->addFnAttr(Kind: Attribute::NoInline);
2453 if (Options.NoRedZone)
2454 User->addFnAttr(Kind: Attribute::NoRedZone);
2455 User->setVisibility(GlobalValue::HiddenVisibility);
2456 if (TT.supportsCOMDAT())
2457 User->setComdat(M.getOrInsertComdat(Name: User->getName()));
2458 // Explicitly mark this function as cold since it is never called.
2459 User->setEntryCount(Count: 0);
2460
2461 IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: User));
2462 auto *Load = IRB.CreateLoad(Ty: Int32Ty, Ptr: Var);
2463 IRB.CreateRet(V: Load);
2464
2465 // Mark the function as used so that it isn't stripped out.
2466 CompilerUsedVars.push_back(x: User);
2467 }
2468 return true;
2469}
2470
2471void InstrLowerer::emitUses() {
2472 // The metadata sections are parallel arrays. Optimizers (e.g.
2473 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2474 // we conservatively retain all unconditionally in the compiler.
2475 //
2476 // On ELF and Mach-O, the linker can guarantee the associated sections will be
2477 // retained or discarded as a unit, so llvm.compiler.used is sufficient.
2478 // Similarly on COFF, if prof data is not referenced by code we use one comdat
2479 // and ensure this GC property as well. Otherwise, we have to conservatively
2480 // make all of the sections retained by the linker.
2481 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2482 (TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2483 appendToCompilerUsed(M, Values: CompilerUsedVars);
2484 else
2485 appendToUsed(M, Values: CompilerUsedVars);
2486
2487 // We do not add proper references from used metadata sections to NamesVar and
2488 // VNodesVar, so we have to be conservative and place them in llvm.used
2489 // regardless of the target,
2490 appendToUsed(M, Values: UsedVars);
2491}
2492
2493void InstrLowerer::emitInitialization() {
2494 // Create ProfileFileName variable. Don't don't this for the
2495 // context-sensitive instrumentation lowering: This lowering is after
2496 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
2497 // have already create the variable before LTO/ThinLTO linking.
2498 if (!IsCS)
2499 createProfileFileNameVar(M, InstrProfileOutput: Options.InstrProfileOutput);
2500 Function *RegisterF = M.getFunction(Name: getInstrProfRegFuncsName());
2501 if (!RegisterF)
2502 return;
2503
2504 // Create the initialization function.
2505 auto *VoidTy = Type::getVoidTy(C&: M.getContext());
2506 auto *F = Function::Create(Ty: FunctionType::get(Result: VoidTy, isVarArg: false),
2507 Linkage: GlobalValue::InternalLinkage,
2508 N: getInstrProfInitFuncName(), M);
2509 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2510 F->addFnAttr(Kind: Attribute::NoInline);
2511 if (Options.NoRedZone)
2512 F->addFnAttr(Kind: Attribute::NoRedZone);
2513
2514 // Add the basic block and the necessary calls.
2515 IRBuilder<> IRB(BasicBlock::Create(Context&: M.getContext(), Name: "", Parent: F));
2516 IRB.CreateCall(Callee: RegisterF, Args: {});
2517 IRB.CreateRetVoid();
2518
2519 appendToGlobalCtors(M, F, Priority: 0);
2520}
2521
2522namespace llvm {
2523// Create the variable for profile sampling.
2524void createProfileSamplingVar(Module &M) {
2525 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
2526 IntegerType *SamplingVarTy;
2527 Constant *ValueZero;
2528 if (getSampledInstrumentationConfig().UseShort) {
2529 SamplingVarTy = Type::getInt16Ty(C&: M.getContext());
2530 ValueZero = Constant::getIntegerValue(Ty: SamplingVarTy, V: APInt(16, 0));
2531 } else {
2532 SamplingVarTy = Type::getInt32Ty(C&: M.getContext());
2533 ValueZero = Constant::getIntegerValue(Ty: SamplingVarTy, V: APInt(32, 0));
2534 }
2535 auto SamplingVar = new GlobalVariable(
2536 M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2537 SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2538 SamplingVar->setThreadLocal(true);
2539 Triple TT(M.getTargetTriple());
2540 if (TT.supportsCOMDAT()) {
2541 SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2542 SamplingVar->setComdat(M.getOrInsertComdat(Name: VarName));
2543 }
2544 appendToCompilerUsed(M, Values: SamplingVar);
2545}
2546} // namespace llvm
2547
2548// For GPU targets: Allocate contiguous arrays for all profile data.
2549// This solves the linker reordering problem by using ONE symbol per section
2550// type, so there's nothing for the linker to reorder.
2551StructType *InstrLowerer::getProfileDataTy() {
2552 if (ProfileDataTy)
2553 return ProfileDataTy;
2554
2555 auto &Ctx = M.getContext();
2556 auto *IntPtrTy = M.getDataLayout().getIntPtrType(C&: M.getContext());
2557 auto *Int16Ty = Type::getInt16Ty(C&: Ctx);
2558 auto *Int16ArrayTy = ArrayType::get(ElementType: Int16Ty, NumElements: IPVK_Last + 1);
2559 Type *DataTypes[] = {
2560#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
2561#include "llvm/ProfileData/InstrProfData.inc"
2562 };
2563 ProfileDataTy = StructType::get(Context&: Ctx, Elements: ArrayRef(DataTypes));
2564 return ProfileDataTy;
2565}
2566