1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotate the indirect call instructions.
32//
33// To get the precise counter information, These two passes need to invoke at
34// the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37// the profile is opened in module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44// class of PGOEdge and BBInfo, respectively. They contains extra data structure
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
50#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
51#include "ValueProfileCollector.h"
52#include "llvm/ADT/APInt.h"
53#include "llvm/ADT/ArrayRef.h"
54#include "llvm/ADT/STLExtras.h"
55#include "llvm/ADT/SmallVector.h"
56#include "llvm/ADT/Statistic.h"
57#include "llvm/ADT/StringRef.h"
58#include "llvm/ADT/StringSet.h"
59#include "llvm/ADT/Twine.h"
60#include "llvm/ADT/iterator.h"
61#include "llvm/ADT/iterator_range.h"
62#include "llvm/Analysis/BlockFrequencyInfo.h"
63#include "llvm/Analysis/BranchProbabilityInfo.h"
64#include "llvm/Analysis/CFG.h"
65#include "llvm/Analysis/LoopInfo.h"
66#include "llvm/Analysis/OptimizationRemarkEmitter.h"
67#include "llvm/Analysis/ProfileSummaryInfo.h"
68#include "llvm/Analysis/TargetLibraryInfo.h"
69#include "llvm/IR/Attributes.h"
70#include "llvm/IR/BasicBlock.h"
71#include "llvm/IR/CFG.h"
72#include "llvm/IR/Comdat.h"
73#include "llvm/IR/Constant.h"
74#include "llvm/IR/Constants.h"
75#include "llvm/IR/DiagnosticInfo.h"
76#include "llvm/IR/Dominators.h"
77#include "llvm/IR/EHPersonalities.h"
78#include "llvm/IR/Function.h"
79#include "llvm/IR/GlobalAlias.h"
80#include "llvm/IR/GlobalValue.h"
81#include "llvm/IR/GlobalVariable.h"
82#include "llvm/IR/IRBuilder.h"
83#include "llvm/IR/InstVisitor.h"
84#include "llvm/IR/InstrTypes.h"
85#include "llvm/IR/Instruction.h"
86#include "llvm/IR/Instructions.h"
87#include "llvm/IR/IntrinsicInst.h"
88#include "llvm/IR/Intrinsics.h"
89#include "llvm/IR/LLVMContext.h"
90#include "llvm/IR/MDBuilder.h"
91#include "llvm/IR/Module.h"
92#include "llvm/IR/PassManager.h"
93#include "llvm/IR/ProfDataUtils.h"
94#include "llvm/IR/ProfileSummary.h"
95#include "llvm/IR/Type.h"
96#include "llvm/IR/Value.h"
97#include "llvm/ProfileData/InstrProf.h"
98#include "llvm/ProfileData/InstrProfReader.h"
99#include "llvm/Support/BranchProbability.h"
100#include "llvm/Support/CRC.h"
101#include "llvm/Support/Casting.h"
102#include "llvm/Support/CommandLine.h"
103#include "llvm/Support/Compiler.h"
104#include "llvm/Support/DOTGraphTraits.h"
105#include "llvm/Support/Debug.h"
106#include "llvm/Support/Error.h"
107#include "llvm/Support/ErrorHandling.h"
108#include "llvm/Support/GraphWriter.h"
109#include "llvm/Support/VirtualFileSystem.h"
110#include "llvm/Support/raw_ostream.h"
111#include "llvm/TargetParser/Triple.h"
112#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
113#include "llvm/Transforms/Instrumentation/CFGMST.h"
114#include "llvm/Transforms/Utils/BasicBlockUtils.h"
115#include "llvm/Transforms/Utils/Instrumentation.h"
116#include "llvm/Transforms/Utils/MisExpect.h"
117#include "llvm/Transforms/Utils/ModuleUtils.h"
118#include <algorithm>
119#include <cassert>
120#include <cstdint>
121#include <memory>
122#include <numeric>
123#include <optional>
124#include <stack>
125#include <string>
126#include <unordered_map>
127#include <utility>
128#include <vector>
129
130using namespace llvm;
131using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
132
133#define DEBUG_TYPE "pgo-instrumentation"
134
135STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
136STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
137STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
138STATISTIC(NumOfPGOEdge, "Number of edges.");
139STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
140STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
141STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
142STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
143STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
144STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
145STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
146STATISTIC(NumOfCSPGOSelectInsts,
147 "Number of select instruction instrumented in CSPGO.");
148STATISTIC(NumOfCSPGOMemIntrinsics,
149 "Number of mem intrinsics instrumented in CSPGO.");
150STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
151STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
152STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
153STATISTIC(NumOfCSPGOFunc,
154 "Number of functions having valid profile counts in CSPGO.");
155STATISTIC(NumOfCSPGOMismatch,
156 "Number of functions having mismatch profile in CSPGO.");
157STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
158STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
159
160// Command line option to specify the file to read profile from. This is
161// mainly used for testing.
162static cl::opt<std::string> PGOTestProfileFile(
163 "pgo-test-profile-file", cl::init(Val: ""), cl::Hidden,
164 cl::value_desc("filename"),
165 cl::desc("Specify the path of profile data file. This is "
166 "mainly for test purpose."));
167static cl::opt<std::string> PGOTestProfileRemappingFile(
168 "pgo-test-profile-remapping-file", cl::init(Val: ""), cl::Hidden,
169 cl::value_desc("filename"),
170 cl::desc("Specify the path of profile remapping file. This is mainly for "
171 "test purpose."));
172
173// Command line option to disable value profiling. The default is false:
174// i.e. value profiling is enabled by default. This is for debug purpose.
175static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(Val: false),
176 cl::Hidden,
177 cl::desc("Disable Value Profiling"));
178
179// Command line option to set the maximum number of VP annotations to write to
180// the metadata for a single indirect call callsite.
181static cl::opt<unsigned> MaxNumAnnotations(
182 "icp-max-annotations", cl::init(Val: 3), cl::Hidden,
183 cl::desc("Max number of annotations for a single indirect "
184 "call callsite"));
185
186// Command line option to set the maximum number of value annotations
187// to write to the metadata for a single memop intrinsic.
188static cl::opt<unsigned> MaxNumMemOPAnnotations(
189 "memop-max-annotations", cl::init(Val: 4), cl::Hidden,
190 cl::desc("Max number of precise value annotations for a single memop"
191 "intrinsic"));
192
193// Command line option to control appending FunctionHash to the name of a COMDAT
194// function. This is to avoid the hash mismatch caused by the preinliner.
195static cl::opt<bool> DoComdatRenaming(
196 "do-comdat-renaming", cl::init(Val: false), cl::Hidden,
197 cl::desc("Append function hash to the name of COMDAT function to avoid "
198 "function hash mismatch due to the preinliner"));
199
200namespace llvm {
201// Command line option to enable/disable the warning about missing profile
202// information.
203cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(Val: false),
204 cl::Hidden,
205 cl::desc("Use this option to turn on/off "
206 "warnings about missing profile data for "
207 "functions."));
208
209// Command line option to enable/disable the warning about a hash mismatch in
210// the profile data.
211cl::opt<bool>
212 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(Val: false), cl::Hidden,
213 cl::desc("Use this option to turn off/on "
214 "warnings about profile cfg mismatch."));
215
216// Command line option to enable/disable the warning about a hash mismatch in
217// the profile data for Comdat functions, which often turns out to be false
218// positive due to the pre-instrumentation inline.
219cl::opt<bool> NoPGOWarnMismatchComdatWeak(
220 "no-pgo-warn-mismatch-comdat-weak", cl::init(Val: true), cl::Hidden,
221 cl::desc("The option is used to turn on/off "
222 "warnings about hash mismatch for comdat "
223 "or weak functions."));
224
225// Command line option to enable/disable select instruction instrumentation.
226static cl::opt<bool>
227 PGOInstrSelect("pgo-instr-select", cl::init(Val: true), cl::Hidden,
228 cl::desc("Use this option to turn on/off SELECT "
229 "instruction instrumentation. "));
230
231// Command line option to turn on CFG dot or text dump of raw profile counts
232static cl::opt<PGOViewCountsType> PGOViewRawCounts(
233 "pgo-view-raw-counts", cl::Hidden,
234 cl::desc("A boolean option to show CFG dag or text "
235 "with raw profile counts from "
236 "profile data. See also option "
237 "-pgo-view-counts. To limit graph "
238 "display to only one function, use "
239 "filtering option -view-bfi-func-name."),
240 cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
241 clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
242 clEnumValN(PGOVCT_Text, "text", "show in text.")));
243
244// Command line option to enable/disable memop intrinsic call.size profiling.
245static cl::opt<bool>
246 PGOInstrMemOP("pgo-instr-memop", cl::init(Val: true), cl::Hidden,
247 cl::desc("Use this option to turn on/off "
248 "memory intrinsic size profiling."));
249
250// Emit branch probability as optimization remarks.
251static cl::opt<bool>
252 EmitBranchProbability("pgo-emit-branch-prob", cl::init(Val: false), cl::Hidden,
253 cl::desc("When this option is on, the annotated "
254 "branch probability will be emitted as "
255 "optimization remarks: -{Rpass|"
256 "pass-remarks}=pgo-instrumentation"));
257
258static cl::opt<bool> PGOInstrumentEntry(
259 "pgo-instrument-entry", cl::init(Val: false), cl::Hidden,
260 cl::desc("Force to instrument function entry basicblock."));
261
262static cl::opt<bool>
263 PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(Val: false),
264 cl::Hidden,
265 cl::desc("Force to instrument loop entries."));
266
267static cl::opt<bool> PGOFunctionEntryCoverage(
268 "pgo-function-entry-coverage", cl::Hidden,
269 cl::desc(
270 "Use this option to enable function entry coverage instrumentation."));
271
272static cl::opt<bool> PGOBlockCoverage(
273 "pgo-block-coverage",
274 cl::desc("Use this option to enable basic block coverage instrumentation"));
275
276static cl::opt<bool>
277 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
278 cl::desc("Create a dot file of CFGs with block "
279 "coverage inference information"));
280
281static cl::opt<bool> PGOTemporalInstrumentation(
282 "pgo-temporal-instrumentation",
283 cl::desc("Use this option to enable temporal instrumentation"));
284
285static cl::opt<bool>
286 PGOFixEntryCount("pgo-fix-entry-count", cl::init(Val: true), cl::Hidden,
287 cl::desc("Fix function entry count in profile use."));
288
289static cl::opt<bool> PGOVerifyHotBFI(
290 "pgo-verify-hot-bfi", cl::init(Val: false), cl::Hidden,
291 cl::desc("Print out the non-match BFI count if a hot raw profile count "
292 "becomes non-hot, or a cold raw profile count becomes hot. "
293 "The print is enabled under -Rpass-analysis=pgo, or "
294 "internal option -pass-remarks-analysis=pgo."));
295
296static cl::opt<bool> PGOVerifyBFI(
297 "pgo-verify-bfi", cl::init(Val: false), cl::Hidden,
298 cl::desc("Print out mismatched BFI counts after setting profile metadata "
299 "The print is enabled under -Rpass-analysis=pgo, or "
300 "internal option -pass-remarks-analysis=pgo."));
301
302static cl::opt<unsigned> PGOVerifyBFIRatio(
303 "pgo-verify-bfi-ratio", cl::init(Val: 2), cl::Hidden,
304 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
305 "mismatched BFI if the difference percentage is greater than "
306 "this value (in percentage)."));
307
308static cl::opt<unsigned> PGOVerifyBFICutoff(
309 "pgo-verify-bfi-cutoff", cl::init(Val: 5), cl::Hidden,
310 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
311 "profile count value is below."));
312
313static cl::opt<std::string> PGOTraceFuncHash(
314 "pgo-trace-func-hash", cl::init(Val: "-"), cl::Hidden,
315 cl::value_desc("function name"),
316 cl::desc("Trace the hash of the function with this name."));
317
318static cl::opt<unsigned> PGOFunctionSizeThreshold(
319 "pgo-function-size-threshold", cl::Hidden,
320 cl::desc("Do not instrument functions smaller than this threshold."));
321
322static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
323 "pgo-critical-edge-threshold", cl::init(Val: 20000), cl::Hidden,
324 cl::desc("Do not instrument functions with the number of critical edges "
325 " greater than this threshold."));
326
327static cl::opt<uint64_t> PGOColdInstrumentEntryThreshold(
328 "pgo-cold-instrument-entry-threshold", cl::init(Val: 0), cl::Hidden,
329 cl::desc("For cold function instrumentation, skip instrumenting functions "
330 "whose entry count is above the given value."));
331
332static cl::opt<bool> PGOTreatUnknownAsCold(
333 "pgo-treat-unknown-as-cold", cl::init(Val: false), cl::Hidden,
334 cl::desc("For cold function instrumentation, treat count unknown(e.g. "
335 "unprofiled) functions as cold."));
336
337cl::opt<bool> PGOInstrumentColdFunctionOnly(
338 "pgo-instrument-cold-function-only", cl::init(Val: false), cl::Hidden,
339 cl::desc("Enable cold function only instrumentation."));
340
341cl::list<std::string> CtxPGOSkipCallsiteInstrument(
342 "ctx-prof-skip-callsite-instr", cl::Hidden,
343 cl::desc("Do not instrument callsites to functions in this list. Intended "
344 "for testing."));
345
346extern cl::opt<unsigned> MaxNumVTableAnnotations;
347
348// Command line option to turn on CFG dot dump after profile annotation.
349// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
350extern cl::opt<PGOViewCountsType> PGOViewCounts;
351
352// Command line option to specify the name of the function for CFG dump
353// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
354extern cl::opt<std::string> ViewBlockFreqFuncName;
355
356// Command line option to enable vtable value profiling. Defined in
357// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
358extern cl::opt<bool> EnableVTableValueProfiling;
359extern cl::opt<bool> EnableVTableProfileUse;
360LLVM_ABI extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind>
361 ProfileCorrelate;
362} // namespace llvm
363
364namespace {
365class FunctionInstrumenter final {
366 Module &M;
367 Function &F;
368 TargetLibraryInfo &TLI;
369 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
370 BranchProbabilityInfo *const BPI;
371 BlockFrequencyInfo *const BFI;
372 LoopInfo *const LI;
373
374 const PGOInstrumentationType InstrumentationType;
375
376 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls.
377 // Ctx profiling implicitly captures indirect call cases, but not other
378 // values. Supporting other values is relatively straight-forward - just
379 // another counter range within the context.
380 bool isValueProfilingDisabled() const {
381 // Value profiling is disabled for GPU targets because the device-side
382 // profiling runtime does not yet implement
383 // __llvm_profile_instrument_target. The existing compiler-rt implementation
384 // uses a linked-list with locks and eviction policy that is not efficient
385 // for massively parallel GPU execution. A GPU-optimized implementation is
386 // left as future work.
387 return DisableValueProfiling ||
388 InstrumentationType == PGOInstrumentationType::CTXPROF ||
389 isGPUProfTarget(M);
390 }
391
392 bool shouldInstrumentEntryBB() const {
393 return PGOInstrumentEntry ||
394 InstrumentationType == PGOInstrumentationType::CTXPROF;
395 }
396
397 bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
398
399public:
400 FunctionInstrumenter(
401 Module &M, Function &F, TargetLibraryInfo &TLI,
402 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
403 BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
404 LoopInfo *LI = nullptr,
405 PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
406 : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
407 LI(LI), InstrumentationType(InstrumentationType) {}
408
409 void instrument();
410};
411} // namespace
412
413// Return a string describing the branch condition that can be
414// used in static branch probability heuristics:
415static std::string getBranchCondString(Instruction *TI) {
416 CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI);
417 if (!BI)
418 return std::string();
419
420 Value *Cond = BI->getCondition();
421 ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond);
422 if (!CI)
423 return std::string();
424
425 std::string result;
426 raw_string_ostream OS(result);
427 OS << CI->getPredicate() << "_";
428 CI->getOperand(i_nocapture: 0)->getType()->print(O&: OS, IsForDebug: true);
429
430 Value *RHS = CI->getOperand(i_nocapture: 1);
431 ConstantInt *CV = dyn_cast<ConstantInt>(Val: RHS);
432 if (CV) {
433 if (CV->isZero())
434 OS << "_Zero";
435 else if (CV->isOne())
436 OS << "_One";
437 else if (CV->isMinusOne())
438 OS << "_MinusOne";
439 else
440 OS << "_Const";
441 }
442 return result;
443}
444
445static const char *ValueProfKindDescr[] = {
446#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
447#include "llvm/ProfileData/InstrProfData.inc"
448};
449
450// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
451// aware this is an ir_level profile so it can set the version flag.
452static GlobalVariable *
453createIRLevelProfileFlagVar(Module &M,
454 PGOInstrumentationType InstrumentationType) {
455 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
456 Type *IntTy64 = Type::getInt64Ty(C&: M.getContext());
457 uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
458 if (InstrumentationType == PGOInstrumentationType::CSFDO)
459 ProfileVersion |= VARIANT_MASK_CSIR_PROF;
460 if (PGOInstrumentEntry ||
461 InstrumentationType == PGOInstrumentationType::CTXPROF)
462 ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
463 if (PGOInstrumentLoopEntries)
464 ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
465 if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
466 ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
467 if (PGOFunctionEntryCoverage)
468 ProfileVersion |=
469 VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
470 if (PGOBlockCoverage)
471 ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
472 if (PGOTemporalInstrumentation)
473 ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
474 auto IRLevelVersionVariable = new GlobalVariable(
475 M, IntTy64, true, GlobalValue::WeakAnyLinkage,
476 Constant::getIntegerValue(Ty: IntTy64, V: APInt(64, ProfileVersion)), VarName);
477 IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
478
479 Triple TT(M.getTargetTriple());
480 if (TT.supportsCOMDAT()) {
481 IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
482 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(Name: VarName));
483 }
484 return IRLevelVersionVariable;
485}
486
487namespace {
488
489/// The select instruction visitor plays three roles specified
490/// by the mode. In \c VM_counting mode, it simply counts the number of
491/// select instructions. In \c VM_instrument mode, it inserts code to count
492/// the number times TrueValue of select is taken. In \c VM_annotate mode,
493/// it reads the profile data and annotate the select instruction with metadata.
494enum VisitMode { VM_counting, VM_instrument, VM_annotate };
495class PGOUseFunc;
496
497/// Instruction Visitor class to visit select instructions.
498struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
499 Function &F;
500 unsigned NSIs = 0; // Number of select instructions instrumented.
501 VisitMode Mode = VM_counting; // Visiting mode.
502 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
503 unsigned TotalNumCtrs = 0; // Total number of counters
504 GlobalValue *FuncNameVar = nullptr;
505 uint64_t FuncHash = 0;
506 PGOUseFunc *UseFunc = nullptr;
507 bool HasSingleByteCoverage;
508
509 SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
510 : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
511
512 void countSelects() {
513 NSIs = 0;
514 Mode = VM_counting;
515 visit(F);
516 }
517
518 // Visit the IR stream and instrument all select instructions. \p
519 // Ind is a pointer to the counter index variable; \p TotalNC
520 // is the total number of counters; \p FNV is the pointer to the
521 // PGO function name var; \p FHash is the function hash.
522 void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV,
523 uint64_t FHash) {
524 Mode = VM_instrument;
525 CurCtrIdx = Ind;
526 TotalNumCtrs = TotalNC;
527 FuncHash = FHash;
528 FuncNameVar = FNV;
529 visit(F);
530 }
531
532 // Visit the IR stream and annotate all select instructions.
533 void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
534 Mode = VM_annotate;
535 UseFunc = UF;
536 CurCtrIdx = Ind;
537 visit(F);
538 }
539
540 void instrumentOneSelectInst(SelectInst &SI);
541 void annotateOneSelectInst(SelectInst &SI);
542
543 // Visit \p SI instruction and perform tasks according to visit mode.
544 void visitSelectInst(SelectInst &SI);
545
546 // Return the number of select instructions. This needs be called after
547 // countSelects().
548 unsigned getNumOfSelectInsts() const { return NSIs; }
549};
550
551/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
552/// based instrumentation.
553/// Note that the CFG can be a multi-graph. So there might be multiple edges
554/// with the same SrcBB and DestBB.
555struct PGOEdge {
556 BasicBlock *SrcBB;
557 BasicBlock *DestBB;
558 uint64_t Weight;
559 bool InMST = false;
560 bool Removed = false;
561 bool IsCritical = false;
562
563 PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
564 : SrcBB(Src), DestBB(Dest), Weight(W) {}
565
566 /// Return the information string of an edge.
567 std::string infoString() const {
568 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
569 (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
570 .str();
571 }
572};
573
574/// This class stores the auxiliary information for each BB in the MST.
575struct PGOBBInfo {
576 PGOBBInfo *Group;
577 uint32_t Index;
578 uint32_t Rank = 0;
579
580 PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
581
582 /// Return the information string of this object.
583 std::string infoString() const {
584 return (Twine("Index=") + Twine(Index)).str();
585 }
586};
587
588// This class implements the CFG edges. Note the CFG can be a multi-graph.
589template <class Edge, class BBInfo> class FuncPGOInstrumentation {
590private:
591 Function &F;
592
593 // Is this is context-sensitive instrumentation.
594 bool IsCS;
595
596 // A map that stores the Comdat group in function F.
597 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
598
599 ValueProfileCollector VPC;
600
601 void computeCFGHash();
602 void renameComdatFunction();
603
604public:
605 const TargetLibraryInfo &TLI;
606 std::vector<std::vector<VPCandidateInfo>> ValueSites;
607 SelectInstVisitor SIVisitor;
608 std::string FuncName;
609 std::string DeprecatedFuncName;
610 GlobalVariable *FuncNameVar;
611
612 // CFG hash value for this function.
613 uint64_t FunctionHash = 0;
614
615 // The Minimum Spanning Tree of function CFG.
616 CFGMST<Edge, BBInfo> MST;
617
618 const std::optional<BlockCoverageInference> BCI;
619
620 static std::optional<BlockCoverageInference>
621 constructBCI(Function &Func, bool HasSingleByteCoverage,
622 bool InstrumentFuncEntry) {
623 if (HasSingleByteCoverage)
624 return BlockCoverageInference(Func, InstrumentFuncEntry);
625 return {};
626 }
627
628 // Collect all the BBs that will be instrumented, and store them in
629 // InstrumentBBs.
630 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
631
632 // Give an edge, find the BB that will be instrumented.
633 // Return nullptr if there is no BB to be instrumented.
634 BasicBlock *getInstrBB(Edge *E);
635
636 // Return the auxiliary BB information.
637 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
638
639 // Return the auxiliary BB information if available.
640 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
641
642 // Dump edges and BB information.
643 void dumpInfo(StringRef Str = "") const {
644 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
645 " Hash: " + Twine(FunctionHash) + "\t" + Str);
646 }
647
648 FuncPGOInstrumentation(
649 Function &Func, TargetLibraryInfo &TLI,
650 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
651 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
652 BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
653 bool IsCS = false, bool InstrumentFuncEntry = true,
654 bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
655 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
656 TLI(TLI), ValueSites(IPVK_Last + 1),
657 SIVisitor(Func, HasSingleByteCoverage),
658 MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
659 BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
660 if (BCI && PGOViewBlockCoverageGraph)
661 BCI->viewBlockCoverageGraph();
662 // This should be done before CFG hash computation.
663 SIVisitor.countSelects();
664 ValueSites[IPVK_MemOPSize] = VPC.get(Kind: IPVK_MemOPSize);
665 if (!IsCS) {
666 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
667 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
668 NumOfPGOBB += MST.bbInfoSize();
669 ValueSites[IPVK_IndirectCallTarget] = VPC.get(Kind: IPVK_IndirectCallTarget);
670 if (EnableVTableValueProfiling)
671 ValueSites[IPVK_VTableTarget] = VPC.get(Kind: IPVK_VTableTarget);
672 } else {
673 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
674 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
675 NumOfCSPGOBB += MST.bbInfoSize();
676 }
677
678 FuncName = getIRPGOFuncName(F);
679 DeprecatedFuncName = getPGOFuncName(F);
680 computeCFGHash();
681 if (!ComdatMembers.empty())
682 renameComdatFunction();
683 LLVM_DEBUG(dumpInfo("after CFGMST"));
684
685 for (const auto &E : MST.allEdges()) {
686 if (E->Removed)
687 continue;
688 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
689 if (!E->InMST)
690 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
691 }
692
693 if (CreateGlobalVar)
694 FuncNameVar = createPGOFuncNameVar(F, PGOFuncName: FuncName);
695 }
696};
697
698} // end anonymous namespace
699
700// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
701// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
702// of selects, indirect calls, mem ops and edges.
703template <class Edge, class BBInfo>
704void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
705 std::vector<uint8_t> Indexes;
706 JamCRC JC;
707 for (auto &BB : F) {
708 for (BasicBlock *Succ : successors(BB: &BB)) {
709 auto BI = findBBInfo(BB: Succ);
710 if (BI == nullptr)
711 continue;
712 uint32_t Index = BI->Index;
713 for (int J = 0; J < 4; J++)
714 Indexes.push_back(x: (uint8_t)(Index >> (J * 8)));
715 }
716 }
717 JC.update(Data: Indexes);
718
719 JamCRC JCH;
720 // The higher 32 bits.
721 auto updateJCH = [&JCH](uint64_t Num) {
722 uint8_t Data[8];
723 support::endian::write64le(P: Data, V: Num);
724 JCH.update(Data);
725 };
726 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
727 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
728 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
729 if (BCI) {
730 updateJCH(BCI->getInstrumentedBlocksHash());
731 } else {
732 updateJCH((uint64_t)MST.numEdges());
733 }
734
735 // Hash format for context sensitive profile. Reserve 4 bits for other
736 // information.
737 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
738
739 // Reserve bit 60-63 for other information purpose.
740 FunctionHash &= NamedInstrProfRecord::FUNC_HASH_MASK;
741 if (IsCS)
742 NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
743 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
744 << " CRC = " << JC.getCRC()
745 << ", Selects = " << SIVisitor.getNumOfSelectInsts()
746 << ", Edges = " << MST.numEdges() << ", ICSites = "
747 << ValueSites[IPVK_IndirectCallTarget].size()
748 << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
749 << ", High32 CRC = " << JCH.getCRC()
750 << ", Hash = " << FunctionHash << "\n";);
751
752 if (PGOTraceFuncHash != "-" && F.getName().contains(Other: PGOTraceFuncHash))
753 dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
754 << " in building " << F.getParent()->getSourceFileName() << "\n";
755}
756
757// Check if we can safely rename this Comdat function.
758static bool canRenameComdat(
759 Function &F,
760 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
761 if (!DoComdatRenaming || !canRenameComdatFunc(F, CheckAddressTaken: true))
762 return false;
763
764 // FIXME: Current only handle those Comdat groups that only containing one
765 // function.
766 // (1) For a Comdat group containing multiple functions, we need to have a
767 // unique postfix based on the hashes for each function. There is a
768 // non-trivial code refactoring to do this efficiently.
769 // (2) Variables can not be renamed, so we can not rename Comdat function in a
770 // group including global vars.
771 Comdat *C = F.getComdat();
772 for (auto &&CM : make_range(p: ComdatMembers.equal_range(x: C))) {
773 assert(!isa<GlobalAlias>(CM.second));
774 Function *FM = dyn_cast<Function>(Val: CM.second);
775 if (FM != &F)
776 return false;
777 }
778 return true;
779}
780
781// Append the CFGHash to the Comdat function name.
782template <class Edge, class BBInfo>
783void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
784 if (!canRenameComdat(F, ComdatMembers))
785 return;
786 std::string OrigName = F.getName().str();
787 std::string NewFuncName =
788 Twine(F.getName() + "." + Twine(FunctionHash)).str();
789 F.setName(Twine(NewFuncName));
790 GlobalAlias::create(Linkage: GlobalValue::WeakAnyLinkage, Name: OrigName, Aliasee: &F);
791 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
792 Comdat *NewComdat;
793 Module *M = F.getParent();
794 // For AvailableExternallyLinkage functions, change the linkage to
795 // LinkOnceODR and put them into comdat. This is because after renaming, there
796 // is no backup external copy available for the function.
797 if (!F.hasComdat()) {
798 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
799 NewComdat = M->getOrInsertComdat(Name: StringRef(NewFuncName));
800 F.setLinkage(GlobalValue::LinkOnceODRLinkage);
801 F.setComdat(NewComdat);
802 return;
803 }
804
805 // This function belongs to a single function Comdat group.
806 Comdat *OrigComdat = F.getComdat();
807 std::string NewComdatName =
808 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
809 NewComdat = M->getOrInsertComdat(Name: StringRef(NewComdatName));
810 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
811
812 for (auto &&CM : make_range(p: ComdatMembers.equal_range(x: OrigComdat))) {
813 // Must be a function.
814 cast<Function>(Val: CM.second)->setComdat(NewComdat);
815 }
816}
817
818/// Collect all the BBs that will be instruments and add them to
819/// `InstrumentBBs`.
820template <class Edge, class BBInfo>
821void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
822 std::vector<BasicBlock *> &InstrumentBBs) {
823 if (BCI) {
824 for (auto &BB : F)
825 if (BCI->shouldInstrumentBlock(BB))
826 InstrumentBBs.push_back(x: &BB);
827 return;
828 }
829
830 // Use a worklist as we will update the vector during the iteration.
831 std::vector<Edge *> EdgeList;
832 EdgeList.reserve(MST.numEdges());
833 for (const auto &E : MST.allEdges())
834 EdgeList.push_back(E.get());
835
836 for (auto &E : EdgeList) {
837 BasicBlock *InstrBB = getInstrBB(E);
838 if (InstrBB)
839 InstrumentBBs.push_back(x: InstrBB);
840 }
841}
842
843// Given a CFG E to be instrumented, find which BB to place the instrumented
844// code. The function will split the critical edge if necessary.
845template <class Edge, class BBInfo>
846BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
847 if (E->InMST || E->Removed)
848 return nullptr;
849
850 BasicBlock *SrcBB = E->SrcBB;
851 BasicBlock *DestBB = E->DestBB;
852 // For a fake edge, instrument the real BB.
853 if (SrcBB == nullptr)
854 return DestBB;
855 if (DestBB == nullptr)
856 return SrcBB;
857
858 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
859 // There are basic blocks (such as catchswitch) cannot be instrumented.
860 // If the returned first insertion point is the end of BB, skip this BB.
861 if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
862 return nullptr;
863 return BB;
864 };
865
866 // Instrument the SrcBB if it has a single successor,
867 // otherwise, the DestBB if this is not a critical edge.
868 Instruction *TI = SrcBB->getTerminator();
869 if (TI->getNumSuccessors() <= 1)
870 return canInstrument(SrcBB);
871 if (!E->IsCritical)
872 return canInstrument(DestBB);
873
874 // Some IndirectBr critical edges cannot be split by the previous
875 // SplitIndirectBrCriticalEdges call. Bail out.
876 unsigned SuccNum = GetSuccessorNumber(BB: SrcBB, Succ: DestBB);
877 BasicBlock *InstrBB =
878 isa<IndirectBrInst>(Val: TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
879 if (!InstrBB) {
880 LLVM_DEBUG(
881 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
882 return nullptr;
883 }
884 // For a critical edge, we have to split. Instrument the newly
885 // created BB.
886 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
887 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
888 << " --> " << getBBInfo(DestBB).Index << "\n");
889 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
890 MST.addEdge(SrcBB, InstrBB, 0);
891 // Second one: Add new edge of InstrBB->DestBB.
892 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
893 NewEdge1.InMST = true;
894 E->Removed = true;
895
896 return canInstrument(InstrBB);
897}
898
899// When generating value profiling calls on Windows routines that make use of
900// handler funclets for exception processing an operand bundle needs to attached
901// to the called function. This routine will set \p OpBundles to contain the
902// funclet information, if any is needed, that should be placed on the generated
903// value profiling call for the value profile candidate call.
904static void
905populateEHOperandBundle(VPCandidateInfo &Cand,
906 DenseMap<BasicBlock *, ColorVector> &BlockColors,
907 SmallVectorImpl<OperandBundleDef> &OpBundles) {
908 auto *OrigCall = dyn_cast<CallBase>(Val: Cand.AnnotatedInst);
909 if (!OrigCall)
910 return;
911
912 if (!isa<IntrinsicInst>(Val: OrigCall)) {
913 // The instrumentation call should belong to the same funclet as a
914 // non-intrinsic call, so just copy the operand bundle, if any exists.
915 std::optional<OperandBundleUse> ParentFunclet =
916 OrigCall->getOperandBundle(ID: LLVMContext::OB_funclet);
917 if (ParentFunclet)
918 OpBundles.emplace_back(Args: OperandBundleDef(*ParentFunclet));
919 } else {
920 // Intrinsics or other instructions do not get funclet information from the
921 // front-end. Need to use the BlockColors that was computed by the routine
922 // colorEHFunclets to determine whether a funclet is needed.
923 if (!BlockColors.empty()) {
924 const ColorVector &CV = BlockColors.find(Val: OrigCall->getParent())->second;
925 assert(CV.size() == 1 && "non-unique color for block!");
926 BasicBlock::iterator EHPadIt = CV.front()->getFirstNonPHIIt();
927 if (EHPadIt->isEHPad())
928 OpBundles.emplace_back(Args: "funclet", Args: &*EHPadIt);
929 }
930 }
931}
932
933// Visit all edge and instrument the edges not in MST, and do value profiling.
934// Critical edges will be split.
935void FunctionInstrumenter::instrument() {
936 if (!PGOBlockCoverage) {
937 // Split indirectbr critical edges here before computing the MST rather than
938 // later in getInstrBB() to avoid invalidating it.
939 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
940 }
941
942 const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
943 FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
944 F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
945 InstrumentationType == PGOInstrumentationType::CSFDO,
946 shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
947 PGOBlockCoverage);
948
949 auto *const Name = IsCtxProf ? cast<GlobalValue>(Val: &F) : FuncInfo.FuncNameVar;
950 auto *const CFGHash =
951 ConstantInt::get(Ty: Type::getInt64Ty(C&: M.getContext()), V: FuncInfo.FunctionHash);
952 // Make sure that pointer to global is passed in with zero addrspace
953 // This is relevant during GPU profiling
954 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
955 C: Name, Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0));
956 if (PGOFunctionEntryCoverage) {
957 auto &EntryBB = F.getEntryBlock();
958 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
959 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
960 // i32 <index>)
961 Builder.CreateIntrinsic(
962 ID: Intrinsic::instrprof_cover,
963 Args: {NormalizedNamePtr, CFGHash, Builder.getInt32(C: 1), Builder.getInt32(C: 0)});
964 return;
965 }
966
967 std::vector<BasicBlock *> InstrumentBBs;
968 FuncInfo.getInstrumentBBs(InstrumentBBs);
969 unsigned NumCounters =
970 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
971
972 if (IsCtxProf) {
973 StringSet<> SkipCSInstr(llvm::from_range, CtxPGOSkipCallsiteInstrument);
974
975 auto *CSIntrinsic =
976 Intrinsic::getOrInsertDeclaration(M: &M, id: Intrinsic::instrprof_callsite);
977 // We want to count the instrumentable callsites, then instrument them. This
978 // is because the llvm.instrprof.callsite intrinsic has an argument (like
979 // the other instrprof intrinsics) capturing the total number of
980 // instrumented objects (counters, or callsites, in this case). In this
981 // case, we want that value so we can readily pass it to the compiler-rt
982 // APIs that may have to allocate memory based on the nr of callsites.
983 // The traversal logic is the same for both counting and instrumentation,
984 // just needs to be done in succession.
985 auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
986 for (auto &BB : F)
987 for (auto &Instr : BB)
988 if (auto *CS = dyn_cast<CallBase>(Val: &Instr)) {
989 if (!InstrProfCallsite::canInstrumentCallsite(CB: *CS))
990 continue;
991 if (CS->getCalledFunction() &&
992 SkipCSInstr.contains(key: CS->getCalledFunction()->getName()))
993 continue;
994 Visitor(CS);
995 }
996 };
997 // First, count callsites.
998 uint32_t TotalNumCallsites = 0;
999 Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; });
1000
1001 // Now instrument.
1002 uint32_t CallsiteIndex = 0;
1003 Visit([&](auto *CB) {
1004 IRBuilder<> Builder(CB);
1005 Builder.CreateCall(CSIntrinsic,
1006 {Name, CFGHash, Builder.getInt32(C: TotalNumCallsites),
1007 Builder.getInt32(C: CallsiteIndex++),
1008 CB->getCalledOperand()});
1009 });
1010 }
1011
1012 uint32_t I = 0;
1013 if (PGOTemporalInstrumentation) {
1014 NumCounters += PGOBlockCoverage ? 8 : 1;
1015 auto &EntryBB = F.getEntryBlock();
1016 IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
1017 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
1018 // i32 <index>)
1019 Builder.CreateIntrinsic(ID: Intrinsic::instrprof_timestamp,
1020 Args: {NormalizedNamePtr, CFGHash,
1021 Builder.getInt32(C: NumCounters),
1022 Builder.getInt32(C: I)});
1023 I += PGOBlockCoverage ? 8 : 1;
1024 }
1025
1026 for (auto *InstrBB : InstrumentBBs) {
1027 IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
1028 assert(Builder.GetInsertPoint() != InstrBB->end() &&
1029 "Cannot get the Instrumentation point");
1030 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
1031 // i32 <index>)
1032 Builder.CreateIntrinsic(ID: PGOBlockCoverage ? Intrinsic::instrprof_cover
1033 : Intrinsic::instrprof_increment,
1034 Args: {NormalizedNamePtr, CFGHash,
1035 Builder.getInt32(C: NumCounters),
1036 Builder.getInt32(C: I++)});
1037 }
1038
1039 // Now instrument select instructions:
1040 FuncInfo.SIVisitor.instrumentSelects(Ind: &I, TotalNC: NumCounters, FNV: Name,
1041 FHash: FuncInfo.FunctionHash);
1042 assert(I == NumCounters);
1043
1044 if (isValueProfilingDisabled())
1045 return;
1046
1047 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
1048
1049 // Intrinsic function calls do not have funclet operand bundles needed for
1050 // Windows exception handling attached to them. However, if value profiling is
1051 // inserted for one of these calls, then a funclet value will need to be set
1052 // on the instrumentation call based on the funclet coloring.
1053 DenseMap<BasicBlock *, ColorVector> BlockColors;
1054 if (F.hasPersonalityFn() &&
1055 isScopedEHPersonality(Pers: classifyEHPersonality(Pers: F.getPersonalityFn())))
1056 BlockColors = colorEHFunclets(F);
1057
1058 // For each VP Kind, walk the VP candidates and instrument each one.
1059 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
1060 unsigned SiteIndex = 0;
1061 if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
1062 continue;
1063
1064 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
1065 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
1066 << " site: CallSite Index = " << SiteIndex << "\n");
1067
1068 IRBuilder<> Builder(Cand.InsertPt);
1069 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
1070 "Cannot get the Instrumentation point");
1071
1072 Value *ToProfile = nullptr;
1073 if (Cand.V->getType()->isIntegerTy())
1074 ToProfile = Builder.CreateZExtOrTrunc(V: Cand.V, DestTy: Builder.getInt64Ty());
1075 else if (Cand.V->getType()->isPointerTy())
1076 ToProfile = Builder.CreatePtrToInt(V: Cand.V, DestTy: Builder.getInt64Ty());
1077 assert(ToProfile && "value profiling Value is of unexpected type");
1078
1079 auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1080 C: Name, Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0));
1081
1082 SmallVector<OperandBundleDef, 1> OpBundles;
1083 populateEHOperandBundle(Cand, BlockColors, OpBundles);
1084 Builder.CreateCall(
1085 Callee: Intrinsic::getOrInsertDeclaration(M: &M,
1086 id: Intrinsic::instrprof_value_profile),
1087 Args: {NormalizedNamePtr, Builder.getInt64(C: FuncInfo.FunctionHash),
1088 ToProfile, Builder.getInt32(C: Kind), Builder.getInt32(C: SiteIndex++)},
1089 OpBundles);
1090 }
1091 } // IPVK_First <= Kind <= IPVK_Last
1092}
1093
1094namespace {
1095
1096// This class represents a CFG edge in profile use compilation.
1097struct PGOUseEdge : public PGOEdge {
1098 using PGOEdge::PGOEdge;
1099
1100 std::optional<uint64_t> Count;
1101
1102 // Set edge count value
1103 void setEdgeCount(uint64_t Value) { Count = Value; }
1104
1105 // Return the information string for this object.
1106 std::string infoString() const {
1107 if (!Count)
1108 return PGOEdge::infoString();
1109 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1110 }
1111};
1112
1113using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1114
1115// This class stores the auxiliary information for each BB.
1116struct PGOUseBBInfo : public PGOBBInfo {
1117 std::optional<uint64_t> Count;
1118 int32_t UnknownCountInEdge = 0;
1119 int32_t UnknownCountOutEdge = 0;
1120 DirectEdges InEdges;
1121 DirectEdges OutEdges;
1122
1123 PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1124
1125 // Set the profile count value for this BB.
1126 void setBBInfoCount(uint64_t Value) { Count = Value; }
1127
1128 // Return the information string of this object.
1129 std::string infoString() const {
1130 if (!Count)
1131 return PGOBBInfo::infoString();
1132 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1133 }
1134
1135 // Add an OutEdge and update the edge count.
1136 void addOutEdge(PGOUseEdge *E) {
1137 OutEdges.push_back(Elt: E);
1138 UnknownCountOutEdge++;
1139 }
1140
1141 // Add an InEdge and update the edge count.
1142 void addInEdge(PGOUseEdge *E) {
1143 InEdges.push_back(Elt: E);
1144 UnknownCountInEdge++;
1145 }
1146};
1147
1148} // end anonymous namespace
1149
1150// Sum up the count values for all the edges.
1151static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
1152 uint64_t Total = 0;
1153 for (const auto &E : Edges) {
1154 if (E->Removed)
1155 continue;
1156 if (E->Count)
1157 Total += *E->Count;
1158 }
1159 return Total;
1160}
1161
1162namespace {
1163
1164class PGOUseFunc {
1165public:
1166 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1167 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1168 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1169 LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1170 bool InstrumentFuncEntry, bool InstrumentLoopEntries,
1171 bool HasSingleByteCoverage)
1172 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1173 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1174 InstrumentFuncEntry, InstrumentLoopEntries,
1175 HasSingleByteCoverage),
1176 FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1177
1178 void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1179
1180 /// Get the profile record, assign it to \p ProfileRecord, handle errors if
1181 /// necessary, and assign \p ProgramMaxCount. \returns true if there are no
1182 /// errors.
1183 bool getRecord(IndexedInstrProfReader *PGOReader);
1184
1185 // Read counts for the instrumented BB from profile.
1186 bool readCounters(bool &AllZeros,
1187 InstrProfRecord::CountPseudoKind &PseudoKind);
1188
1189 // Populate the counts for all BBs.
1190 void populateCounters();
1191
1192 // Set block coverage based on profile coverage values.
1193 void populateCoverage();
1194
1195 // Set the branch weights based on the count values.
1196 void setBranchWeights();
1197
1198 // Annotate the value profile call sites for all value kind.
1199 void annotateValueSites();
1200
1201 // Annotate the value profile call sites for one value kind.
1202 void annotateValueSites(uint32_t Kind);
1203
1204 // Annotate the irreducible loop header weights.
1205 void annotateIrrLoopHeaderWeights();
1206
1207 // Annotate per-block uniformity info for offload profiling.
1208 void setBlockUniformityAttribute();
1209
1210 // The hotness of the function from the profile count.
1211 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1212
1213 // Return the function hotness from the profile.
1214 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1215
1216 // Return the function hash.
1217 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1218
1219 // Return the profile record for this function;
1220 NamedInstrProfRecord &getProfileRecord() { return ProfileRecord; }
1221
1222 // Return the auxiliary BB information.
1223 PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1224 return FuncInfo.getBBInfo(BB);
1225 }
1226
1227 // Return the auxiliary BB information if available.
1228 PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1229 return FuncInfo.findBBInfo(BB);
1230 }
1231
1232 Function &getFunc() const { return F; }
1233
1234 void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1235
1236 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1237
1238private:
1239 Function &F;
1240 Module *M;
1241 BlockFrequencyInfo *BFI;
1242 ProfileSummaryInfo *PSI;
1243
1244 // This member stores the shared information with class PGOGenFunc.
1245 FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1246
1247 // The maximum count value in the profile. This is only used in PGO use
1248 // compilation.
1249 uint64_t ProgramMaxCount;
1250
1251 // Position of counter that remains to be read.
1252 uint32_t CountPosition = 0;
1253
1254 // Total size of the profile count for this function.
1255 uint32_t ProfileCountSize = 0;
1256
1257 // ProfileRecord for this function.
1258 NamedInstrProfRecord ProfileRecord;
1259
1260 // Function hotness info derived from profile.
1261 FuncFreqAttr FreqAttr;
1262
1263 // Is to use the context sensitive profile.
1264 bool IsCS;
1265
1266 ValueProfileCollector VPC;
1267
1268 // Find the Instrumented BB and set the value. Return false on error.
1269 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1270
1271 // Set the edge counter value for the unknown edge -- there should be only
1272 // one unknown edge.
1273 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1274
1275 // Set the hot/cold inline hints based on the count values.
1276 // FIXME: This function should be removed once the functionality in
1277 // the inliner is implemented.
1278 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1279 if (PSI->isHotCount(C: EntryCount))
1280 FreqAttr = FFA_Hot;
1281 else if (PSI->isColdCount(C: MaxCount))
1282 FreqAttr = FFA_Cold;
1283 }
1284};
1285
1286} // end anonymous namespace
1287
1288/// Set up InEdges/OutEdges for all BBs in the MST.
1289static void setupBBInfoEdges(
1290 const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1291 // This is not required when there is block coverage inference.
1292 if (FuncInfo.BCI)
1293 return;
1294 for (const auto &E : FuncInfo.MST.allEdges()) {
1295 if (E->Removed)
1296 continue;
1297 const BasicBlock *SrcBB = E->SrcBB;
1298 const BasicBlock *DestBB = E->DestBB;
1299 PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(BB: SrcBB);
1300 PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(BB: DestBB);
1301 SrcInfo.addOutEdge(E: E.get());
1302 DestInfo.addInEdge(E: E.get());
1303 }
1304}
1305
1306// Visit all the edges and assign the count value for the instrumented
1307// edges and the BB. Return false on error.
1308bool PGOUseFunc::setInstrumentedCounts(
1309 const std::vector<uint64_t> &CountFromProfile) {
1310
1311 std::vector<BasicBlock *> InstrumentBBs;
1312 FuncInfo.getInstrumentBBs(InstrumentBBs);
1313
1314 setupBBInfoEdges(FuncInfo);
1315
1316 unsigned NumInstrumentedBBs = InstrumentBBs.size();
1317 unsigned NumSelects = FuncInfo.SIVisitor.getNumOfSelectInsts();
1318 unsigned NumCounters = NumInstrumentedBBs + NumSelects;
1319 // The number of counters here should match the number of counters
1320 // in profile. Return if they mismatch.
1321 if (NumCounters != CountFromProfile.size()) {
1322 LLVM_DEBUG({
1323 dbgs() << "PGO COUNTER MISMATCH for function " << F.getName() << ":\n";
1324 dbgs() << " Expected counters: " << NumCounters << "\n";
1325 dbgs() << " - From instrumented edges: " << NumInstrumentedBBs << "\n";
1326 for (size_t i = 0; i < InstrumentBBs.size(); ++i) {
1327 dbgs() << " " << i << ": ";
1328 InstrumentBBs[i]->printAsOperand(dbgs(), false);
1329 dbgs() << "\n";
1330 }
1331 dbgs() << " - From select instructions: " << NumSelects << "\n";
1332 dbgs() << " Actual counters from profile: " << CountFromProfile.size()
1333 << "\n";
1334 });
1335 return false;
1336 }
1337 auto *FuncEntry = &*F.begin();
1338
1339 // Set the profile count to the Instrumented BBs.
1340 uint32_t I = 0;
1341 for (BasicBlock *InstrBB : InstrumentBBs) {
1342 uint64_t CountValue = CountFromProfile[I++];
1343 PGOUseBBInfo &Info = getBBInfo(BB: InstrBB);
1344 // If we reach here, we know that we have some nonzero count
1345 // values in this function. The entry count should not be 0.
1346 // Fix it if necessary.
1347 if (InstrBB == FuncEntry && CountValue == 0)
1348 CountValue = 1;
1349 Info.setBBInfoCount(CountValue);
1350 }
1351 ProfileCountSize = CountFromProfile.size();
1352 CountPosition = I;
1353
1354 // Set the edge count and update the count of unknown edges for BBs.
1355 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1356 E->setEdgeCount(Value);
1357 this->getBBInfo(BB: E->SrcBB).UnknownCountOutEdge--;
1358 this->getBBInfo(BB: E->DestBB).UnknownCountInEdge--;
1359 };
1360
1361 // Set the profile count the Instrumented edges. There are BBs that not in
1362 // MST but not instrumented. Need to set the edge count value so that we can
1363 // populate the profile counts later.
1364 for (const auto &E : FuncInfo.MST.allEdges()) {
1365 if (E->Removed || E->InMST)
1366 continue;
1367 const BasicBlock *SrcBB = E->SrcBB;
1368 PGOUseBBInfo &SrcInfo = getBBInfo(BB: SrcBB);
1369
1370 // If only one out-edge, the edge profile count should be the same as BB
1371 // profile count.
1372 if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1373 setEdgeCount(E.get(), *SrcInfo.Count);
1374 else {
1375 const BasicBlock *DestBB = E->DestBB;
1376 PGOUseBBInfo &DestInfo = getBBInfo(BB: DestBB);
1377 // If only one in-edge, the edge profile count should be the same as BB
1378 // profile count.
1379 if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1380 setEdgeCount(E.get(), *DestInfo.Count);
1381 }
1382 if (E->Count)
1383 continue;
1384 // E's count should have been set from profile. If not, this meenas E skips
1385 // the instrumentation. We set the count to 0.
1386 setEdgeCount(E.get(), 0);
1387 }
1388 return true;
1389}
1390
1391// Set the count value for the unknown edge. There should be one and only one
1392// unknown edge in Edges vector.
1393void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1394 for (auto &E : Edges) {
1395 if (E->Count)
1396 continue;
1397 E->setEdgeCount(Value);
1398
1399 getBBInfo(BB: E->SrcBB).UnknownCountOutEdge--;
1400 getBBInfo(BB: E->DestBB).UnknownCountInEdge--;
1401 return;
1402 }
1403 llvm_unreachable("Cannot find the unknown count edge");
1404}
1405
1406// Emit function metadata indicating PGO profile mismatch.
1407static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) {
1408 const char MetadataName[] = "instr_prof_hash_mismatch";
1409 SmallVector<Metadata *, 2> Names;
1410 // If this metadata already exists, ignore.
1411 auto *Existing = F.getMetadata(KindID: LLVMContext::MD_annotation);
1412 if (Existing) {
1413 MDTuple *Tuple = cast<MDTuple>(Val: Existing);
1414 for (const auto &N : Tuple->operands()) {
1415 if (N.equalsStr(Str: MetadataName))
1416 return;
1417 Names.push_back(Elt: N.get());
1418 }
1419 }
1420
1421 MDBuilder MDB(ctx);
1422 Names.push_back(Elt: MDB.createString(Str: MetadataName));
1423 MDNode *MD = MDTuple::get(Context&: ctx, MDs: Names);
1424 F.setMetadata(KindID: LLVMContext::MD_annotation, Node: MD);
1425}
1426
1427void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1428 handleAllErrors(E: std::move(Err), Handlers: [&](const InstrProfError &IPE) {
1429 auto &Ctx = M->getContext();
1430 auto Err = IPE.get();
1431 bool SkipWarning = false;
1432 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1433 << FuncInfo.FuncName << ": ");
1434 if (Err == instrprof_error::unknown_function) {
1435 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1436 SkipWarning = !PGOWarnMissing;
1437 LLVM_DEBUG(dbgs() << "unknown function");
1438 } else if (Err == instrprof_error::hash_mismatch ||
1439 Err == instrprof_error::malformed) {
1440 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1441 SkipWarning =
1442 NoPGOWarnMismatch ||
1443 (NoPGOWarnMismatchComdatWeak &&
1444 (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1445 F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1446 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1447 << " skip=" << SkipWarning << ")");
1448 // Emit function metadata indicating PGO profile mismatch.
1449 annotateFunctionWithHashMismatch(F, ctx&: M->getContext());
1450 }
1451
1452 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1453 if (SkipWarning)
1454 return;
1455
1456 std::string Msg =
1457 IPE.message() + std::string(" ") + F.getName().str() +
1458 std::string(" Hash = ") + std::to_string(val: FuncInfo.FunctionHash) +
1459 std::string(" up to ") + std::to_string(val: MismatchedFuncSum) +
1460 std::string(" count discarded");
1461
1462 Ctx.diagnose(
1463 DI: DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1464 });
1465}
1466
1467bool PGOUseFunc::getRecord(IndexedInstrProfReader *PGOReader) {
1468 uint64_t MismatchedFuncSum = 0;
1469 auto Result = PGOReader->getInstrProfRecord(
1470 FuncName: FuncInfo.FuncName, FuncHash: FuncInfo.FunctionHash, DeprecatedFuncName: FuncInfo.DeprecatedFuncName,
1471 MismatchedFuncSum: &MismatchedFuncSum);
1472 if (Error E = Result.takeError()) {
1473 handleInstrProfError(Err: std::move(E), MismatchedFuncSum);
1474 return false;
1475 }
1476 ProfileRecord = std::move(Result.get());
1477 ProgramMaxCount = PGOReader->getMaximumFunctionCount(UseCS: IsCS);
1478 return true;
1479}
1480
1481// Read the profile from ProfileFileName and assign the value to the
1482// instrumented BB and the edges. Return true if the profile are successfully
1483// read, and false on errors.
1484bool PGOUseFunc::readCounters(bool &AllZeros,
1485 InstrProfRecord::CountPseudoKind &PseudoKind) {
1486 auto &Ctx = M->getContext();
1487 PseudoKind = ProfileRecord.getCountPseudoKind();
1488 if (PseudoKind != InstrProfRecord::NotPseudo) {
1489 return true;
1490 }
1491 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1492
1493 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1494 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1495
1496 uint64_t ValueSum = 0;
1497 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1498 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1499 ValueSum += CountFromProfile[I];
1500 }
1501 AllZeros = (ValueSum == 0);
1502
1503 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1504
1505 getBBInfo(BB: nullptr).UnknownCountOutEdge = 2;
1506 getBBInfo(BB: nullptr).UnknownCountInEdge = 2;
1507
1508 if (!setInstrumentedCounts(CountFromProfile)) {
1509 LLVM_DEBUG(
1510 dbgs() << "Inconsistent number of counts, skipping this function");
1511 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(
1512 M->getName().data(),
1513 Twine("Inconsistent number of counts in ") + F.getName().str() +
1514 Twine(": the profile may be stale or there is a function name "
1515 "collision."),
1516 DS_Warning));
1517 return false;
1518 }
1519 return true;
1520}
1521
1522void PGOUseFunc::populateCoverage() {
1523 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1524
1525 ArrayRef<uint64_t> CountsFromProfile = ProfileRecord.Counts;
1526 DenseMap<const BasicBlock *, bool> Coverage;
1527 unsigned Index = 0;
1528 for (auto &BB : F)
1529 if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1530 Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1531 assert(Index == CountsFromProfile.size());
1532
1533 // For each B in InverseDependencies[A], if A is covered then B is covered.
1534 DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
1535 InverseDependencies;
1536 for (auto &BB : F) {
1537 for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1538 // If Dep is covered then BB is covered.
1539 InverseDependencies[Dep].insert(V: &BB);
1540 }
1541 }
1542
1543 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1544 std::stack<const BasicBlock *> CoveredBlocksToProcess;
1545 for (auto &[BB, IsCovered] : Coverage)
1546 if (IsCovered)
1547 CoveredBlocksToProcess.push(x: BB);
1548
1549 while (!CoveredBlocksToProcess.empty()) {
1550 auto *CoveredBlock = CoveredBlocksToProcess.top();
1551 assert(Coverage[CoveredBlock]);
1552 CoveredBlocksToProcess.pop();
1553 for (auto *BB : InverseDependencies[CoveredBlock]) {
1554 // If CoveredBlock is covered then BB is covered.
1555 bool &Cov = Coverage[BB];
1556 if (Cov)
1557 continue;
1558 Cov = true;
1559 CoveredBlocksToProcess.push(x: BB);
1560 }
1561 }
1562
1563 // Annotate block coverage.
1564 MDBuilder MDB(F.getContext());
1565 // We set the entry count to 10000 if the entry block is covered so that BFI
1566 // can propagate a fraction of this count to the other covered blocks.
1567 F.setEntryCount(Count: Coverage[&F.getEntryBlock()] ? 10000 : 0);
1568 for (auto &BB : F) {
1569 // For a block A and its successor B, we set the edge weight as follows:
1570 // If A is covered and B is covered, set weight=1.
1571 // If A is covered and B is uncovered, set weight=0.
1572 // If A is uncovered, set weight=1.
1573 // This setup will allow BFI to give nonzero profile counts to only covered
1574 // blocks.
1575 SmallVector<uint32_t, 4> Weights;
1576 for (auto *Succ : successors(BB: &BB))
1577 Weights.push_back(Elt: (Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1578 if (Weights.size() >= 2)
1579 llvm::setBranchWeights(I&: *BB.getTerminator(), Weights,
1580 /*IsExpected=*/false);
1581 }
1582
1583 unsigned NumCorruptCoverage = 0;
1584 DominatorTree DT(F);
1585 LoopInfo LI(DT);
1586 BranchProbabilityInfo BPI(F, LI);
1587 BlockFrequencyInfo BFI(F, BPI, LI);
1588 auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1589 if (auto C = BFI.getBlockProfileCount(BB: &BB))
1590 return C == 0;
1591 return {};
1592 };
1593 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1594 for (auto &BB : F) {
1595 LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1596 << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1597 << "\n");
1598 // In some cases it is possible to find a covered block that has no covered
1599 // successors, e.g., when a block calls a function that may call exit(). In
1600 // those cases, BFI could find its successor to be covered while BCI could
1601 // find its successor to be dead.
1602 const bool &Cov = Coverage[&BB];
1603 if (Cov == IsBlockDead(BB).value_or(u: false)) {
1604 LLVM_DEBUG(
1605 dbgs() << "Found inconsistent block covearge for " << BB.getName()
1606 << ": BCI=" << (Cov ? "Covered" : "Dead") << " BFI="
1607 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1608 ++NumCorruptCoverage;
1609 }
1610 if (Cov)
1611 ++NumCoveredBlocks;
1612 }
1613 if (PGOVerifyBFI && NumCorruptCoverage) {
1614 auto &Ctx = M->getContext();
1615 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(
1616 M->getName().data(),
1617 Twine("Found inconsistent block coverage for function ") + F.getName() +
1618 " in " + Twine(NumCorruptCoverage) + " blocks.",
1619 DS_Warning));
1620 }
1621 if (PGOViewBlockCoverageGraph)
1622 FuncInfo.BCI->viewBlockCoverageGraph(Coverage: &Coverage);
1623}
1624
1625// Populate the counters from instrumented BBs to all BBs.
1626// In the end of this operation, all BBs should have a valid count value.
1627void PGOUseFunc::populateCounters() {
1628 bool Changes = true;
1629 unsigned NumPasses = 0;
1630 while (Changes) {
1631 NumPasses++;
1632 Changes = false;
1633
1634 // For efficient traversal, it's better to start from the end as most
1635 // of the instrumented edges are at the end.
1636 for (auto &BB : reverse(C&: F)) {
1637 PGOUseBBInfo *UseBBInfo = findBBInfo(BB: &BB);
1638 if (UseBBInfo == nullptr)
1639 continue;
1640 if (!UseBBInfo->Count) {
1641 if (UseBBInfo->UnknownCountOutEdge == 0) {
1642 UseBBInfo->Count = sumEdgeCount(Edges: UseBBInfo->OutEdges);
1643 Changes = true;
1644 } else if (UseBBInfo->UnknownCountInEdge == 0) {
1645 UseBBInfo->Count = sumEdgeCount(Edges: UseBBInfo->InEdges);
1646 Changes = true;
1647 }
1648 }
1649 if (UseBBInfo->Count) {
1650 if (UseBBInfo->UnknownCountOutEdge == 1) {
1651 uint64_t Total = 0;
1652 uint64_t OutSum = sumEdgeCount(Edges: UseBBInfo->OutEdges);
1653 // If the one of the successor block can early terminate (no-return),
1654 // we can end up with situation where out edge sum count is larger as
1655 // the source BB's count is collected by a post-dominated block.
1656 if (*UseBBInfo->Count > OutSum)
1657 Total = *UseBBInfo->Count - OutSum;
1658 setEdgeCount(Edges&: UseBBInfo->OutEdges, Value: Total);
1659 Changes = true;
1660 }
1661 if (UseBBInfo->UnknownCountInEdge == 1) {
1662 uint64_t Total = 0;
1663 uint64_t InSum = sumEdgeCount(Edges: UseBBInfo->InEdges);
1664 if (*UseBBInfo->Count > InSum)
1665 Total = *UseBBInfo->Count - InSum;
1666 setEdgeCount(Edges&: UseBBInfo->InEdges, Value: Total);
1667 Changes = true;
1668 }
1669 }
1670 }
1671 }
1672
1673 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1674 (void)NumPasses;
1675#ifndef NDEBUG
1676 // Assert every BB has a valid counter.
1677 for (auto &BB : F) {
1678 auto BI = findBBInfo(&BB);
1679 if (BI == nullptr)
1680 continue;
1681 assert(BI->Count && "BB count is not valid");
1682 }
1683#endif
1684 // Now annotate select instructions. This may fixup impossible block counts.
1685 FuncInfo.SIVisitor.annotateSelects(UF: this, Ind: &CountPosition);
1686 assert(CountPosition == ProfileCountSize);
1687
1688 uint64_t FuncEntryCount = *getBBInfo(BB: &*F.begin()).Count;
1689 uint64_t FuncMaxCount = FuncEntryCount;
1690 for (auto &BB : F) {
1691 auto BI = findBBInfo(BB: &BB);
1692 if (BI == nullptr)
1693 continue;
1694 FuncMaxCount = std::max(a: FuncMaxCount, b: *BI->Count);
1695 }
1696
1697 // Fix the obviously inconsistent entry count.
1698 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1699 FuncEntryCount = 1;
1700 F.setEntryCount(Count: FuncEntryCount);
1701 markFunctionAttributes(EntryCount: FuncEntryCount, MaxCount: FuncMaxCount);
1702
1703 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1704}
1705
1706// Assign the scaled count values to the BB with multiple out edges.
1707void PGOUseFunc::setBranchWeights() {
1708 // Generate MD_prof metadata for every branch instruction.
1709 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1710 << " IsCS=" << IsCS << "\n");
1711 for (auto &BB : F) {
1712 Instruction *TI = BB.getTerminator();
1713 if (TI->getNumSuccessors() < 2)
1714 continue;
1715 if (!(isa<CondBrInst>(Val: TI) || isa<SwitchInst>(Val: TI) ||
1716 isa<IndirectBrInst>(Val: TI) || isa<InvokeInst>(Val: TI) ||
1717 isa<CallBrInst>(Val: TI)))
1718 continue;
1719
1720 const PGOUseBBInfo &BBCountInfo = getBBInfo(BB: &BB);
1721 if (!*BBCountInfo.Count)
1722 continue;
1723
1724 // We have a non-zero Branch BB.
1725
1726 // SuccessorCount can be greater than OutEdgesCount, because
1727 // removed edges don't appear in OutEdges.
1728 unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1729 unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1730 assert(OutEdgesCount <= SuccessorCount);
1731
1732 SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1733 uint64_t MaxCount = 0;
1734 for (unsigned It = 0; It < OutEdgesCount; It++) {
1735 const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1736 const BasicBlock *SrcBB = E->SrcBB;
1737 const BasicBlock *DestBB = E->DestBB;
1738 if (DestBB == nullptr)
1739 continue;
1740 unsigned SuccNum = GetSuccessorNumber(BB: SrcBB, Succ: DestBB);
1741 uint64_t EdgeCount = *E->Count;
1742 if (EdgeCount > MaxCount)
1743 MaxCount = EdgeCount;
1744 EdgeCounts[SuccNum] = EdgeCount;
1745 }
1746
1747 if (MaxCount)
1748 setProfMetadata(TI, EdgeCounts, MaxCount);
1749 else {
1750 // A zero MaxCount can come about when we have a BB with a positive
1751 // count, and whose successor blocks all have 0 count. This can happen
1752 // when there is no exit block and the code exits via a noreturn function.
1753 auto &Ctx = M->getContext();
1754 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(
1755 M->getName().data(),
1756 Twine("Profile in ") + F.getName().str() +
1757 Twine(" partially ignored") +
1758 Twine(", possibly due to the lack of a return path."),
1759 DS_Warning));
1760 }
1761 }
1762}
1763
1764static bool isIndirectBrTarget(BasicBlock *BB) {
1765 for (BasicBlock *Pred : predecessors(BB)) {
1766 if (isa<IndirectBrInst>(Val: Pred->getTerminator()))
1767 return true;
1768 }
1769 return false;
1770}
1771
1772void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1773 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1774 // Find irr loop headers
1775 for (auto &BB : F) {
1776 // As a heuristic also annotate indrectbr targets as they have a high chance
1777 // to become an irreducible loop header after the indirectbr tail
1778 // duplication.
1779 if (BFI->isIrrLoopHeader(BB: &BB) || isIndirectBrTarget(BB: &BB)) {
1780 Instruction *TI = BB.getTerminator();
1781 const PGOUseBBInfo &BBCountInfo = getBBInfo(BB: &BB);
1782 setIrrLoopHeaderMetadata(M, TI, Count: *BBCountInfo.Count);
1783 }
1784 }
1785}
1786
1787void PGOUseFunc::setBlockUniformityAttribute() {
1788 if (ProfileRecord.UniformityBits.empty())
1789 return;
1790
1791 // Annotate uniformity on each instrumented IR basic block so later codegen
1792 // passes (MachineFunction) can consume it without relying on fragile block
1793 // numbering heuristics.
1794 //
1795 // Metadata kind: LLVMContext::MD_block_uniformity_profile
1796 // Payload: i1 (true = uniform, false = divergent)
1797
1798 std::vector<BasicBlock *> InstrumentBBs;
1799 FuncInfo.getInstrumentBBs(InstrumentBBs);
1800
1801 LLVMContext &Ctx = F.getContext();
1802 Type *Int1Ty = Type::getInt1Ty(C&: Ctx);
1803
1804 for (size_t I = 0, E = InstrumentBBs.size(); I < E; ++I) {
1805 BasicBlock *BB = InstrumentBBs[I];
1806 if (!BB || !BB->getTerminator())
1807 continue;
1808 bool IsUniform = ProfileRecord.isBlockUniform(BlockIdx: I);
1809 auto *MD = MDNode::get(
1810 Context&: Ctx, MDs: ConstantAsMetadata::get(C: ConstantInt::get(Ty: Int1Ty, V: IsUniform)));
1811 BB->getTerminator()->setMetadata(KindID: LLVMContext::MD_block_uniformity_profile,
1812 Node: MD);
1813 }
1814
1815 LLVM_DEBUG({
1816 dbgs() << "PGO: Set block uniformity profile for " << F.getName() << ": ";
1817 for (size_t I = 0, E = InstrumentBBs.size(); I < E; ++I)
1818 dbgs() << (ProfileRecord.isBlockUniform(I) ? 'U' : 'D');
1819 dbgs() << "\n";
1820 });
1821}
1822
1823void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1824 Module *M = F.getParent();
1825 IRBuilder<> Builder(&SI);
1826 Type *Int64Ty = Builder.getInt64Ty();
1827 auto *Step = Builder.CreateZExt(V: SI.getCondition(), DestTy: Int64Ty);
1828 auto *NormalizedFuncNameVarPtr =
1829 ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1830 C: FuncNameVar, Ty: PointerType::get(C&: M->getContext(), AddressSpace: 0));
1831 Builder.CreateIntrinsic(ID: Intrinsic::instrprof_increment_step,
1832 Args: {NormalizedFuncNameVarPtr, Builder.getInt64(C: FuncHash),
1833 Builder.getInt32(C: TotalNumCtrs),
1834 Builder.getInt32(C: *CurCtrIdx), Step});
1835 ++(*CurCtrIdx);
1836}
1837
1838void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1839 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1840 assert(*CurCtrIdx < CountFromProfile.size() &&
1841 "Out of bound access of counters");
1842 uint64_t SCounts[2];
1843 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1844 ++(*CurCtrIdx);
1845 uint64_t TotalCount = 0;
1846 auto BI = UseFunc->findBBInfo(BB: SI.getParent());
1847 if (BI != nullptr) {
1848 TotalCount = *BI->Count;
1849
1850 // Fix the block count if it is impossible.
1851 if (TotalCount < SCounts[0])
1852 BI->Count = SCounts[0];
1853 }
1854 // False Count
1855 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1856 uint64_t MaxCount = std::max(a: SCounts[0], b: SCounts[1]);
1857 if (MaxCount)
1858 setProfMetadata(TI: &SI, EdgeCounts: SCounts, MaxCount);
1859}
1860
1861void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1862 if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1863 return;
1864 // FIXME: do not handle this yet.
1865 if (SI.getCondition()->getType()->isVectorTy())
1866 return;
1867
1868 switch (Mode) {
1869 case VM_counting:
1870 NSIs++;
1871 return;
1872 case VM_instrument:
1873 instrumentOneSelectInst(SI);
1874 return;
1875 case VM_annotate:
1876 annotateOneSelectInst(SI);
1877 return;
1878 }
1879
1880 llvm_unreachable("Unknown visiting mode");
1881}
1882
1883static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) {
1884 if (ValueProfKind == IPVK_MemOPSize)
1885 return MaxNumMemOPAnnotations;
1886 if (ValueProfKind == llvm::IPVK_VTableTarget)
1887 return MaxNumVTableAnnotations;
1888 return MaxNumAnnotations;
1889}
1890
1891// Traverse all valuesites and annotate the instructions for all value kind.
1892void PGOUseFunc::annotateValueSites() {
1893 if (DisableValueProfiling)
1894 return;
1895
1896 // Create the PGOFuncName meta data.
1897 createPGOFuncNameMetadata(F, PGOFuncName: FuncInfo.FuncName);
1898
1899 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1900 annotateValueSites(Kind);
1901}
1902
1903// Annotate the instructions for a specific value kind.
1904void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1905 assert(Kind <= IPVK_Last);
1906 unsigned ValueSiteIndex = 0;
1907
1908 unsigned NumValueSites = ProfileRecord.getNumValueSites(ValueKind: Kind);
1909
1910 // Since there isn't a reliable or fast way for profile reader to tell if a
1911 // profile is generated with `-enable-vtable-value-profiling` on, we run the
1912 // value profile collector over the function IR to find the instrumented sites
1913 // iff function profile records shows the number of instrumented vtable sites
1914 // is not zero. Function cfg already takes the number of instrumented
1915 // indirect call sites into account so it doesn't hash the number of
1916 // instrumented vtables; as a side effect it makes it easier to enable
1917 // profiling and profile use in two steps if needed.
1918 // TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1919 if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1920 NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1921 MaxNumVTableAnnotations != 0)
1922 FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(Kind: IPVK_VTableTarget);
1923 auto &ValueSites = FuncInfo.ValueSites[Kind];
1924 if (NumValueSites != ValueSites.size()) {
1925 auto &Ctx = M->getContext();
1926 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(
1927 M->getName().data(),
1928 Twine("Inconsistent number of value sites for ") +
1929 Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1930 F.getName().str() +
1931 Twine("\", possibly due to the use of a stale profile."),
1932 DS_Warning));
1933 return;
1934 }
1935
1936 for (VPCandidateInfo &I : ValueSites) {
1937 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1938 << "): Index = " << ValueSiteIndex << " out of "
1939 << NumValueSites << "\n");
1940 annotateValueSite(
1941 M&: *M, Inst&: *I.AnnotatedInst, InstrProfR: ProfileRecord,
1942 ValueKind: static_cast<InstrProfValueKind>(Kind), SiteIndx: ValueSiteIndex,
1943 MaxMDCount: getMaxNumAnnotations(ValueProfKind: static_cast<InstrProfValueKind>(Kind)));
1944 ValueSiteIndex++;
1945 }
1946}
1947
1948// Collect the set of members for each Comdat in module M and store
1949// in ComdatMembers.
1950static void collectComdatMembers(
1951 Module &M,
1952 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1953 if (!DoComdatRenaming)
1954 return;
1955 for (Function &F : M)
1956 if (Comdat *C = F.getComdat())
1957 ComdatMembers.insert(x: std::make_pair(x&: C, y: &F));
1958 for (GlobalVariable &GV : M.globals())
1959 if (Comdat *C = GV.getComdat())
1960 ComdatMembers.insert(x: std::make_pair(x&: C, y: &GV));
1961 for (GlobalAlias &GA : M.aliases())
1962 if (Comdat *C = GA.getComdat())
1963 ComdatMembers.insert(x: std::make_pair(x&: C, y: &GA));
1964}
1965
1966// Return true if we should not find instrumentation data for this function
1967static bool skipPGOUse(const Function &F) {
1968 if (F.isDeclaration())
1969 return true;
1970 // If there are too many critical edges, PGO might cause
1971 // compiler time problem. Skip PGO if the number of
1972 // critical edges execeed the threshold.
1973 unsigned NumCriticalEdges = 0;
1974 for (auto &BB : F) {
1975 const Instruction *TI = BB.getTerminator();
1976 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1977 if (isCriticalEdge(TI, SuccNum: I))
1978 NumCriticalEdges++;
1979 }
1980 }
1981 if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1982 LLVM_DEBUG(dbgs() << "In func " << F.getName()
1983 << ", NumCriticalEdges=" << NumCriticalEdges
1984 << " exceed the threshold. Skip PGO.\n");
1985 return true;
1986 }
1987 return false;
1988}
1989
1990// Return true if we should not instrument this function
1991static bool skipPGOGen(const Function &F) {
1992 if (skipPGOUse(F))
1993 return true;
1994 if (F.hasFnAttribute(Kind: llvm::Attribute::Naked))
1995 return true;
1996 if (F.hasFnAttribute(Kind: llvm::Attribute::NoProfile))
1997 return true;
1998 if (F.hasFnAttribute(Kind: llvm::Attribute::SkipProfile))
1999 return true;
2000 if (F.getInstructionCount() < PGOFunctionSizeThreshold)
2001 return true;
2002 if (PGOInstrumentColdFunctionOnly) {
2003 if (auto EntryCount = F.getEntryCount())
2004 return *EntryCount > PGOColdInstrumentEntryThreshold;
2005 return !PGOTreatUnknownAsCold;
2006 }
2007 return false;
2008}
2009
2010static bool InstrumentAllFunctions(
2011 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2012 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
2013 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
2014 function_ref<LoopInfo *(Function &)> LookupLI,
2015 PGOInstrumentationType InstrumentationType) {
2016 // For the context-sensitive instrumentation, we should have a separated pass
2017 // (before LTO/ThinLTO linking) to create these variables.
2018 if (InstrumentationType == PGOInstrumentationType::FDO)
2019 createIRLevelProfileFlagVar(M, InstrumentationType);
2020
2021 Triple TT(M.getTargetTriple());
2022 LLVMContext &Ctx = M.getContext();
2023 if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
2024 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(
2025 M.getName().data(),
2026 Twine("VTable value profiling is presently not "
2027 "supported for non-ELF object formats"),
2028 DS_Warning));
2029 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2030 collectComdatMembers(M, ComdatMembers);
2031
2032 for (auto &F : M) {
2033 if (skipPGOGen(F))
2034 continue;
2035 TargetLibraryInfo &TLI = LookupTLI(F);
2036 BranchProbabilityInfo *BPI = LookupBPI(F);
2037 BlockFrequencyInfo *BFI = LookupBFI(F);
2038 LoopInfo *LI = LookupLI(F);
2039 FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
2040 InstrumentationType);
2041 FI.instrument();
2042 }
2043 return true;
2044}
2045
2046PreservedAnalyses
2047PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) {
2048 createProfileFileNameVar(M, InstrProfileOutput: CSInstrName);
2049 // The variable in a comdat may be discarded by LTO. Ensure the declaration
2050 // will be retained.
2051 appendToCompilerUsed(
2052 M, Values: createIRLevelProfileFlagVar(M, InstrumentationType: PGOInstrumentationType::CSFDO));
2053 if (ProfileSampling)
2054 createProfileSamplingVar(M);
2055 PreservedAnalyses PA;
2056 PA.preserve<FunctionAnalysisManagerModuleProxy>();
2057 PA.preserveSet<AllAnalysesOn<Function>>();
2058 return PA;
2059}
2060
2061PreservedAnalyses PGOInstrumentationGen::run(Module &M,
2062 ModuleAnalysisManager &MAM) {
2063 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
2064 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2065 return FAM.getResult<TargetLibraryAnalysis>(IR&: F);
2066 };
2067 auto LookupBPI = [&FAM](Function &F) {
2068 return &FAM.getResult<BranchProbabilityAnalysis>(IR&: F);
2069 };
2070 auto LookupBFI = [&FAM](Function &F) {
2071 return &FAM.getResult<BlockFrequencyAnalysis>(IR&: F);
2072 };
2073 auto LookupLI = [&FAM](Function &F) {
2074 return &FAM.getResult<LoopAnalysis>(IR&: F);
2075 };
2076
2077 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
2078 InstrumentationType))
2079 return PreservedAnalyses::all();
2080
2081 return PreservedAnalyses::none();
2082}
2083
2084// Using the ratio b/w sums of profile count values and BFI count values to
2085// adjust the func entry count.
2086static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
2087 BranchProbabilityInfo &NBPI) {
2088 Function &F = Func.getFunc();
2089 BlockFrequencyInfo NBFI(F, NBPI, LI);
2090#ifndef NDEBUG
2091 auto BFIEntryCount = F.getEntryCount();
2092 assert(BFIEntryCount && (*BFIEntryCount > 0) && "Invalid BFI Entrycount");
2093#endif
2094 auto SumCount = APFloat::getZero(Sem: APFloat::IEEEdouble());
2095 auto SumBFICount = APFloat::getZero(Sem: APFloat::IEEEdouble());
2096 for (auto &BBI : F) {
2097 uint64_t CountValue = 0;
2098 uint64_t BFICountValue = 0;
2099 if (!Func.findBBInfo(BB: &BBI))
2100 continue;
2101 auto BFICount = NBFI.getBlockProfileCount(BB: &BBI);
2102 CountValue = *Func.getBBInfo(BB: &BBI).Count;
2103 BFICountValue = *BFICount;
2104 SumCount.add(RHS: APFloat(CountValue * 1.0), RM: APFloat::rmNearestTiesToEven);
2105 SumBFICount.add(RHS: APFloat(BFICountValue * 1.0), RM: APFloat::rmNearestTiesToEven);
2106 }
2107 if (SumCount.isZero())
2108 return;
2109
2110 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
2111 "Incorrect sum of BFI counts");
2112 if (SumBFICount.compare(RHS: SumCount) == APFloat::cmpEqual)
2113 return;
2114 double Scale = (SumCount / SumBFICount).convertToDouble();
2115 if (Scale < 1.001 && Scale > 0.999)
2116 return;
2117
2118 uint64_t FuncEntryCount = *Func.getBBInfo(BB: &*F.begin()).Count;
2119 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
2120 if (NewEntryCount == 0)
2121 NewEntryCount = 1;
2122 if (NewEntryCount != FuncEntryCount) {
2123 F.setEntryCount(Count: NewEntryCount);
2124 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
2125 << ", entry_count " << FuncEntryCount << " --> "
2126 << NewEntryCount << "\n");
2127 }
2128}
2129
2130// Compare the profile count values with BFI count values, and print out
2131// the non-matching ones.
2132static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
2133 BranchProbabilityInfo &NBPI,
2134 uint64_t HotCountThreshold,
2135 uint64_t ColdCountThreshold) {
2136 Function &F = Func.getFunc();
2137 BlockFrequencyInfo NBFI(F, NBPI, LI);
2138 // bool PrintFunc = false;
2139 bool HotBBOnly = PGOVerifyHotBFI;
2140 StringRef Msg;
2141 OptimizationRemarkEmitter ORE(&F);
2142
2143 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
2144 for (auto &BBI : F) {
2145 PGOUseBBInfo *BBInfo = Func.findBBInfo(BB: &BBI);
2146 if (!BBInfo)
2147 continue;
2148
2149 uint64_t CountValue = BBInfo->Count.value_or(u&: CountValue);
2150 uint64_t BFICountValue = 0;
2151
2152 BBNum++;
2153 if (CountValue)
2154 NonZeroBBNum++;
2155 auto BFICount = NBFI.getBlockProfileCount(BB: &BBI);
2156 if (BFICount)
2157 BFICountValue = *BFICount;
2158
2159 if (HotBBOnly) {
2160 bool rawIsHot = CountValue >= HotCountThreshold;
2161 bool BFIIsHot = BFICountValue >= HotCountThreshold;
2162 bool rawIsCold = CountValue <= ColdCountThreshold;
2163 bool ShowCount = false;
2164 if (rawIsHot && !BFIIsHot) {
2165 Msg = "raw-Hot to BFI-nonHot";
2166 ShowCount = true;
2167 } else if (rawIsCold && BFIIsHot) {
2168 Msg = "raw-Cold to BFI-Hot";
2169 ShowCount = true;
2170 }
2171 if (!ShowCount)
2172 continue;
2173 } else {
2174 if ((CountValue < PGOVerifyBFICutoff) &&
2175 (BFICountValue < PGOVerifyBFICutoff))
2176 continue;
2177 uint64_t Diff = (BFICountValue >= CountValue)
2178 ? BFICountValue - CountValue
2179 : CountValue - BFICountValue;
2180 if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2181 continue;
2182 }
2183 BBMisMatchNum++;
2184
2185 ORE.emit(RemarkBuilder: [&]() {
2186 OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
2187 F.getSubprogram(), &BBI);
2188 Remark << "BB " << ore::NV("Block", BBI.getName())
2189 << " Count=" << ore::NV("Count", CountValue)
2190 << " BFI_Count=" << ore::NV("Count", BFICountValue);
2191 if (!Msg.empty())
2192 Remark << " (" << Msg << ")";
2193 return Remark;
2194 });
2195 }
2196 if (BBMisMatchNum)
2197 ORE.emit(RemarkBuilder: [&]() {
2198 return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2199 F.getSubprogram(), &F.getEntryBlock())
2200 << "In Func " << ore::NV("Function", F.getName())
2201 << ": Num_of_BB=" << ore::NV("Count", BBNum)
2202 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2203 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2204 });
2205}
2206
2207static bool annotateAllFunctions(
2208 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2209 vfs::FileSystem &FS,
2210 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2211 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
2212 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
2213 function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
2214 bool IsCS) {
2215 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2216 auto &Ctx = M.getContext();
2217 // Read the counter array from file.
2218 auto ReaderOrErr = IndexedInstrProfReader::create(Path: ProfileFileName, FS,
2219 RemappingPath: ProfileRemappingFileName);
2220 if (Error E = ReaderOrErr.takeError()) {
2221 handleAllErrors(E: std::move(E), Handlers: [&](const ErrorInfoBase &EI) {
2222 Ctx.diagnose(
2223 DI: DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2224 });
2225 return false;
2226 }
2227
2228 std::unique_ptr<IndexedInstrProfReader> PGOReader =
2229 std::move(ReaderOrErr.get());
2230 if (!PGOReader) {
2231 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(ProfileFileName.data(),
2232 StringRef("Cannot get PGOReader")));
2233 return false;
2234 }
2235 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2236 return false;
2237
2238 // TODO: might need to change the warning once the clang option is finalized.
2239 if (!PGOReader->isIRLevelProfile()) {
2240 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(
2241 ProfileFileName.data(), "Not an IR level instrumentation profile"));
2242 return false;
2243 }
2244 if (PGOReader->functionEntryOnly()) {
2245 Ctx.diagnose(DI: DiagnosticInfoPGOProfile(
2246 ProfileFileName.data(),
2247 "Function entry profiles are not yet supported for optimization"));
2248 return false;
2249 }
2250
2251 if (EnableVTableProfileUse) {
2252 for (GlobalVariable &G : M.globals()) {
2253 if (!G.hasName() || !G.hasMetadata(KindID: LLVMContext::MD_type))
2254 continue;
2255
2256 // Create the PGOFuncName meta data.
2257 createPGONameMetadata(GO&: G, PGOName: getPGOName(V: G, InLTO: false /* InLTO*/));
2258 }
2259 }
2260
2261 // Add the profile summary (read from the header of the indexed summary) here
2262 // so that we can use it below when reading counters (which checks if the
2263 // function should be marked with a cold or inlinehint attribute).
2264 M.setProfileSummary(M: PGOReader->getSummary(UseCS: IsCS).getMD(Context&: M.getContext()),
2265 Kind: IsCS ? ProfileSummary::PSK_CSInstr
2266 : ProfileSummary::PSK_Instr);
2267 PSI->refresh();
2268
2269 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2270 collectComdatMembers(M, ComdatMembers);
2271 std::vector<Function *> HotFunctions;
2272 std::vector<Function *> ColdFunctions;
2273
2274 // If the profile marked as always instrument the entry BB, do the
2275 // same. Note this can be overwritten by the internal option in CFGMST.h
2276 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2277 if (PGOInstrumentEntry.getNumOccurrences() > 0)
2278 InstrumentFuncEntry = PGOInstrumentEntry;
2279 bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
2280 if (PGOInstrumentLoopEntries.getNumOccurrences() > 0)
2281 InstrumentLoopEntries = PGOInstrumentLoopEntries;
2282
2283 bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2284 for (auto &F : M) {
2285 if (skipPGOUse(F))
2286 continue;
2287 TargetLibraryInfo &TLI = LookupTLI(F);
2288 BranchProbabilityInfo *BPI = LookupBPI(F);
2289 BlockFrequencyInfo *BFI = LookupBFI(F);
2290 LoopInfo *LI = LookupLI(F);
2291 if (!HasSingleByteCoverage) {
2292 // Split indirectbr critical edges here before computing the MST rather
2293 // than later in getInstrBB() to avoid invalidating it.
2294 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2295 BFI);
2296 }
2297 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2298 InstrumentFuncEntry, InstrumentLoopEntries,
2299 HasSingleByteCoverage);
2300 if (!Func.getRecord(PGOReader: PGOReader.get()))
2301 continue;
2302 if (HasSingleByteCoverage) {
2303 Func.populateCoverage();
2304 continue;
2305 }
2306 // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
2307 // it means the profile for the function is unrepresentative and this
2308 // function is actually hot / warm. We will reset the function hot / cold
2309 // attribute and drop all the profile counters.
2310 InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo;
2311 bool AllZeros = false;
2312 if (!Func.readCounters(AllZeros, PseudoKind))
2313 continue;
2314 if (AllZeros) {
2315 F.setEntryCount(Count: 0);
2316 if (Func.getProgramMaxCount() != 0)
2317 ColdFunctions.push_back(x: &F);
2318 continue;
2319 }
2320 if (PseudoKind != InstrProfRecord::NotPseudo) {
2321 // Clear function attribute cold.
2322 if (F.hasFnAttribute(Kind: Attribute::Cold))
2323 F.removeFnAttr(Kind: Attribute::Cold);
2324 // Set function attribute as hot.
2325 if (PseudoKind == InstrProfRecord::PseudoHot)
2326 F.addFnAttr(Kind: Attribute::Hot);
2327 continue;
2328 }
2329 Func.populateCounters();
2330 Func.setBranchWeights();
2331 Func.annotateValueSites();
2332 Func.annotateIrrLoopHeaderWeights();
2333 Func.setBlockUniformityAttribute();
2334 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2335 if (FreqAttr == PGOUseFunc::FFA_Cold)
2336 ColdFunctions.push_back(x: &F);
2337 else if (FreqAttr == PGOUseFunc::FFA_Hot)
2338 HotFunctions.push_back(x: &F);
2339 if (PGOViewCounts != PGOVCT_None &&
2340 (ViewBlockFreqFuncName.empty() ||
2341 F.getName() == ViewBlockFreqFuncName)) {
2342 LoopInfo LI{DominatorTree(F)};
2343 std::unique_ptr<BranchProbabilityInfo> NewBPI =
2344 std::make_unique<BranchProbabilityInfo>(args&: F, args&: LI);
2345 std::unique_ptr<BlockFrequencyInfo> NewBFI =
2346 std::make_unique<BlockFrequencyInfo>(args&: F, args&: *NewBPI, args&: LI);
2347 if (PGOViewCounts == PGOVCT_Graph)
2348 NewBFI->view();
2349 else if (PGOViewCounts == PGOVCT_Text) {
2350 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2351 NewBFI->print(OS&: dbgs());
2352 }
2353 }
2354 if (PGOViewRawCounts != PGOVCT_None &&
2355 (ViewBlockFreqFuncName.empty() ||
2356 F.getName() == ViewBlockFreqFuncName)) {
2357 if (PGOViewRawCounts == PGOVCT_Graph)
2358 if (ViewBlockFreqFuncName.empty())
2359 WriteGraph(G: &Func, Name: Twine("PGORawCounts_") + Func.getFunc().getName());
2360 else
2361 ViewGraph(G: &Func, Name: Twine("PGORawCounts_") + Func.getFunc().getName());
2362 else if (PGOViewRawCounts == PGOVCT_Text) {
2363 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2364 Func.dumpInfo();
2365 }
2366 }
2367
2368 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
2369 LoopInfo LI{DominatorTree(F)};
2370 BranchProbabilityInfo NBPI(F, LI);
2371
2372 // Fix func entry count.
2373 if (PGOFixEntryCount)
2374 fixFuncEntryCount(Func, LI, NBPI);
2375
2376 // Verify BlockFrequency information.
2377 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2378 if (PGOVerifyHotBFI) {
2379 HotCountThreshold = PSI->getOrCompHotCountThreshold();
2380 ColdCountThreshold = PSI->getOrCompColdCountThreshold();
2381 }
2382 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2383 }
2384 }
2385
2386 // Set function hotness attribute from the profile.
2387 // We have to apply these attributes at the end because their presence
2388 // can affect the BranchProbabilityInfo of any callers, resulting in an
2389 // inconsistent MST between prof-gen and prof-use.
2390 for (auto &F : HotFunctions) {
2391 F->addFnAttr(Kind: Attribute::InlineHint);
2392 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2393 << "\n");
2394 }
2395 for (auto &F : ColdFunctions) {
2396 // Only set when there is no Attribute::Hot set by the user. For Hot
2397 // attribute, user's annotation has the precedence over the profile.
2398 if (F->hasFnAttribute(Kind: Attribute::Hot)) {
2399 auto &Ctx = M.getContext();
2400 std::string Msg = std::string("Function ") + F->getName().str() +
2401 std::string(" is annotated as a hot function but"
2402 " the profile is cold");
2403 Ctx.diagnose(
2404 DI: DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2405 continue;
2406 }
2407 F->addFnAttr(Kind: Attribute::Cold);
2408 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2409 << "\n");
2410 }
2411 return true;
2412}
2413
2414PGOInstrumentationUse::PGOInstrumentationUse(
2415 std::string Filename, std::string RemappingFilename, bool IsCS,
2416 IntrusiveRefCntPtr<vfs::FileSystem> VFS)
2417 : ProfileFileName(std::move(Filename)),
2418 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2419 FS(std::move(VFS)) {
2420 if (!PGOTestProfileFile.empty())
2421 ProfileFileName = PGOTestProfileFile;
2422 if (!PGOTestProfileRemappingFile.empty())
2423 ProfileRemappingFileName = PGOTestProfileRemappingFile;
2424 if (!FS)
2425 FS = vfs::getRealFileSystem();
2426}
2427
2428PreservedAnalyses PGOInstrumentationUse::run(Module &M,
2429 ModuleAnalysisManager &MAM) {
2430
2431 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
2432 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2433 return FAM.getResult<TargetLibraryAnalysis>(IR&: F);
2434 };
2435 auto LookupBPI = [&FAM](Function &F) {
2436 return &FAM.getResult<BranchProbabilityAnalysis>(IR&: F);
2437 };
2438 auto LookupBFI = [&FAM](Function &F) {
2439 return &FAM.getResult<BlockFrequencyAnalysis>(IR&: F);
2440 };
2441 auto LookupLI = [&FAM](Function &F) {
2442 return &FAM.getResult<LoopAnalysis>(IR&: F);
2443 };
2444
2445 auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M);
2446 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, FS&: *FS,
2447 LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2448 IsCS))
2449 return PreservedAnalyses::all();
2450
2451 return PreservedAnalyses::none();
2452}
2453
2454static std::string getSimpleNodeName(const BasicBlock *Node) {
2455 if (!Node->getName().empty())
2456 return Node->getName().str();
2457
2458 std::string SimpleNodeName;
2459 raw_string_ostream OS(SimpleNodeName);
2460 Node->printAsOperand(O&: OS, PrintType: false);
2461 return SimpleNodeName;
2462}
2463
2464void llvm::setProfMetadata(Instruction *TI, ArrayRef<uint64_t> EdgeCounts,
2465 uint64_t MaxCount) {
2466 auto Weights = downscaleWeights(Weights: EdgeCounts, KnownMaxCount: MaxCount);
2467
2468 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W : Weights) {
2469 dbgs() << W << " ";
2470 } dbgs() << "\n");
2471
2472 misexpect::checkExpectAnnotations(I: *TI, ExistingWeights: Weights, /*IsFrontend=*/false);
2473
2474 setBranchWeights(I&: *TI, Weights, /*IsExpected=*/false);
2475
2476 if (EmitBranchProbability) {
2477 std::string BrCondStr = getBranchCondString(TI);
2478 if (BrCondStr.empty())
2479 return;
2480
2481 uint64_t WSum =
2482 std::accumulate(first: Weights.begin(), last: Weights.end(), init: (uint64_t)0,
2483 binary_op: [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2484 uint64_t TotalCount =
2485 std::accumulate(first: EdgeCounts.begin(), last: EdgeCounts.end(), init: (uint64_t)0,
2486 binary_op: [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2487 uint64_t Scale = calculateCountScale(MaxCount: WSum);
2488 BranchProbability BP(scaleBranchCount(Count: Weights[0], Scale),
2489 scaleBranchCount(Count: WSum, Scale));
2490 std::string BranchProbStr;
2491 raw_string_ostream OS(BranchProbStr);
2492 OS << BP;
2493 OS << " (total count : " << TotalCount << ")";
2494 Function *F = TI->getParent()->getParent();
2495 OptimizationRemarkEmitter ORE(F);
2496 ORE.emit(RemarkBuilder: [&]() {
2497 return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2498 << BrCondStr << " is true with probability : " << BranchProbStr;
2499 });
2500 }
2501}
2502
2503namespace llvm {
2504
2505void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
2506 MDBuilder MDB(M->getContext());
2507 TI->setMetadata(KindID: llvm::LLVMContext::MD_irr_loop,
2508 Node: MDB.createIrrLoopHeaderWeight(Weight: Count));
2509}
2510
2511template <> struct GraphTraits<PGOUseFunc *> {
2512 using NodeRef = const BasicBlock *;
2513 using ChildIteratorType = const_succ_iterator;
2514 using nodes_iterator = pointer_iterator<Function::const_iterator>;
2515
2516 static NodeRef getEntryNode(const PGOUseFunc *G) {
2517 return &G->getFunc().front();
2518 }
2519
2520 static ChildIteratorType child_begin(const NodeRef N) {
2521 return succ_begin(BB: N);
2522 }
2523
2524 static ChildIteratorType child_end(const NodeRef N) { return succ_end(BB: N); }
2525
2526 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2527 return nodes_iterator(G->getFunc().begin());
2528 }
2529
2530 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2531 return nodes_iterator(G->getFunc().end());
2532 }
2533};
2534
2535template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2536 explicit DOTGraphTraits(bool isSimple = false)
2537 : DefaultDOTGraphTraits(isSimple) {}
2538
2539 static std::string getGraphName(const PGOUseFunc *G) {
2540 return std::string(G->getFunc().getName());
2541 }
2542
2543 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2544 std::string Result;
2545 raw_string_ostream OS(Result);
2546
2547 OS << getSimpleNodeName(Node) << ":\\l";
2548 PGOUseBBInfo *BI = Graph->findBBInfo(BB: Node);
2549 OS << "Count : ";
2550 if (BI && BI->Count)
2551 OS << *BI->Count << "\\l";
2552 else
2553 OS << "Unknown\\l";
2554
2555 if (!PGOInstrSelect)
2556 return Result;
2557
2558 for (const Instruction &I : *Node) {
2559 if (!isa<SelectInst>(Val: &I))
2560 continue;
2561 // Display scaled counts for SELECT instruction:
2562 OS << "SELECT : { T = ";
2563 uint64_t TC, FC;
2564 bool HasProf = extractBranchWeights(I, TrueVal&: TC, FalseVal&: FC);
2565 if (!HasProf)
2566 OS << "Unknown, F = Unknown }\\l";
2567 else
2568 OS << TC << ", F = " << FC << " }\\l";
2569 }
2570 return Result;
2571 }
2572};
2573
2574} // end namespace llvm
2575