1 | //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements PGO instrumentation using a minimum spanning tree based |
10 | // on the following paper: |
11 | // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points |
12 | // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, |
13 | // Issue 3, pp 313-322 |
14 | // The idea of the algorithm is based on the fact that for each node (except |
15 | // for the entry and exit), the sum of incoming edge counts equals the sum of |
16 | // outgoing edge counts. The count of an edge on the spanning tree can be |
17 | // derived from those edges not on the spanning tree. Knuth proves this method |
18 | // instruments the minimum number of edges. |
19 | // |
20 | // The minimal spanning tree here is actually a maximum weight tree -- on-tree |
21 | // edges have higher frequencies (more likely to execute). The idea is to |
22 | // instrument those less frequently executed edges to reduce the runtime |
23 | // overhead of instrumented binaries. |
24 | // |
25 | // This file contains two passes: |
26 | // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge |
27 | // count profile, and generates the instrumentation for indirect call |
28 | // profiling. |
29 | // (2) Pass PGOInstrumentationUse which reads the edge count profile and |
30 | // annotates the branch weights. It also reads the indirect call value |
31 | // profiling records and annotates the indirect call instructions. |
32 | // |
33 | // To get the precise counter information, these two passes need to be invoked |
34 | // at the same compilation point (so they see the same IR). For pass |
35 | // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For |
36 | // pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and |
37 | // the profile is opened in module level and passed to each PGOUseFunc instance. |
38 | // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put |
39 | // in class FuncPGOInstrumentation. |
40 | // |
41 | // Class PGOEdge represents a CFG edge and some auxiliary information. Class |
42 | // PGOBBInfo contains auxiliary information for each BB. These two classes are |
43 | // used in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are |
44 | // derived classes of PGOEdge and PGOBBInfo, respectively. They contain extra |
45 | // data structures used in populating profile counters. |
46 | // The MST implementation is in Class CFGMST (CFGMST.h). |
47 | // |
48 | //===----------------------------------------------------------------------===// |
49 | |
50 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
51 | #include "ValueProfileCollector.h" |
52 | #include "llvm/ADT/APInt.h" |
53 | #include "llvm/ADT/ArrayRef.h" |
54 | #include "llvm/ADT/STLExtras.h" |
55 | #include "llvm/ADT/SmallVector.h" |
56 | #include "llvm/ADT/Statistic.h" |
57 | #include "llvm/ADT/StringRef.h" |
58 | #include "llvm/ADT/StringSet.h" |
59 | #include "llvm/ADT/Twine.h" |
60 | #include "llvm/ADT/iterator.h" |
61 | #include "llvm/ADT/iterator_range.h" |
62 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
63 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
64 | #include "llvm/Analysis/CFG.h" |
65 | #include "llvm/Analysis/LoopInfo.h" |
66 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
67 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
68 | #include "llvm/Analysis/TargetLibraryInfo.h" |
69 | #include "llvm/IR/Attributes.h" |
70 | #include "llvm/IR/BasicBlock.h" |
71 | #include "llvm/IR/CFG.h" |
72 | #include "llvm/IR/Comdat.h" |
73 | #include "llvm/IR/Constant.h" |
74 | #include "llvm/IR/Constants.h" |
75 | #include "llvm/IR/DiagnosticInfo.h" |
76 | #include "llvm/IR/Dominators.h" |
77 | #include "llvm/IR/EHPersonalities.h" |
78 | #include "llvm/IR/Function.h" |
79 | #include "llvm/IR/GlobalAlias.h" |
80 | #include "llvm/IR/GlobalValue.h" |
81 | #include "llvm/IR/GlobalVariable.h" |
82 | #include "llvm/IR/IRBuilder.h" |
83 | #include "llvm/IR/InstVisitor.h" |
84 | #include "llvm/IR/InstrTypes.h" |
85 | #include "llvm/IR/Instruction.h" |
86 | #include "llvm/IR/Instructions.h" |
87 | #include "llvm/IR/IntrinsicInst.h" |
88 | #include "llvm/IR/Intrinsics.h" |
89 | #include "llvm/IR/LLVMContext.h" |
90 | #include "llvm/IR/MDBuilder.h" |
91 | #include "llvm/IR/Module.h" |
92 | #include "llvm/IR/PassManager.h" |
93 | #include "llvm/IR/ProfDataUtils.h" |
94 | #include "llvm/IR/ProfileSummary.h" |
95 | #include "llvm/IR/Type.h" |
96 | #include "llvm/IR/Value.h" |
97 | #include "llvm/ProfileData/InstrProf.h" |
98 | #include "llvm/ProfileData/InstrProfReader.h" |
99 | #include "llvm/Support/BranchProbability.h" |
100 | #include "llvm/Support/CRC.h" |
101 | #include "llvm/Support/Casting.h" |
102 | #include "llvm/Support/CommandLine.h" |
103 | #include "llvm/Support/Compiler.h" |
104 | #include "llvm/Support/DOTGraphTraits.h" |
105 | #include "llvm/Support/Debug.h" |
106 | #include "llvm/Support/Error.h" |
107 | #include "llvm/Support/ErrorHandling.h" |
108 | #include "llvm/Support/GraphWriter.h" |
109 | #include "llvm/Support/VirtualFileSystem.h" |
110 | #include "llvm/Support/raw_ostream.h" |
111 | #include "llvm/TargetParser/Triple.h" |
112 | #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" |
113 | #include "llvm/Transforms/Instrumentation/CFGMST.h" |
114 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
115 | #include "llvm/Transforms/Utils/Instrumentation.h" |
116 | #include "llvm/Transforms/Utils/MisExpect.h" |
117 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
118 | #include <algorithm> |
119 | #include <cassert> |
120 | #include <cstdint> |
121 | #include <memory> |
122 | #include <numeric> |
123 | #include <optional> |
124 | #include <stack> |
125 | #include <string> |
126 | #include <unordered_map> |
127 | #include <utility> |
128 | #include <vector> |
129 | |
130 | using namespace llvm; |
131 | using ProfileCount = Function::ProfileCount; |
132 | using VPCandidateInfo = ValueProfileCollector::CandidateInfo; |
133 | |
134 | #define DEBUG_TYPE "pgo-instrumentation" |
135 | |
136 | STATISTIC(NumOfPGOInstrument, "Number of edges instrumented." ); |
137 | STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented." ); |
138 | STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented." ); |
139 | STATISTIC(NumOfPGOEdge, "Number of edges." ); |
140 | STATISTIC(NumOfPGOBB, "Number of basic-blocks." ); |
141 | STATISTIC(NumOfPGOSplit, "Number of critical edge splits." ); |
142 | STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts." ); |
143 | STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile." ); |
144 | STATISTIC(NumOfPGOMissing, "Number of functions without profile." ); |
145 | STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations." ); |
146 | STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO." ); |
147 | STATISTIC(NumOfCSPGOSelectInsts, |
148 | "Number of select instruction instrumented in CSPGO." ); |
149 | STATISTIC(NumOfCSPGOMemIntrinsics, |
150 | "Number of mem intrinsics instrumented in CSPGO." ); |
151 | STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO." ); |
152 | STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO." ); |
153 | STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO." ); |
154 | STATISTIC(NumOfCSPGOFunc, |
155 | "Number of functions having valid profile counts in CSPGO." ); |
156 | STATISTIC(NumOfCSPGOMismatch, |
157 | "Number of functions having mismatch profile in CSPGO." ); |
158 | STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO." ); |
159 | STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed" ); |
160 | |
161 | // Command line option to specify the file to read profile from. This is |
162 | // mainly used for testing. |
163 | static cl::opt<std::string> PGOTestProfileFile( |
164 | "pgo-test-profile-file" , cl::init(Val: "" ), cl::Hidden, |
165 | cl::value_desc("filename" ), |
166 | cl::desc("Specify the path of profile data file. This is " |
167 | "mainly for test purpose." )); |
168 | static cl::opt<std::string> PGOTestProfileRemappingFile( |
169 | "pgo-test-profile-remapping-file" , cl::init(Val: "" ), cl::Hidden, |
170 | cl::value_desc("filename" ), |
171 | cl::desc("Specify the path of profile remapping file. This is mainly for " |
172 | "test purpose." )); |
173 | |
174 | // Command line option to disable value profiling. The default is false: |
175 | // i.e. value profiling is enabled by default. This is for debug purpose. |
176 | static cl::opt<bool> DisableValueProfiling("disable-vp" , cl::init(Val: false), |
177 | cl::Hidden, |
178 | cl::desc("Disable Value Profiling" )); |
179 | |
180 | // Command line option to set the maximum number of VP annotations to write to |
181 | // the metadata for a single indirect call callsite. |
182 | static cl::opt<unsigned> MaxNumAnnotations( |
183 | "icp-max-annotations" , cl::init(Val: 3), cl::Hidden, |
184 | cl::desc("Max number of annotations for a single indirect " |
185 | "call callsite" )); |
186 | |
187 | // Command line option to set the maximum number of value annotations |
188 | // to write to the metadata for a single memop intrinsic. |
189 | static cl::opt<unsigned> MaxNumMemOPAnnotations( |
190 | "memop-max-annotations" , cl::init(Val: 4), cl::Hidden, |
191 | cl::desc("Max number of precise value annotations for a single memop" |
192 | "intrinsic" )); |
193 | |
194 | // Command line option to control appending FunctionHash to the name of a COMDAT |
195 | // function. This is to avoid the hash mismatch caused by the preinliner. |
196 | static cl::opt<bool> DoComdatRenaming( |
197 | "do-comdat-renaming" , cl::init(Val: false), cl::Hidden, |
198 | cl::desc("Append function hash to the name of COMDAT function to avoid " |
199 | "function hash mismatch due to the preinliner" )); |
200 | |
201 | namespace llvm { |
202 | // Command line option to enable/disable the warning about missing profile |
203 | // information. |
204 | cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function" , cl::init(Val: false), |
205 | cl::Hidden, |
206 | cl::desc("Use this option to turn on/off " |
207 | "warnings about missing profile data for " |
208 | "functions." )); |
209 | |
210 | // Command line option to enable/disable the warning about a hash mismatch in |
211 | // the profile data. |
212 | cl::opt<bool> |
213 | NoPGOWarnMismatch("no-pgo-warn-mismatch" , cl::init(Val: false), cl::Hidden, |
214 | cl::desc("Use this option to turn off/on " |
215 | "warnings about profile cfg mismatch." )); |
216 | |
217 | // Command line option to enable/disable the warning about a hash mismatch in |
218 | // the profile data for Comdat functions, which often turns out to be false |
219 | // positive due to the pre-instrumentation inline. |
220 | cl::opt<bool> NoPGOWarnMismatchComdatWeak( |
221 | "no-pgo-warn-mismatch-comdat-weak" , cl::init(Val: true), cl::Hidden, |
222 | cl::desc("The option is used to turn on/off " |
223 | "warnings about hash mismatch for comdat " |
224 | "or weak functions." )); |
225 | } // namespace llvm |
226 | |
227 | // Command line option to enable/disable select instruction instrumentation. |
228 | static cl::opt<bool> |
229 | PGOInstrSelect("pgo-instr-select" , cl::init(Val: true), cl::Hidden, |
230 | cl::desc("Use this option to turn on/off SELECT " |
231 | "instruction instrumentation. " )); |
232 | |
233 | // Command line option to turn on CFG dot or text dump of raw profile counts |
234 | static cl::opt<PGOViewCountsType> PGOViewRawCounts( |
235 | "pgo-view-raw-counts" , cl::Hidden, |
236 | cl::desc("A boolean option to show CFG dag or text " |
237 | "with raw profile counts from " |
238 | "profile data. See also option " |
239 | "-pgo-view-counts. To limit graph " |
240 | "display to only one function, use " |
241 | "filtering option -view-bfi-func-name." ), |
242 | cl::values(clEnumValN(PGOVCT_None, "none" , "do not show." ), |
243 | clEnumValN(PGOVCT_Graph, "graph" , "show a graph." ), |
244 | clEnumValN(PGOVCT_Text, "text" , "show in text." ))); |
245 | |
246 | // Command line option to enable/disable memop intrinsic call.size profiling. |
247 | static cl::opt<bool> |
248 | PGOInstrMemOP("pgo-instr-memop" , cl::init(Val: true), cl::Hidden, |
249 | cl::desc("Use this option to turn on/off " |
250 | "memory intrinsic size profiling." )); |
251 | |
252 | // Emit branch probability as optimization remarks. |
253 | static cl::opt<bool> |
254 | EmitBranchProbability("pgo-emit-branch-prob" , cl::init(Val: false), cl::Hidden, |
255 | cl::desc("When this option is on, the annotated " |
256 | "branch probability will be emitted as " |
257 | "optimization remarks: -{Rpass|" |
258 | "pass-remarks}=pgo-instrumentation" )); |
259 | |
260 | static cl::opt<bool> PGOInstrumentEntry( |
261 | "pgo-instrument-entry" , cl::init(Val: false), cl::Hidden, |
262 | cl::desc("Force to instrument function entry basicblock." )); |
263 | |
264 | static cl::opt<bool> |
265 | PGOInstrumentLoopEntries("pgo-instrument-loop-entries" , cl::init(Val: false), |
266 | cl::Hidden, |
267 | cl::desc("Force to instrument loop entries." )); |
268 | |
269 | static cl::opt<bool> PGOFunctionEntryCoverage( |
270 | "pgo-function-entry-coverage" , cl::Hidden, |
271 | cl::desc( |
272 | "Use this option to enable function entry coverage instrumentation." )); |
273 | |
274 | static cl::opt<bool> PGOBlockCoverage( |
275 | "pgo-block-coverage" , |
276 | cl::desc("Use this option to enable basic block coverage instrumentation" )); |
277 | |
278 | static cl::opt<bool> |
279 | PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph" , |
280 | cl::desc("Create a dot file of CFGs with block " |
281 | "coverage inference information" )); |
282 | |
283 | static cl::opt<bool> PGOTemporalInstrumentation( |
284 | "pgo-temporal-instrumentation" , |
285 | cl::desc("Use this option to enable temporal instrumentation" )); |
286 | |
287 | static cl::opt<bool> |
288 | PGOFixEntryCount("pgo-fix-entry-count" , cl::init(Val: true), cl::Hidden, |
289 | cl::desc("Fix function entry count in profile use." )); |
290 | |
291 | static cl::opt<bool> PGOVerifyHotBFI( |
292 | "pgo-verify-hot-bfi" , cl::init(Val: false), cl::Hidden, |
293 | cl::desc("Print out the non-match BFI count if a hot raw profile count " |
294 | "becomes non-hot, or a cold raw profile count becomes hot. " |
295 | "The print is enabled under -Rpass-analysis=pgo, or " |
296 | "internal option -pass-remarks-analysis=pgo." )); |
297 | |
298 | static cl::opt<bool> PGOVerifyBFI( |
299 | "pgo-verify-bfi" , cl::init(Val: false), cl::Hidden, |
300 | cl::desc("Print out mismatched BFI counts after setting profile metadata " |
301 | "The print is enabled under -Rpass-analysis=pgo, or " |
302 | "internal option -pass-remarks-analysis=pgo." )); |
303 | |
304 | static cl::opt<unsigned> PGOVerifyBFIRatio( |
305 | "pgo-verify-bfi-ratio" , cl::init(Val: 2), cl::Hidden, |
306 | cl::desc("Set the threshold for pgo-verify-bfi: only print out " |
307 | "mismatched BFI if the difference percentage is greater than " |
308 | "this value (in percentage)." )); |
309 | |
310 | static cl::opt<unsigned> PGOVerifyBFICutoff( |
311 | "pgo-verify-bfi-cutoff" , cl::init(Val: 5), cl::Hidden, |
312 | cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " |
313 | "profile count value is below." )); |
314 | |
315 | static cl::opt<std::string> PGOTraceFuncHash( |
316 | "pgo-trace-func-hash" , cl::init(Val: "-" ), cl::Hidden, |
317 | cl::value_desc("function name" ), |
318 | cl::desc("Trace the hash of the function with this name." )); |
319 | |
320 | static cl::opt<unsigned> PGOFunctionSizeThreshold( |
321 | "pgo-function-size-threshold" , cl::Hidden, |
322 | cl::desc("Do not instrument functions smaller than this threshold." )); |
323 | |
324 | static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( |
325 | "pgo-critical-edge-threshold" , cl::init(Val: 20000), cl::Hidden, |
326 | cl::desc("Do not instrument functions with the number of critical edges " |
327 | " greater than this threshold." )); |
328 | |
329 | static cl::opt<uint64_t> PGOColdInstrumentEntryThreshold( |
330 | "pgo-cold-instrument-entry-threshold" , cl::init(Val: 0), cl::Hidden, |
331 | cl::desc("For cold function instrumentation, skip instrumenting functions " |
332 | "whose entry count is above the given value." )); |
333 | |
334 | static cl::opt<bool> PGOTreatUnknownAsCold( |
335 | "pgo-treat-unknown-as-cold" , cl::init(Val: false), cl::Hidden, |
336 | cl::desc("For cold function instrumentation, treat count unknown(e.g. " |
337 | "unprofiled) functions as cold." )); |
338 | |
339 | cl::opt<bool> PGOInstrumentColdFunctionOnly( |
340 | "pgo-instrument-cold-function-only" , cl::init(Val: false), cl::Hidden, |
341 | cl::desc("Enable cold function only instrumentation." )); |
342 | |
343 | cl::list<std::string> CtxPGOSkipCallsiteInstrument( |
344 | "ctx-prof-skip-callsite-instr" , cl::Hidden, |
345 | cl::desc("Do not instrument callsites to functions in this list. Intended " |
346 | "for testing." )); |
347 | |
348 | extern cl::opt<unsigned> MaxNumVTableAnnotations; |
349 | |
350 | namespace llvm { |
351 | // Command line option to turn on CFG dot dump after profile annotation. |
352 | // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts |
353 | extern cl::opt<PGOViewCountsType> PGOViewCounts; |
354 | |
355 | // Command line option to specify the name of the function for CFG dump |
356 | // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= |
357 | extern cl::opt<std::string> ViewBlockFreqFuncName; |
358 | |
359 | // Command line option to enable vtable value profiling. Defined in |
360 | // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= |
361 | extern cl::opt<bool> EnableVTableValueProfiling; |
362 | extern cl::opt<bool> EnableVTableProfileUse; |
363 | LLVM_ABI extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> |
364 | ProfileCorrelate; |
365 | } // namespace llvm |
366 | |
367 | namespace { |
368 | class FunctionInstrumenter final { |
369 | Module &M; |
370 | Function &F; |
371 | TargetLibraryInfo &TLI; |
372 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; |
373 | BranchProbabilityInfo *const BPI; |
374 | BlockFrequencyInfo *const BFI; |
375 | LoopInfo *const LI; |
376 | |
377 | const PGOInstrumentationType InstrumentationType; |
378 | |
379 | // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. |
380 | // Ctx profiling implicitly captures indirect call cases, but not other |
381 | // values. Supporting other values is relatively straight-forward - just |
382 | // another counter range within the context. |
383 | bool isValueProfilingDisabled() const { |
384 | return DisableValueProfiling || |
385 | InstrumentationType == PGOInstrumentationType::CTXPROF; |
386 | } |
387 | |
388 | bool shouldInstrumentEntryBB() const { |
389 | return PGOInstrumentEntry || |
390 | InstrumentationType == PGOInstrumentationType::CTXPROF; |
391 | } |
392 | |
393 | bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; } |
394 | |
395 | public: |
396 | FunctionInstrumenter( |
397 | Module &M, Function &F, TargetLibraryInfo &TLI, |
398 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
399 | BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr, |
400 | LoopInfo *LI = nullptr, |
401 | PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO) |
402 | : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI), |
403 | LI(LI), InstrumentationType(InstrumentationType) {} |
404 | |
405 | void instrument(); |
406 | }; |
407 | } // namespace |
408 | |
409 | // Return a string describing the branch condition that can be |
410 | // used in static branch probability heuristics: |
411 | static std::string getBranchCondString(Instruction *TI) { |
412 | BranchInst *BI = dyn_cast<BranchInst>(Val: TI); |
413 | if (!BI || !BI->isConditional()) |
414 | return std::string(); |
415 | |
416 | Value *Cond = BI->getCondition(); |
417 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
418 | if (!CI) |
419 | return std::string(); |
420 | |
421 | std::string result; |
422 | raw_string_ostream OS(result); |
423 | OS << CI->getPredicate() << "_" ; |
424 | CI->getOperand(i_nocapture: 0)->getType()->print(O&: OS, IsForDebug: true); |
425 | |
426 | Value *RHS = CI->getOperand(i_nocapture: 1); |
427 | ConstantInt *CV = dyn_cast<ConstantInt>(Val: RHS); |
428 | if (CV) { |
429 | if (CV->isZero()) |
430 | OS << "_Zero" ; |
431 | else if (CV->isOne()) |
432 | OS << "_One" ; |
433 | else if (CV->isMinusOne()) |
434 | OS << "_MinusOne" ; |
435 | else |
436 | OS << "_Const" ; |
437 | } |
438 | return result; |
439 | } |
440 | |
441 | static const char *ValueProfKindDescr[] = { |
442 | #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, |
443 | #include "llvm/ProfileData/InstrProfData.inc" |
444 | }; |
445 | |
446 | // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime |
447 | // aware this is an ir_level profile so it can set the version flag. |
448 | static GlobalVariable * |
449 | createIRLevelProfileFlagVar(Module &M, |
450 | PGOInstrumentationType InstrumentationType) { |
451 | const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); |
452 | Type *IntTy64 = Type::getInt64Ty(C&: M.getContext()); |
453 | uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); |
454 | if (InstrumentationType == PGOInstrumentationType::CSFDO) |
455 | ProfileVersion |= VARIANT_MASK_CSIR_PROF; |
456 | if (PGOInstrumentEntry || |
457 | InstrumentationType == PGOInstrumentationType::CTXPROF) |
458 | ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; |
459 | if (PGOInstrumentLoopEntries) |
460 | ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES; |
461 | if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) |
462 | ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; |
463 | if (PGOFunctionEntryCoverage) |
464 | ProfileVersion |= |
465 | VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; |
466 | if (PGOBlockCoverage) |
467 | ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; |
468 | if (PGOTemporalInstrumentation) |
469 | ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; |
470 | auto IRLevelVersionVariable = new GlobalVariable( |
471 | M, IntTy64, true, GlobalValue::WeakAnyLinkage, |
472 | Constant::getIntegerValue(Ty: IntTy64, V: APInt(64, ProfileVersion)), VarName); |
473 | IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); |
474 | if (isGPUProfTarget(M)) |
475 | IRLevelVersionVariable->setVisibility( |
476 | llvm::GlobalValue::ProtectedVisibility); |
477 | |
478 | Triple TT(M.getTargetTriple()); |
479 | if (TT.supportsCOMDAT()) { |
480 | IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); |
481 | IRLevelVersionVariable->setComdat(M.getOrInsertComdat(Name: VarName)); |
482 | } |
483 | return IRLevelVersionVariable; |
484 | } |
485 | |
486 | namespace { |
487 | |
/// Mode in which the select instruction visitor operates; the visitor plays
/// one of three roles depending on it.
enum VisitMode {
  /// Simply count the number of select instructions.
  VM_counting,
  /// Insert code to count the number of times TrueValue of a select is taken.
  VM_instrument,
  /// Read the profile data and annotate the select instruction with metadata.
  VM_annotate
};
class PGOUseFunc;
495 | |
496 | /// Instruction Visitor class to visit select instructions. |
497 | struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { |
498 | Function &F; |
499 | unsigned NSIs = 0; // Number of select instructions instrumented. |
500 | VisitMode Mode = VM_counting; // Visiting mode. |
501 | unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. |
502 | unsigned TotalNumCtrs = 0; // Total number of counters |
503 | GlobalValue *FuncNameVar = nullptr; |
504 | uint64_t FuncHash = 0; |
505 | PGOUseFunc *UseFunc = nullptr; |
506 | bool HasSingleByteCoverage; |
507 | |
508 | SelectInstVisitor(Function &Func, bool HasSingleByteCoverage) |
509 | : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {} |
510 | |
511 | void countSelects() { |
512 | NSIs = 0; |
513 | Mode = VM_counting; |
514 | visit(F); |
515 | } |
516 | |
517 | // Visit the IR stream and instrument all select instructions. \p |
518 | // Ind is a pointer to the counter index variable; \p TotalNC |
519 | // is the total number of counters; \p FNV is the pointer to the |
520 | // PGO function name var; \p FHash is the function hash. |
521 | void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalValue *FNV, |
522 | uint64_t FHash) { |
523 | Mode = VM_instrument; |
524 | CurCtrIdx = Ind; |
525 | TotalNumCtrs = TotalNC; |
526 | FuncHash = FHash; |
527 | FuncNameVar = FNV; |
528 | visit(F); |
529 | } |
530 | |
531 | // Visit the IR stream and annotate all select instructions. |
532 | void annotateSelects(PGOUseFunc *UF, unsigned *Ind) { |
533 | Mode = VM_annotate; |
534 | UseFunc = UF; |
535 | CurCtrIdx = Ind; |
536 | visit(F); |
537 | } |
538 | |
539 | void instrumentOneSelectInst(SelectInst &SI); |
540 | void annotateOneSelectInst(SelectInst &SI); |
541 | |
542 | // Visit \p SI instruction and perform tasks according to visit mode. |
543 | void visitSelectInst(SelectInst &SI); |
544 | |
545 | // Return the number of select instructions. This needs be called after |
546 | // countSelects(). |
547 | unsigned getNumOfSelectInsts() const { return NSIs; } |
548 | }; |
549 | |
550 | /// This class implements the CFG edges for the Minimum Spanning Tree (MST) |
551 | /// based instrumentation. |
552 | /// Note that the CFG can be a multi-graph. So there might be multiple edges |
553 | /// with the same SrcBB and DestBB. |
554 | struct PGOEdge { |
555 | BasicBlock *SrcBB; |
556 | BasicBlock *DestBB; |
557 | uint64_t Weight; |
558 | bool InMST = false; |
559 | bool Removed = false; |
560 | bool IsCritical = false; |
561 | |
562 | PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1) |
563 | : SrcBB(Src), DestBB(Dest), Weight(W) {} |
564 | |
565 | /// Return the information string of an edge. |
566 | std::string infoString() const { |
567 | return (Twine(Removed ? "-" : " " ) + (InMST ? " " : "*" ) + |
568 | (IsCritical ? "c" : " " ) + " W=" + Twine(Weight)) |
569 | .str(); |
570 | } |
571 | }; |
572 | |
573 | /// This class stores the auxiliary information for each BB in the MST. |
574 | struct PGOBBInfo { |
575 | PGOBBInfo *Group; |
576 | uint32_t Index; |
577 | uint32_t Rank = 0; |
578 | |
579 | PGOBBInfo(unsigned IX) : Group(this), Index(IX) {} |
580 | |
581 | /// Return the information string of this object. |
582 | std::string infoString() const { |
583 | return (Twine("Index=" ) + Twine(Index)).str(); |
584 | } |
585 | }; |
586 | |
587 | // This class implements the CFG edges. Note the CFG can be a multi-graph. |
588 | template <class Edge, class BBInfo> class FuncPGOInstrumentation { |
589 | private: |
590 | Function &F; |
591 | |
592 | // Is this is context-sensitive instrumentation. |
593 | bool IsCS; |
594 | |
595 | // A map that stores the Comdat group in function F. |
596 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; |
597 | |
598 | ValueProfileCollector VPC; |
599 | |
600 | void computeCFGHash(); |
601 | void renameComdatFunction(); |
602 | |
603 | public: |
604 | const TargetLibraryInfo &TLI; |
605 | std::vector<std::vector<VPCandidateInfo>> ValueSites; |
606 | SelectInstVisitor SIVisitor; |
607 | std::string FuncName; |
608 | std::string DeprecatedFuncName; |
609 | GlobalVariable *FuncNameVar; |
610 | |
611 | // CFG hash value for this function. |
612 | uint64_t FunctionHash = 0; |
613 | |
614 | // The Minimum Spanning Tree of function CFG. |
615 | CFGMST<Edge, BBInfo> MST; |
616 | |
617 | const std::optional<BlockCoverageInference> BCI; |
618 | |
619 | static std::optional<BlockCoverageInference> |
620 | constructBCI(Function &Func, bool HasSingleByteCoverage, |
621 | bool InstrumentFuncEntry) { |
622 | if (HasSingleByteCoverage) |
623 | return BlockCoverageInference(Func, InstrumentFuncEntry); |
624 | return {}; |
625 | } |
626 | |
627 | // Collect all the BBs that will be instrumented, and store them in |
628 | // InstrumentBBs. |
629 | void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); |
630 | |
631 | // Give an edge, find the BB that will be instrumented. |
632 | // Return nullptr if there is no BB to be instrumented. |
633 | BasicBlock *getInstrBB(Edge *E); |
634 | |
635 | // Return the auxiliary BB information. |
636 | BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } |
637 | |
638 | // Return the auxiliary BB information if available. |
639 | BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } |
640 | |
641 | // Dump edges and BB information. |
642 | void dumpInfo(StringRef Str = "" ) const { |
643 | MST.dumpEdges(dbgs(), Twine("Dump Function " ) + FuncName + |
644 | " Hash: " + Twine(FunctionHash) + "\t" + Str); |
645 | } |
646 | |
647 | FuncPGOInstrumentation( |
648 | Function &Func, TargetLibraryInfo &TLI, |
649 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
650 | bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, |
651 | BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr, |
652 | bool IsCS = false, bool InstrumentFuncEntry = true, |
653 | bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false) |
654 | : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), |
655 | TLI(TLI), ValueSites(IPVK_Last + 1), |
656 | SIVisitor(Func, HasSingleByteCoverage), |
657 | MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI), |
658 | BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { |
659 | if (BCI && PGOViewBlockCoverageGraph) |
660 | BCI->viewBlockCoverageGraph(); |
661 | // This should be done before CFG hash computation. |
662 | SIVisitor.countSelects(); |
663 | ValueSites[IPVK_MemOPSize] = VPC.get(Kind: IPVK_MemOPSize); |
664 | if (!IsCS) { |
665 | NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); |
666 | NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); |
667 | NumOfPGOBB += MST.bbInfoSize(); |
668 | ValueSites[IPVK_IndirectCallTarget] = VPC.get(Kind: IPVK_IndirectCallTarget); |
669 | if (EnableVTableValueProfiling) |
670 | ValueSites[IPVK_VTableTarget] = VPC.get(Kind: IPVK_VTableTarget); |
671 | } else { |
672 | NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); |
673 | NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); |
674 | NumOfCSPGOBB += MST.bbInfoSize(); |
675 | } |
676 | |
677 | FuncName = getIRPGOFuncName(F); |
678 | DeprecatedFuncName = getPGOFuncName(F); |
679 | computeCFGHash(); |
680 | if (!ComdatMembers.empty()) |
681 | renameComdatFunction(); |
682 | LLVM_DEBUG(dumpInfo("after CFGMST" )); |
683 | |
684 | for (const auto &E : MST.allEdges()) { |
685 | if (E->Removed) |
686 | continue; |
687 | IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; |
688 | if (!E->InMST) |
689 | IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; |
690 | } |
691 | |
692 | if (CreateGlobalVar) |
693 | FuncNameVar = createPGOFuncNameVar(F, PGOFuncName: FuncName); |
694 | } |
695 | }; |
696 | |
697 | } // end anonymous namespace |
698 | |
699 | // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index |
700 | // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers |
701 | // of selects, indirect calls, mem ops and edges. |
702 | template <class Edge, class BBInfo> |
703 | void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { |
704 | std::vector<uint8_t> Indexes; |
705 | JamCRC JC; |
706 | for (auto &BB : F) { |
707 | for (BasicBlock *Succ : successors(BB: &BB)) { |
708 | auto BI = findBBInfo(BB: Succ); |
709 | if (BI == nullptr) |
710 | continue; |
711 | uint32_t Index = BI->Index; |
712 | for (int J = 0; J < 4; J++) |
713 | Indexes.push_back(x: (uint8_t)(Index >> (J * 8))); |
714 | } |
715 | } |
716 | JC.update(Data: Indexes); |
717 | |
718 | JamCRC JCH; |
719 | // The higher 32 bits. |
720 | auto updateJCH = [&JCH](uint64_t Num) { |
721 | uint8_t Data[8]; |
722 | support::endian::write64le(P: Data, V: Num); |
723 | JCH.update(Data); |
724 | }; |
725 | updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); |
726 | updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); |
727 | updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); |
728 | if (BCI) { |
729 | updateJCH(BCI->getInstrumentedBlocksHash()); |
730 | } else { |
731 | updateJCH((uint64_t)MST.numEdges()); |
732 | } |
733 | |
734 | // Hash format for context sensitive profile. Reserve 4 bits for other |
735 | // information. |
736 | FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); |
737 | |
738 | // Reserve bit 60-63 for other information purpose. |
739 | FunctionHash &= 0x0FFFFFFFFFFFFFFF; |
740 | if (IsCS) |
741 | NamedInstrProfRecord::setCSFlagInHash(FunctionHash); |
742 | LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" |
743 | << " CRC = " << JC.getCRC() |
744 | << ", Selects = " << SIVisitor.getNumOfSelectInsts() |
745 | << ", Edges = " << MST.numEdges() << ", ICSites = " |
746 | << ValueSites[IPVK_IndirectCallTarget].size() |
747 | << ", Memops = " << ValueSites[IPVK_MemOPSize].size() |
748 | << ", High32 CRC = " << JCH.getCRC() |
749 | << ", Hash = " << FunctionHash << "\n" ;); |
750 | |
751 | if (PGOTraceFuncHash != "-" && F.getName().contains(Other: PGOTraceFuncHash)) |
752 | dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash |
753 | << " in building " << F.getParent()->getSourceFileName() << "\n" ; |
754 | } |
755 | |
756 | // Check if we can safely rename this Comdat function. |
757 | static bool canRenameComdat( |
758 | Function &F, |
759 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { |
760 | if (!DoComdatRenaming || !canRenameComdatFunc(F, CheckAddressTaken: true)) |
761 | return false; |
762 | |
763 | // FIXME: Current only handle those Comdat groups that only containing one |
764 | // function. |
765 | // (1) For a Comdat group containing multiple functions, we need to have a |
766 | // unique postfix based on the hashes for each function. There is a |
767 | // non-trivial code refactoring to do this efficiently. |
768 | // (2) Variables can not be renamed, so we can not rename Comdat function in a |
769 | // group including global vars. |
770 | Comdat *C = F.getComdat(); |
771 | for (auto &&CM : make_range(p: ComdatMembers.equal_range(x: C))) { |
772 | assert(!isa<GlobalAlias>(CM.second)); |
773 | Function *FM = dyn_cast<Function>(Val: CM.second); |
774 | if (FM != &F) |
775 | return false; |
776 | } |
777 | return true; |
778 | } |
779 | |
780 | // Append the CFGHash to the Comdat function name. |
781 | template <class Edge, class BBInfo> |
782 | void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { |
783 | if (!canRenameComdat(F, ComdatMembers)) |
784 | return; |
785 | std::string OrigName = F.getName().str(); |
786 | std::string NewFuncName = |
787 | Twine(F.getName() + "." + Twine(FunctionHash)).str(); |
788 | F.setName(Twine(NewFuncName)); |
789 | GlobalAlias::create(Linkage: GlobalValue::WeakAnyLinkage, Name: OrigName, Aliasee: &F); |
790 | FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); |
791 | Comdat *NewComdat; |
792 | Module *M = F.getParent(); |
793 | // For AvailableExternallyLinkage functions, change the linkage to |
794 | // LinkOnceODR and put them into comdat. This is because after renaming, there |
795 | // is no backup external copy available for the function. |
796 | if (!F.hasComdat()) { |
797 | assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); |
798 | NewComdat = M->getOrInsertComdat(Name: StringRef(NewFuncName)); |
799 | F.setLinkage(GlobalValue::LinkOnceODRLinkage); |
800 | F.setComdat(NewComdat); |
801 | return; |
802 | } |
803 | |
804 | // This function belongs to a single function Comdat group. |
805 | Comdat *OrigComdat = F.getComdat(); |
806 | std::string NewComdatName = |
807 | Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); |
808 | NewComdat = M->getOrInsertComdat(Name: StringRef(NewComdatName)); |
809 | NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); |
810 | |
811 | for (auto &&CM : make_range(p: ComdatMembers.equal_range(x: OrigComdat))) { |
812 | // Must be a function. |
813 | cast<Function>(Val: CM.second)->setComdat(NewComdat); |
814 | } |
815 | } |
816 | |
817 | /// Collect all the BBs that will be instruments and add them to |
818 | /// `InstrumentBBs`. |
819 | template <class Edge, class BBInfo> |
820 | void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( |
821 | std::vector<BasicBlock *> &InstrumentBBs) { |
822 | if (BCI) { |
823 | for (auto &BB : F) |
824 | if (BCI->shouldInstrumentBlock(BB)) |
825 | InstrumentBBs.push_back(x: &BB); |
826 | return; |
827 | } |
828 | |
829 | // Use a worklist as we will update the vector during the iteration. |
830 | std::vector<Edge *> EdgeList; |
831 | EdgeList.reserve(MST.numEdges()); |
832 | for (const auto &E : MST.allEdges()) |
833 | EdgeList.push_back(E.get()); |
834 | |
835 | for (auto &E : EdgeList) { |
836 | BasicBlock *InstrBB = getInstrBB(E); |
837 | if (InstrBB) |
838 | InstrumentBBs.push_back(x: InstrBB); |
839 | } |
840 | } |
841 | |
842 | // Given a CFG E to be instrumented, find which BB to place the instrumented |
843 | // code. The function will split the critical edge if necessary. |
844 | template <class Edge, class BBInfo> |
845 | BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { |
846 | if (E->InMST || E->Removed) |
847 | return nullptr; |
848 | |
849 | BasicBlock *SrcBB = E->SrcBB; |
850 | BasicBlock *DestBB = E->DestBB; |
851 | // For a fake edge, instrument the real BB. |
852 | if (SrcBB == nullptr) |
853 | return DestBB; |
854 | if (DestBB == nullptr) |
855 | return SrcBB; |
856 | |
857 | auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { |
858 | // There are basic blocks (such as catchswitch) cannot be instrumented. |
859 | // If the returned first insertion point is the end of BB, skip this BB. |
860 | if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end()) |
861 | return nullptr; |
862 | return BB; |
863 | }; |
864 | |
865 | // Instrument the SrcBB if it has a single successor, |
866 | // otherwise, the DestBB if this is not a critical edge. |
867 | Instruction *TI = SrcBB->getTerminator(); |
868 | if (TI->getNumSuccessors() <= 1) |
869 | return canInstrument(SrcBB); |
870 | if (!E->IsCritical) |
871 | return canInstrument(DestBB); |
872 | |
873 | // Some IndirectBr critical edges cannot be split by the previous |
874 | // SplitIndirectBrCriticalEdges call. Bail out. |
875 | unsigned SuccNum = GetSuccessorNumber(BB: SrcBB, Succ: DestBB); |
876 | BasicBlock *InstrBB = |
877 | isa<IndirectBrInst>(Val: TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); |
878 | if (!InstrBB) { |
879 | LLVM_DEBUG( |
880 | dbgs() << "Fail to split critical edge: not instrument this edge.\n" ); |
881 | return nullptr; |
882 | } |
883 | // For a critical edge, we have to split. Instrument the newly |
884 | // created BB. |
885 | IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; |
886 | LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index |
887 | << " --> " << getBBInfo(DestBB).Index << "\n" ); |
888 | // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. |
889 | MST.addEdge(SrcBB, InstrBB, 0); |
890 | // Second one: Add new edge of InstrBB->DestBB. |
891 | Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); |
892 | NewEdge1.InMST = true; |
893 | E->Removed = true; |
894 | |
895 | return canInstrument(InstrBB); |
896 | } |
897 | |
898 | // When generating value profiling calls on Windows routines that make use of |
899 | // handler funclets for exception processing an operand bundle needs to attached |
900 | // to the called function. This routine will set \p OpBundles to contain the |
901 | // funclet information, if any is needed, that should be placed on the generated |
902 | // value profiling call for the value profile candidate call. |
903 | static void |
904 | populateEHOperandBundle(VPCandidateInfo &Cand, |
905 | DenseMap<BasicBlock *, ColorVector> &BlockColors, |
906 | SmallVectorImpl<OperandBundleDef> &OpBundles) { |
907 | auto *OrigCall = dyn_cast<CallBase>(Val: Cand.AnnotatedInst); |
908 | if (!OrigCall) |
909 | return; |
910 | |
911 | if (!isa<IntrinsicInst>(Val: OrigCall)) { |
912 | // The instrumentation call should belong to the same funclet as a |
913 | // non-intrinsic call, so just copy the operand bundle, if any exists. |
914 | std::optional<OperandBundleUse> ParentFunclet = |
915 | OrigCall->getOperandBundle(ID: LLVMContext::OB_funclet); |
916 | if (ParentFunclet) |
917 | OpBundles.emplace_back(Args: OperandBundleDef(*ParentFunclet)); |
918 | } else { |
919 | // Intrinsics or other instructions do not get funclet information from the |
920 | // front-end. Need to use the BlockColors that was computed by the routine |
921 | // colorEHFunclets to determine whether a funclet is needed. |
922 | if (!BlockColors.empty()) { |
923 | const ColorVector &CV = BlockColors.find(Val: OrigCall->getParent())->second; |
924 | assert(CV.size() == 1 && "non-unique color for block!" ); |
925 | BasicBlock::iterator EHPadIt = CV.front()->getFirstNonPHIIt(); |
926 | if (EHPadIt->isEHPad()) |
927 | OpBundles.emplace_back(Args: "funclet" , Args: &*EHPadIt); |
928 | } |
929 | } |
930 | } |
931 | |
932 | // Visit all edge and instrument the edges not in MST, and do value profiling. |
933 | // Critical edges will be split. |
934 | void FunctionInstrumenter::instrument() { |
935 | if (!PGOBlockCoverage) { |
936 | // Split indirectbr critical edges here before computing the MST rather than |
937 | // later in getInstrBB() to avoid invalidating it. |
938 | SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); |
939 | } |
940 | |
941 | const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF; |
942 | FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( |
943 | F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI, |
944 | InstrumentationType == PGOInstrumentationType::CSFDO, |
945 | shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(), |
946 | PGOBlockCoverage); |
947 | |
948 | auto *const Name = IsCtxProf ? cast<GlobalValue>(Val: &F) : FuncInfo.FuncNameVar; |
949 | auto *const CFGHash = |
950 | ConstantInt::get(Ty: Type::getInt64Ty(C&: M.getContext()), V: FuncInfo.FunctionHash); |
951 | // Make sure that pointer to global is passed in with zero addrspace |
952 | // This is relevant during GPU profiling |
953 | auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( |
954 | C: Name, Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0)); |
955 | if (PGOFunctionEntryCoverage) { |
956 | auto &EntryBB = F.getEntryBlock(); |
957 | IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca()); |
958 | // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, |
959 | // i32 <index>) |
960 | Builder.CreateIntrinsic( |
961 | ID: Intrinsic::instrprof_cover, |
962 | Args: {NormalizedNamePtr, CFGHash, Builder.getInt32(C: 1), Builder.getInt32(C: 0)}); |
963 | return; |
964 | } |
965 | |
966 | std::vector<BasicBlock *> InstrumentBBs; |
967 | FuncInfo.getInstrumentBBs(InstrumentBBs); |
968 | unsigned NumCounters = |
969 | InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); |
970 | |
971 | if (IsCtxProf) { |
972 | StringSet<> SkipCSInstr(llvm::from_range, CtxPGOSkipCallsiteInstrument); |
973 | |
974 | auto *CSIntrinsic = |
975 | Intrinsic::getOrInsertDeclaration(M: &M, id: Intrinsic::instrprof_callsite); |
976 | // We want to count the instrumentable callsites, then instrument them. This |
977 | // is because the llvm.instrprof.callsite intrinsic has an argument (like |
978 | // the other instrprof intrinsics) capturing the total number of |
979 | // instrumented objects (counters, or callsites, in this case). In this |
980 | // case, we want that value so we can readily pass it to the compiler-rt |
981 | // APIs that may have to allocate memory based on the nr of callsites. |
982 | // The traversal logic is the same for both counting and instrumentation, |
983 | // just needs to be done in succession. |
984 | auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) { |
985 | for (auto &BB : F) |
986 | for (auto &Instr : BB) |
987 | if (auto *CS = dyn_cast<CallBase>(Val: &Instr)) { |
988 | if (!InstrProfCallsite::canInstrumentCallsite(CB: *CS)) |
989 | continue; |
990 | if (CS->getCalledFunction() && |
991 | SkipCSInstr.contains(key: CS->getCalledFunction()->getName())) |
992 | continue; |
993 | Visitor(CS); |
994 | } |
995 | }; |
996 | // First, count callsites. |
997 | uint32_t TotalNumCallsites = 0; |
998 | Visit([&TotalNumCallsites](auto *) { ++TotalNumCallsites; }); |
999 | |
1000 | // Now instrument. |
1001 | uint32_t CallsiteIndex = 0; |
1002 | Visit([&](auto *CB) { |
1003 | IRBuilder<> Builder(CB); |
1004 | Builder.CreateCall(CSIntrinsic, |
1005 | {Name, CFGHash, Builder.getInt32(C: TotalNumCallsites), |
1006 | Builder.getInt32(C: CallsiteIndex++), |
1007 | CB->getCalledOperand()}); |
1008 | }); |
1009 | } |
1010 | |
1011 | uint32_t I = 0; |
1012 | if (PGOTemporalInstrumentation) { |
1013 | NumCounters += PGOBlockCoverage ? 8 : 1; |
1014 | auto &EntryBB = F.getEntryBlock(); |
1015 | IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca()); |
1016 | // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>, |
1017 | // i32 <index>) |
1018 | Builder.CreateIntrinsic(ID: Intrinsic::instrprof_timestamp, |
1019 | Args: {NormalizedNamePtr, CFGHash, |
1020 | Builder.getInt32(C: NumCounters), |
1021 | Builder.getInt32(C: I)}); |
1022 | I += PGOBlockCoverage ? 8 : 1; |
1023 | } |
1024 | |
1025 | for (auto *InstrBB : InstrumentBBs) { |
1026 | IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca()); |
1027 | assert(Builder.GetInsertPoint() != InstrBB->end() && |
1028 | "Cannot get the Instrumentation point" ); |
1029 | // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, |
1030 | // i32 <index>) |
1031 | Builder.CreateIntrinsic(ID: PGOBlockCoverage ? Intrinsic::instrprof_cover |
1032 | : Intrinsic::instrprof_increment, |
1033 | Args: {NormalizedNamePtr, CFGHash, |
1034 | Builder.getInt32(C: NumCounters), |
1035 | Builder.getInt32(C: I++)}); |
1036 | } |
1037 | |
1038 | // Now instrument select instructions: |
1039 | FuncInfo.SIVisitor.instrumentSelects(Ind: &I, TotalNC: NumCounters, FNV: Name, |
1040 | FHash: FuncInfo.FunctionHash); |
1041 | assert(I == NumCounters); |
1042 | |
1043 | if (isValueProfilingDisabled()) |
1044 | return; |
1045 | |
1046 | NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); |
1047 | |
1048 | // Intrinsic function calls do not have funclet operand bundles needed for |
1049 | // Windows exception handling attached to them. However, if value profiling is |
1050 | // inserted for one of these calls, then a funclet value will need to be set |
1051 | // on the instrumentation call based on the funclet coloring. |
1052 | DenseMap<BasicBlock *, ColorVector> BlockColors; |
1053 | if (F.hasPersonalityFn() && |
1054 | isScopedEHPersonality(Pers: classifyEHPersonality(Pers: F.getPersonalityFn()))) |
1055 | BlockColors = colorEHFunclets(F); |
1056 | |
1057 | // For each VP Kind, walk the VP candidates and instrument each one. |
1058 | for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { |
1059 | unsigned SiteIndex = 0; |
1060 | if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) |
1061 | continue; |
1062 | |
1063 | for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { |
1064 | LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] |
1065 | << " site: CallSite Index = " << SiteIndex << "\n" ); |
1066 | |
1067 | IRBuilder<> Builder(Cand.InsertPt); |
1068 | assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && |
1069 | "Cannot get the Instrumentation point" ); |
1070 | |
1071 | Value *ToProfile = nullptr; |
1072 | if (Cand.V->getType()->isIntegerTy()) |
1073 | ToProfile = Builder.CreateZExtOrTrunc(V: Cand.V, DestTy: Builder.getInt64Ty()); |
1074 | else if (Cand.V->getType()->isPointerTy()) |
1075 | ToProfile = Builder.CreatePtrToInt(V: Cand.V, DestTy: Builder.getInt64Ty()); |
1076 | assert(ToProfile && "value profiling Value is of unexpected type" ); |
1077 | |
1078 | auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast( |
1079 | C: Name, Ty: PointerType::get(C&: M.getContext(), AddressSpace: 0)); |
1080 | |
1081 | SmallVector<OperandBundleDef, 1> OpBundles; |
1082 | populateEHOperandBundle(Cand, BlockColors, OpBundles); |
1083 | Builder.CreateCall( |
1084 | Callee: Intrinsic::getOrInsertDeclaration(M: &M, |
1085 | id: Intrinsic::instrprof_value_profile), |
1086 | Args: {NormalizedNamePtr, Builder.getInt64(C: FuncInfo.FunctionHash), |
1087 | ToProfile, Builder.getInt32(C: Kind), Builder.getInt32(C: SiteIndex++)}, |
1088 | OpBundles); |
1089 | } |
1090 | } // IPVK_First <= Kind <= IPVK_Last |
1091 | } |
1092 | |
1093 | namespace { |
1094 | |
1095 | // This class represents a CFG edge in profile use compilation. |
1096 | struct PGOUseEdge : public PGOEdge { |
1097 | using PGOEdge::PGOEdge; |
1098 | |
1099 | std::optional<uint64_t> Count; |
1100 | |
1101 | // Set edge count value |
1102 | void setEdgeCount(uint64_t Value) { Count = Value; } |
1103 | |
1104 | // Return the information string for this object. |
1105 | std::string infoString() const { |
1106 | if (!Count) |
1107 | return PGOEdge::infoString(); |
1108 | return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str(); |
1109 | } |
1110 | }; |
1111 | |
1112 | using DirectEdges = SmallVector<PGOUseEdge *, 2>; |
1113 | |
1114 | // This class stores the auxiliary information for each BB. |
1115 | struct PGOUseBBInfo : public PGOBBInfo { |
1116 | std::optional<uint64_t> Count; |
1117 | int32_t UnknownCountInEdge = 0; |
1118 | int32_t UnknownCountOutEdge = 0; |
1119 | DirectEdges InEdges; |
1120 | DirectEdges OutEdges; |
1121 | |
1122 | PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {} |
1123 | |
1124 | // Set the profile count value for this BB. |
1125 | void setBBInfoCount(uint64_t Value) { Count = Value; } |
1126 | |
1127 | // Return the information string of this object. |
1128 | std::string infoString() const { |
1129 | if (!Count) |
1130 | return PGOBBInfo::infoString(); |
1131 | return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str(); |
1132 | } |
1133 | |
1134 | // Add an OutEdge and update the edge count. |
1135 | void addOutEdge(PGOUseEdge *E) { |
1136 | OutEdges.push_back(Elt: E); |
1137 | UnknownCountOutEdge++; |
1138 | } |
1139 | |
1140 | // Add an InEdge and update the edge count. |
1141 | void addInEdge(PGOUseEdge *E) { |
1142 | InEdges.push_back(Elt: E); |
1143 | UnknownCountInEdge++; |
1144 | } |
1145 | }; |
1146 | |
1147 | } // end anonymous namespace |
1148 | |
1149 | // Sum up the count values for all the edges. |
1150 | static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { |
1151 | uint64_t Total = 0; |
1152 | for (const auto &E : Edges) { |
1153 | if (E->Removed) |
1154 | continue; |
1155 | if (E->Count) |
1156 | Total += *E->Count; |
1157 | } |
1158 | return Total; |
1159 | } |
1160 | |
1161 | namespace { |
1162 | |
1163 | class PGOUseFunc { |
1164 | public: |
1165 | PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, |
1166 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
1167 | BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, |
1168 | LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS, |
1169 | bool InstrumentFuncEntry, bool InstrumentLoopEntries, |
1170 | bool HasSingleByteCoverage) |
1171 | : F(Func), M(Modu), BFI(BFIin), PSI(PSI), |
1172 | FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS, |
1173 | InstrumentFuncEntry, InstrumentLoopEntries, |
1174 | HasSingleByteCoverage), |
1175 | FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {} |
1176 | |
1177 | void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); |
1178 | |
1179 | /// Get the profile record, assign it to \p ProfileRecord, handle errors if |
1180 | /// necessary, and assign \p ProgramMaxCount. \returns true if there are no |
1181 | /// errors. |
1182 | bool getRecord(IndexedInstrProfReader *PGOReader); |
1183 | |
1184 | // Read counts for the instrumented BB from profile. |
1185 | bool readCounters(bool &AllZeros, |
1186 | InstrProfRecord::CountPseudoKind &PseudoKind); |
1187 | |
1188 | // Populate the counts for all BBs. |
1189 | void populateCounters(); |
1190 | |
1191 | // Set block coverage based on profile coverage values. |
1192 | void populateCoverage(); |
1193 | |
1194 | // Set the branch weights based on the count values. |
1195 | void setBranchWeights(); |
1196 | |
1197 | // Annotate the value profile call sites for all value kind. |
1198 | void annotateValueSites(); |
1199 | |
1200 | // Annotate the value profile call sites for one value kind. |
1201 | void annotateValueSites(uint32_t Kind); |
1202 | |
1203 | // Annotate the irreducible loop header weights. |
1204 | void annotateIrrLoopHeaderWeights(); |
1205 | |
1206 | // The hotness of the function from the profile count. |
1207 | enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; |
1208 | |
1209 | // Return the function hotness from the profile. |
1210 | FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } |
1211 | |
1212 | // Return the function hash. |
1213 | uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } |
1214 | |
1215 | // Return the profile record for this function; |
1216 | NamedInstrProfRecord &getProfileRecord() { return ProfileRecord; } |
1217 | |
1218 | // Return the auxiliary BB information. |
1219 | PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const { |
1220 | return FuncInfo.getBBInfo(BB); |
1221 | } |
1222 | |
1223 | // Return the auxiliary BB information if available. |
1224 | PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const { |
1225 | return FuncInfo.findBBInfo(BB); |
1226 | } |
1227 | |
1228 | Function &getFunc() const { return F; } |
1229 | |
1230 | void dumpInfo(StringRef Str = "" ) const { FuncInfo.dumpInfo(Str); } |
1231 | |
1232 | uint64_t getProgramMaxCount() const { return ProgramMaxCount; } |
1233 | |
1234 | private: |
1235 | Function &F; |
1236 | Module *M; |
1237 | BlockFrequencyInfo *BFI; |
1238 | ProfileSummaryInfo *PSI; |
1239 | |
1240 | // This member stores the shared information with class PGOGenFunc. |
1241 | FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo; |
1242 | |
1243 | // The maximum count value in the profile. This is only used in PGO use |
1244 | // compilation. |
1245 | uint64_t ProgramMaxCount; |
1246 | |
1247 | // Position of counter that remains to be read. |
1248 | uint32_t CountPosition = 0; |
1249 | |
1250 | // Total size of the profile count for this function. |
1251 | uint32_t ProfileCountSize = 0; |
1252 | |
1253 | // ProfileRecord for this function. |
1254 | NamedInstrProfRecord ProfileRecord; |
1255 | |
1256 | // Function hotness info derived from profile. |
1257 | FuncFreqAttr FreqAttr; |
1258 | |
1259 | // Is to use the context sensitive profile. |
1260 | bool IsCS; |
1261 | |
1262 | ValueProfileCollector VPC; |
1263 | |
1264 | // Find the Instrumented BB and set the value. Return false on error. |
1265 | bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); |
1266 | |
1267 | // Set the edge counter value for the unknown edge -- there should be only |
1268 | // one unknown edge. |
1269 | void setEdgeCount(DirectEdges &Edges, uint64_t Value); |
1270 | |
1271 | // Set the hot/cold inline hints based on the count values. |
1272 | // FIXME: This function should be removed once the functionality in |
1273 | // the inliner is implemented. |
1274 | void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { |
1275 | if (PSI->isHotCount(C: EntryCount)) |
1276 | FreqAttr = FFA_Hot; |
1277 | else if (PSI->isColdCount(C: MaxCount)) |
1278 | FreqAttr = FFA_Cold; |
1279 | } |
1280 | }; |
1281 | |
1282 | } // end anonymous namespace |
1283 | |
1284 | /// Set up InEdges/OutEdges for all BBs in the MST. |
1285 | static void setupBBInfoEdges( |
1286 | const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) { |
1287 | // This is not required when there is block coverage inference. |
1288 | if (FuncInfo.BCI) |
1289 | return; |
1290 | for (const auto &E : FuncInfo.MST.allEdges()) { |
1291 | if (E->Removed) |
1292 | continue; |
1293 | const BasicBlock *SrcBB = E->SrcBB; |
1294 | const BasicBlock *DestBB = E->DestBB; |
1295 | PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(BB: SrcBB); |
1296 | PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(BB: DestBB); |
1297 | SrcInfo.addOutEdge(E: E.get()); |
1298 | DestInfo.addInEdge(E: E.get()); |
1299 | } |
1300 | } |
1301 | |
1302 | // Visit all the edges and assign the count value for the instrumented |
1303 | // edges and the BB. Return false on error. |
1304 | bool PGOUseFunc::setInstrumentedCounts( |
1305 | const std::vector<uint64_t> &CountFromProfile) { |
1306 | |
1307 | std::vector<BasicBlock *> InstrumentBBs; |
1308 | FuncInfo.getInstrumentBBs(InstrumentBBs); |
1309 | |
1310 | setupBBInfoEdges(FuncInfo); |
1311 | |
1312 | unsigned NumCounters = |
1313 | InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); |
1314 | // The number of counters here should match the number of counters |
1315 | // in profile. Return if they mismatch. |
1316 | if (NumCounters != CountFromProfile.size()) { |
1317 | return false; |
1318 | } |
1319 | auto *FuncEntry = &*F.begin(); |
1320 | |
1321 | // Set the profile count to the Instrumented BBs. |
1322 | uint32_t I = 0; |
1323 | for (BasicBlock *InstrBB : InstrumentBBs) { |
1324 | uint64_t CountValue = CountFromProfile[I++]; |
1325 | PGOUseBBInfo &Info = getBBInfo(BB: InstrBB); |
1326 | // If we reach here, we know that we have some nonzero count |
1327 | // values in this function. The entry count should not be 0. |
1328 | // Fix it if necessary. |
1329 | if (InstrBB == FuncEntry && CountValue == 0) |
1330 | CountValue = 1; |
1331 | Info.setBBInfoCount(CountValue); |
1332 | } |
1333 | ProfileCountSize = CountFromProfile.size(); |
1334 | CountPosition = I; |
1335 | |
1336 | // Set the edge count and update the count of unknown edges for BBs. |
1337 | auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { |
1338 | E->setEdgeCount(Value); |
1339 | this->getBBInfo(BB: E->SrcBB).UnknownCountOutEdge--; |
1340 | this->getBBInfo(BB: E->DestBB).UnknownCountInEdge--; |
1341 | }; |
1342 | |
1343 | // Set the profile count the Instrumented edges. There are BBs that not in |
1344 | // MST but not instrumented. Need to set the edge count value so that we can |
1345 | // populate the profile counts later. |
1346 | for (const auto &E : FuncInfo.MST.allEdges()) { |
1347 | if (E->Removed || E->InMST) |
1348 | continue; |
1349 | const BasicBlock *SrcBB = E->SrcBB; |
1350 | PGOUseBBInfo &SrcInfo = getBBInfo(BB: SrcBB); |
1351 | |
1352 | // If only one out-edge, the edge profile count should be the same as BB |
1353 | // profile count. |
1354 | if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1) |
1355 | setEdgeCount(E.get(), *SrcInfo.Count); |
1356 | else { |
1357 | const BasicBlock *DestBB = E->DestBB; |
1358 | PGOUseBBInfo &DestInfo = getBBInfo(BB: DestBB); |
1359 | // If only one in-edge, the edge profile count should be the same as BB |
1360 | // profile count. |
1361 | if (DestInfo.Count && DestInfo.InEdges.size() == 1) |
1362 | setEdgeCount(E.get(), *DestInfo.Count); |
1363 | } |
1364 | if (E->Count) |
1365 | continue; |
1366 | // E's count should have been set from profile. If not, this meenas E skips |
1367 | // the instrumentation. We set the count to 0. |
1368 | setEdgeCount(E.get(), 0); |
1369 | } |
1370 | return true; |
1371 | } |
1372 | |
1373 | // Set the count value for the unknown edge. There should be one and only one |
1374 | // unknown edge in Edges vector. |
1375 | void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { |
1376 | for (auto &E : Edges) { |
1377 | if (E->Count) |
1378 | continue; |
1379 | E->setEdgeCount(Value); |
1380 | |
1381 | getBBInfo(BB: E->SrcBB).UnknownCountOutEdge--; |
1382 | getBBInfo(BB: E->DestBB).UnknownCountInEdge--; |
1383 | return; |
1384 | } |
1385 | llvm_unreachable("Cannot find the unknown count edge" ); |
1386 | } |
1387 | |
1388 | // Emit function metadata indicating PGO profile mismatch. |
1389 | static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) { |
1390 | const char MetadataName[] = "instr_prof_hash_mismatch" ; |
1391 | SmallVector<Metadata *, 2> Names; |
1392 | // If this metadata already exists, ignore. |
1393 | auto *Existing = F.getMetadata(KindID: LLVMContext::MD_annotation); |
1394 | if (Existing) { |
1395 | MDTuple *Tuple = cast<MDTuple>(Val: Existing); |
1396 | for (const auto &N : Tuple->operands()) { |
1397 | if (N.equalsStr(Str: MetadataName)) |
1398 | return; |
1399 | Names.push_back(Elt: N.get()); |
1400 | } |
1401 | } |
1402 | |
1403 | MDBuilder MDB(ctx); |
1404 | Names.push_back(Elt: MDB.createString(Str: MetadataName)); |
1405 | MDNode *MD = MDTuple::get(Context&: ctx, MDs: Names); |
1406 | F.setMetadata(KindID: LLVMContext::MD_annotation, Node: MD); |
1407 | } |
1408 | |
1409 | void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { |
1410 | handleAllErrors(E: std::move(Err), Handlers: [&](const InstrProfError &IPE) { |
1411 | auto &Ctx = M->getContext(); |
1412 | auto Err = IPE.get(); |
1413 | bool SkipWarning = false; |
1414 | LLVM_DEBUG(dbgs() << "Error in reading profile for Func " |
1415 | << FuncInfo.FuncName << ": " ); |
1416 | if (Err == instrprof_error::unknown_function) { |
1417 | IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; |
1418 | SkipWarning = !PGOWarnMissing; |
1419 | LLVM_DEBUG(dbgs() << "unknown function" ); |
1420 | } else if (Err == instrprof_error::hash_mismatch || |
1421 | Err == instrprof_error::malformed) { |
1422 | IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; |
1423 | SkipWarning = |
1424 | NoPGOWarnMismatch || |
1425 | (NoPGOWarnMismatchComdatWeak && |
1426 | (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || |
1427 | F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); |
1428 | LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash |
1429 | << " skip=" << SkipWarning << ")" ); |
1430 | // Emit function metadata indicating PGO profile mismatch. |
1431 | annotateFunctionWithHashMismatch(F, ctx&: M->getContext()); |
1432 | } |
1433 | |
1434 | LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n" ); |
1435 | if (SkipWarning) |
1436 | return; |
1437 | |
1438 | std::string Msg = |
1439 | IPE.message() + std::string(" " ) + F.getName().str() + |
1440 | std::string(" Hash = " ) + std::to_string(val: FuncInfo.FunctionHash) + |
1441 | std::string(" up to " ) + std::to_string(val: MismatchedFuncSum) + |
1442 | std::string(" count discarded" ); |
1443 | |
1444 | Ctx.diagnose( |
1445 | DI: DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); |
1446 | }); |
1447 | } |
1448 | |
1449 | bool PGOUseFunc::getRecord(IndexedInstrProfReader *PGOReader) { |
1450 | uint64_t MismatchedFuncSum = 0; |
1451 | auto Result = PGOReader->getInstrProfRecord( |
1452 | FuncName: FuncInfo.FuncName, FuncHash: FuncInfo.FunctionHash, DeprecatedFuncName: FuncInfo.DeprecatedFuncName, |
1453 | MismatchedFuncSum: &MismatchedFuncSum); |
1454 | if (Error E = Result.takeError()) { |
1455 | handleInstrProfError(Err: std::move(E), MismatchedFuncSum); |
1456 | return false; |
1457 | } |
1458 | ProfileRecord = std::move(Result.get()); |
1459 | ProgramMaxCount = PGOReader->getMaximumFunctionCount(UseCS: IsCS); |
1460 | return true; |
1461 | } |
1462 | |
1463 | // Read the profile from ProfileFileName and assign the value to the |
1464 | // instrumented BB and the edges. Return true if the profile are successfully |
1465 | // read, and false on errors. |
1466 | bool PGOUseFunc::readCounters(bool &AllZeros, |
1467 | InstrProfRecord::CountPseudoKind &PseudoKind) { |
1468 | auto &Ctx = M->getContext(); |
1469 | PseudoKind = ProfileRecord.getCountPseudoKind(); |
1470 | if (PseudoKind != InstrProfRecord::NotPseudo) { |
1471 | return true; |
1472 | } |
1473 | std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; |
1474 | |
1475 | IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; |
1476 | LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n" ); |
1477 | |
1478 | uint64_t ValueSum = 0; |
1479 | for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { |
1480 | LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n" ); |
1481 | ValueSum += CountFromProfile[I]; |
1482 | } |
1483 | AllZeros = (ValueSum == 0); |
1484 | |
1485 | LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n" ); |
1486 | |
1487 | getBBInfo(BB: nullptr).UnknownCountOutEdge = 2; |
1488 | getBBInfo(BB: nullptr).UnknownCountInEdge = 2; |
1489 | |
1490 | if (!setInstrumentedCounts(CountFromProfile)) { |
1491 | LLVM_DEBUG( |
1492 | dbgs() << "Inconsistent number of counts, skipping this function" ); |
1493 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1494 | M->getName().data(), |
1495 | Twine("Inconsistent number of counts in " ) + F.getName().str() + |
1496 | Twine(": the profile may be stale or there is a function name " |
1497 | "collision." ), |
1498 | DS_Warning)); |
1499 | return false; |
1500 | } |
1501 | return true; |
1502 | } |
1503 | |
1504 | void PGOUseFunc::populateCoverage() { |
1505 | IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; |
1506 | |
1507 | ArrayRef<uint64_t> CountsFromProfile = ProfileRecord.Counts; |
1508 | DenseMap<const BasicBlock *, bool> Coverage; |
1509 | unsigned Index = 0; |
1510 | for (auto &BB : F) |
1511 | if (FuncInfo.BCI->shouldInstrumentBlock(BB)) |
1512 | Coverage[&BB] = (CountsFromProfile[Index++] != 0); |
1513 | assert(Index == CountsFromProfile.size()); |
1514 | |
1515 | // For each B in InverseDependencies[A], if A is covered then B is covered. |
1516 | DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>> |
1517 | InverseDependencies; |
1518 | for (auto &BB : F) { |
1519 | for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) { |
1520 | // If Dep is covered then BB is covered. |
1521 | InverseDependencies[Dep].insert(V: &BB); |
1522 | } |
1523 | } |
1524 | |
1525 | // Infer coverage of the non-instrumented blocks using a flood-fill algorithm. |
1526 | std::stack<const BasicBlock *> CoveredBlocksToProcess; |
1527 | for (auto &[BB, IsCovered] : Coverage) |
1528 | if (IsCovered) |
1529 | CoveredBlocksToProcess.push(x: BB); |
1530 | |
1531 | while (!CoveredBlocksToProcess.empty()) { |
1532 | auto *CoveredBlock = CoveredBlocksToProcess.top(); |
1533 | assert(Coverage[CoveredBlock]); |
1534 | CoveredBlocksToProcess.pop(); |
1535 | for (auto *BB : InverseDependencies[CoveredBlock]) { |
1536 | // If CoveredBlock is covered then BB is covered. |
1537 | bool &Cov = Coverage[BB]; |
1538 | if (Cov) |
1539 | continue; |
1540 | Cov = true; |
1541 | CoveredBlocksToProcess.push(x: BB); |
1542 | } |
1543 | } |
1544 | |
1545 | // Annotate block coverage. |
1546 | MDBuilder MDB(F.getContext()); |
1547 | // We set the entry count to 10000 if the entry block is covered so that BFI |
1548 | // can propagate a fraction of this count to the other covered blocks. |
1549 | F.setEntryCount(Count: Coverage[&F.getEntryBlock()] ? 10000 : 0); |
1550 | for (auto &BB : F) { |
1551 | // For a block A and its successor B, we set the edge weight as follows: |
1552 | // If A is covered and B is covered, set weight=1. |
1553 | // If A is covered and B is uncovered, set weight=0. |
1554 | // If A is uncovered, set weight=1. |
1555 | // This setup will allow BFI to give nonzero profile counts to only covered |
1556 | // blocks. |
1557 | SmallVector<uint32_t, 4> Weights; |
1558 | for (auto *Succ : successors(BB: &BB)) |
1559 | Weights.push_back(Elt: (Coverage[Succ] || !Coverage[&BB]) ? 1 : 0); |
1560 | if (Weights.size() >= 2) |
1561 | llvm::setBranchWeights(I&: *BB.getTerminator(), Weights, |
1562 | /*IsExpected=*/false); |
1563 | } |
1564 | |
1565 | unsigned NumCorruptCoverage = 0; |
1566 | DominatorTree DT(F); |
1567 | LoopInfo LI(DT); |
1568 | BranchProbabilityInfo BPI(F, LI); |
1569 | BlockFrequencyInfo BFI(F, BPI, LI); |
1570 | auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> { |
1571 | if (auto C = BFI.getBlockProfileCount(BB: &BB)) |
1572 | return C == 0; |
1573 | return {}; |
1574 | }; |
1575 | LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n" ); |
1576 | for (auto &BB : F) { |
1577 | LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " " ) |
1578 | << (Coverage[&BB] ? "X " : " " ) << " " << BB.getName() |
1579 | << "\n" ); |
1580 | // In some cases it is possible to find a covered block that has no covered |
1581 | // successors, e.g., when a block calls a function that may call exit(). In |
1582 | // those cases, BFI could find its successor to be covered while BCI could |
1583 | // find its successor to be dead. |
1584 | const bool &Cov = Coverage[&BB]; |
1585 | if (Cov == IsBlockDead(BB).value_or(u: false)) { |
1586 | LLVM_DEBUG( |
1587 | dbgs() << "Found inconsistent block covearge for " << BB.getName() |
1588 | << ": BCI=" << (Cov ? "Covered" : "Dead" ) << " BFI=" |
1589 | << (IsBlockDead(BB).value() ? "Dead" : "Covered" ) << "\n" ); |
1590 | ++NumCorruptCoverage; |
1591 | } |
1592 | if (Cov) |
1593 | ++NumCoveredBlocks; |
1594 | } |
1595 | if (PGOVerifyBFI && NumCorruptCoverage) { |
1596 | auto &Ctx = M->getContext(); |
1597 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1598 | M->getName().data(), |
1599 | Twine("Found inconsistent block coverage for function " ) + F.getName() + |
1600 | " in " + Twine(NumCorruptCoverage) + " blocks." , |
1601 | DS_Warning)); |
1602 | } |
1603 | if (PGOViewBlockCoverageGraph) |
1604 | FuncInfo.BCI->viewBlockCoverageGraph(Coverage: &Coverage); |
1605 | } |
1606 | |
1607 | // Populate the counters from instrumented BBs to all BBs. |
1608 | // In the end of this operation, all BBs should have a valid count value. |
1609 | void PGOUseFunc::populateCounters() { |
1610 | bool Changes = true; |
1611 | unsigned NumPasses = 0; |
1612 | while (Changes) { |
1613 | NumPasses++; |
1614 | Changes = false; |
1615 | |
1616 | // For efficient traversal, it's better to start from the end as most |
1617 | // of the instrumented edges are at the end. |
1618 | for (auto &BB : reverse(C&: F)) { |
1619 | PGOUseBBInfo *UseBBInfo = findBBInfo(BB: &BB); |
1620 | if (UseBBInfo == nullptr) |
1621 | continue; |
1622 | if (!UseBBInfo->Count) { |
1623 | if (UseBBInfo->UnknownCountOutEdge == 0) { |
1624 | UseBBInfo->Count = sumEdgeCount(Edges: UseBBInfo->OutEdges); |
1625 | Changes = true; |
1626 | } else if (UseBBInfo->UnknownCountInEdge == 0) { |
1627 | UseBBInfo->Count = sumEdgeCount(Edges: UseBBInfo->InEdges); |
1628 | Changes = true; |
1629 | } |
1630 | } |
1631 | if (UseBBInfo->Count) { |
1632 | if (UseBBInfo->UnknownCountOutEdge == 1) { |
1633 | uint64_t Total = 0; |
1634 | uint64_t OutSum = sumEdgeCount(Edges: UseBBInfo->OutEdges); |
1635 | // If the one of the successor block can early terminate (no-return), |
1636 | // we can end up with situation where out edge sum count is larger as |
1637 | // the source BB's count is collected by a post-dominated block. |
1638 | if (*UseBBInfo->Count > OutSum) |
1639 | Total = *UseBBInfo->Count - OutSum; |
1640 | setEdgeCount(Edges&: UseBBInfo->OutEdges, Value: Total); |
1641 | Changes = true; |
1642 | } |
1643 | if (UseBBInfo->UnknownCountInEdge == 1) { |
1644 | uint64_t Total = 0; |
1645 | uint64_t InSum = sumEdgeCount(Edges: UseBBInfo->InEdges); |
1646 | if (*UseBBInfo->Count > InSum) |
1647 | Total = *UseBBInfo->Count - InSum; |
1648 | setEdgeCount(Edges&: UseBBInfo->InEdges, Value: Total); |
1649 | Changes = true; |
1650 | } |
1651 | } |
1652 | } |
1653 | } |
1654 | |
1655 | LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n" ); |
1656 | (void)NumPasses; |
1657 | #ifndef NDEBUG |
1658 | // Assert every BB has a valid counter. |
1659 | for (auto &BB : F) { |
1660 | auto BI = findBBInfo(&BB); |
1661 | if (BI == nullptr) |
1662 | continue; |
1663 | assert(BI->Count && "BB count is not valid" ); |
1664 | } |
1665 | #endif |
1666 | // Now annotate select instructions. This may fixup impossible block counts. |
1667 | FuncInfo.SIVisitor.annotateSelects(UF: this, Ind: &CountPosition); |
1668 | assert(CountPosition == ProfileCountSize); |
1669 | |
1670 | uint64_t FuncEntryCount = *getBBInfo(BB: &*F.begin()).Count; |
1671 | uint64_t FuncMaxCount = FuncEntryCount; |
1672 | for (auto &BB : F) { |
1673 | auto BI = findBBInfo(BB: &BB); |
1674 | if (BI == nullptr) |
1675 | continue; |
1676 | FuncMaxCount = std::max(a: FuncMaxCount, b: *BI->Count); |
1677 | } |
1678 | |
1679 | // Fix the obviously inconsistent entry count. |
1680 | if (FuncMaxCount > 0 && FuncEntryCount == 0) |
1681 | FuncEntryCount = 1; |
1682 | F.setEntryCount(Count: ProfileCount(FuncEntryCount, Function::PCT_Real)); |
1683 | markFunctionAttributes(EntryCount: FuncEntryCount, MaxCount: FuncMaxCount); |
1684 | |
1685 | LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile." )); |
1686 | } |
1687 | |
1688 | // Assign the scaled count values to the BB with multiple out edges. |
1689 | void PGOUseFunc::setBranchWeights() { |
1690 | // Generate MD_prof metadata for every branch instruction. |
1691 | LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() |
1692 | << " IsCS=" << IsCS << "\n" ); |
1693 | for (auto &BB : F) { |
1694 | Instruction *TI = BB.getTerminator(); |
1695 | if (TI->getNumSuccessors() < 2) |
1696 | continue; |
1697 | if (!(isa<BranchInst>(Val: TI) || isa<SwitchInst>(Val: TI) || |
1698 | isa<IndirectBrInst>(Val: TI) || isa<InvokeInst>(Val: TI) || |
1699 | isa<CallBrInst>(Val: TI))) |
1700 | continue; |
1701 | |
1702 | const PGOUseBBInfo &BBCountInfo = getBBInfo(BB: &BB); |
1703 | if (!*BBCountInfo.Count) |
1704 | continue; |
1705 | |
1706 | // We have a non-zero Branch BB. |
1707 | |
1708 | // SuccessorCount can be greater than OutEdgesCount, because |
1709 | // removed edges don't appear in OutEdges. |
1710 | unsigned OutEdgesCount = BBCountInfo.OutEdges.size(); |
1711 | unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors(); |
1712 | assert(OutEdgesCount <= SuccessorCount); |
1713 | |
1714 | SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0); |
1715 | uint64_t MaxCount = 0; |
1716 | for (unsigned It = 0; It < OutEdgesCount; It++) { |
1717 | const PGOUseEdge *E = BBCountInfo.OutEdges[It]; |
1718 | const BasicBlock *SrcBB = E->SrcBB; |
1719 | const BasicBlock *DestBB = E->DestBB; |
1720 | if (DestBB == nullptr) |
1721 | continue; |
1722 | unsigned SuccNum = GetSuccessorNumber(BB: SrcBB, Succ: DestBB); |
1723 | uint64_t EdgeCount = *E->Count; |
1724 | if (EdgeCount > MaxCount) |
1725 | MaxCount = EdgeCount; |
1726 | EdgeCounts[SuccNum] = EdgeCount; |
1727 | } |
1728 | |
1729 | if (MaxCount) |
1730 | setProfMetadata(M, TI, EdgeCounts, MaxCount); |
1731 | else { |
1732 | // A zero MaxCount can come about when we have a BB with a positive |
1733 | // count, and whose successor blocks all have 0 count. This can happen |
1734 | // when there is no exit block and the code exits via a noreturn function. |
1735 | auto &Ctx = M->getContext(); |
1736 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1737 | M->getName().data(), |
1738 | Twine("Profile in " ) + F.getName().str() + |
1739 | Twine(" partially ignored" ) + |
1740 | Twine(", possibly due to the lack of a return path." ), |
1741 | DS_Warning)); |
1742 | } |
1743 | } |
1744 | } |
1745 | |
1746 | static bool isIndirectBrTarget(BasicBlock *BB) { |
1747 | for (BasicBlock *Pred : predecessors(BB)) { |
1748 | if (isa<IndirectBrInst>(Val: Pred->getTerminator())) |
1749 | return true; |
1750 | } |
1751 | return false; |
1752 | } |
1753 | |
1754 | void PGOUseFunc::() { |
1755 | LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n" ); |
1756 | // Find irr loop headers |
1757 | for (auto &BB : F) { |
1758 | // As a heuristic also annotate indrectbr targets as they have a high chance |
1759 | // to become an irreducible loop header after the indirectbr tail |
1760 | // duplication. |
1761 | if (BFI->isIrrLoopHeader(BB: &BB) || isIndirectBrTarget(BB: &BB)) { |
1762 | Instruction *TI = BB.getTerminator(); |
1763 | const PGOUseBBInfo &BBCountInfo = getBBInfo(BB: &BB); |
1764 | setIrrLoopHeaderMetadata(M, TI, Count: *BBCountInfo.Count); |
1765 | } |
1766 | } |
1767 | } |
1768 | |
1769 | void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { |
1770 | Module *M = F.getParent(); |
1771 | IRBuilder<> Builder(&SI); |
1772 | Type *Int64Ty = Builder.getInt64Ty(); |
1773 | auto *Step = Builder.CreateZExt(V: SI.getCondition(), DestTy: Int64Ty); |
1774 | auto *NormalizedFuncNameVarPtr = |
1775 | ConstantExpr::getPointerBitCastOrAddrSpaceCast( |
1776 | C: FuncNameVar, Ty: PointerType::get(C&: M->getContext(), AddressSpace: 0)); |
1777 | Builder.CreateIntrinsic(ID: Intrinsic::instrprof_increment_step, |
1778 | Args: {NormalizedFuncNameVarPtr, Builder.getInt64(C: FuncHash), |
1779 | Builder.getInt32(C: TotalNumCtrs), |
1780 | Builder.getInt32(C: *CurCtrIdx), Step}); |
1781 | ++(*CurCtrIdx); |
1782 | } |
1783 | |
1784 | void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { |
1785 | std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; |
1786 | assert(*CurCtrIdx < CountFromProfile.size() && |
1787 | "Out of bound access of counters" ); |
1788 | uint64_t SCounts[2]; |
1789 | SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count |
1790 | ++(*CurCtrIdx); |
1791 | uint64_t TotalCount = 0; |
1792 | auto BI = UseFunc->findBBInfo(BB: SI.getParent()); |
1793 | if (BI != nullptr) { |
1794 | TotalCount = *BI->Count; |
1795 | |
1796 | // Fix the block count if it is impossible. |
1797 | if (TotalCount < SCounts[0]) |
1798 | BI->Count = SCounts[0]; |
1799 | } |
1800 | // False Count |
1801 | SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); |
1802 | uint64_t MaxCount = std::max(a: SCounts[0], b: SCounts[1]); |
1803 | if (MaxCount) |
1804 | setProfMetadata(M: F.getParent(), TI: &SI, EdgeCounts: SCounts, MaxCount); |
1805 | } |
1806 | |
1807 | void SelectInstVisitor::visitSelectInst(SelectInst &SI) { |
1808 | if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage) |
1809 | return; |
1810 | // FIXME: do not handle this yet. |
1811 | if (SI.getCondition()->getType()->isVectorTy()) |
1812 | return; |
1813 | |
1814 | switch (Mode) { |
1815 | case VM_counting: |
1816 | NSIs++; |
1817 | return; |
1818 | case VM_instrument: |
1819 | instrumentOneSelectInst(SI); |
1820 | return; |
1821 | case VM_annotate: |
1822 | annotateOneSelectInst(SI); |
1823 | return; |
1824 | } |
1825 | |
1826 | llvm_unreachable("Unknown visiting mode" ); |
1827 | } |
1828 | |
1829 | static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) { |
1830 | if (ValueProfKind == IPVK_MemOPSize) |
1831 | return MaxNumMemOPAnnotations; |
1832 | if (ValueProfKind == llvm::IPVK_VTableTarget) |
1833 | return MaxNumVTableAnnotations; |
1834 | return MaxNumAnnotations; |
1835 | } |
1836 | |
1837 | // Traverse all valuesites and annotate the instructions for all value kind. |
1838 | void PGOUseFunc::annotateValueSites() { |
1839 | if (DisableValueProfiling) |
1840 | return; |
1841 | |
1842 | // Create the PGOFuncName meta data. |
1843 | createPGOFuncNameMetadata(F, PGOFuncName: FuncInfo.FuncName); |
1844 | |
1845 | for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) |
1846 | annotateValueSites(Kind); |
1847 | } |
1848 | |
1849 | // Annotate the instructions for a specific value kind. |
1850 | void PGOUseFunc::annotateValueSites(uint32_t Kind) { |
1851 | assert(Kind <= IPVK_Last); |
1852 | unsigned ValueSiteIndex = 0; |
1853 | |
1854 | unsigned NumValueSites = ProfileRecord.getNumValueSites(ValueKind: Kind); |
1855 | |
1856 | // Since there isn't a reliable or fast way for profile reader to tell if a |
1857 | // profile is generated with `-enable-vtable-value-profiling` on, we run the |
1858 | // value profile collector over the function IR to find the instrumented sites |
1859 | // iff function profile records shows the number of instrumented vtable sites |
1860 | // is not zero. Function cfg already takes the number of instrumented |
1861 | // indirect call sites into account so it doesn't hash the number of |
1862 | // instrumented vtables; as a side effect it makes it easier to enable |
1863 | // profiling and profile use in two steps if needed. |
1864 | // TODO: Remove this if/when -enable-vtable-value-profiling is on by default. |
1865 | if (NumValueSites > 0 && Kind == IPVK_VTableTarget && |
1866 | NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() && |
1867 | MaxNumVTableAnnotations != 0) |
1868 | FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(Kind: IPVK_VTableTarget); |
1869 | auto &ValueSites = FuncInfo.ValueSites[Kind]; |
1870 | if (NumValueSites != ValueSites.size()) { |
1871 | auto &Ctx = M->getContext(); |
1872 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1873 | M->getName().data(), |
1874 | Twine("Inconsistent number of value sites for " ) + |
1875 | Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"" ) + |
1876 | F.getName().str() + |
1877 | Twine("\", possibly due to the use of a stale profile." ), |
1878 | DS_Warning)); |
1879 | return; |
1880 | } |
1881 | |
1882 | for (VPCandidateInfo &I : ValueSites) { |
1883 | LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind |
1884 | << "): Index = " << ValueSiteIndex << " out of " |
1885 | << NumValueSites << "\n" ); |
1886 | annotateValueSite( |
1887 | M&: *M, Inst&: *I.AnnotatedInst, InstrProfR: ProfileRecord, |
1888 | ValueKind: static_cast<InstrProfValueKind>(Kind), SiteIndx: ValueSiteIndex, |
1889 | MaxMDCount: getMaxNumAnnotations(ValueProfKind: static_cast<InstrProfValueKind>(Kind))); |
1890 | ValueSiteIndex++; |
1891 | } |
1892 | } |
1893 | |
1894 | // Collect the set of members for each Comdat in module M and store |
1895 | // in ComdatMembers. |
1896 | static void collectComdatMembers( |
1897 | Module &M, |
1898 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { |
1899 | if (!DoComdatRenaming) |
1900 | return; |
1901 | for (Function &F : M) |
1902 | if (Comdat *C = F.getComdat()) |
1903 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &F)); |
1904 | for (GlobalVariable &GV : M.globals()) |
1905 | if (Comdat *C = GV.getComdat()) |
1906 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &GV)); |
1907 | for (GlobalAlias &GA : M.aliases()) |
1908 | if (Comdat *C = GA.getComdat()) |
1909 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &GA)); |
1910 | } |
1911 | |
1912 | // Return true if we should not find instrumentation data for this function |
1913 | static bool skipPGOUse(const Function &F) { |
1914 | if (F.isDeclaration()) |
1915 | return true; |
1916 | // If there are too many critical edges, PGO might cause |
1917 | // compiler time problem. Skip PGO if the number of |
1918 | // critical edges execeed the threshold. |
1919 | unsigned NumCriticalEdges = 0; |
1920 | for (auto &BB : F) { |
1921 | const Instruction *TI = BB.getTerminator(); |
1922 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { |
1923 | if (isCriticalEdge(TI, SuccNum: I)) |
1924 | NumCriticalEdges++; |
1925 | } |
1926 | } |
1927 | if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { |
1928 | LLVM_DEBUG(dbgs() << "In func " << F.getName() |
1929 | << ", NumCriticalEdges=" << NumCriticalEdges |
1930 | << " exceed the threshold. Skip PGO.\n" ); |
1931 | return true; |
1932 | } |
1933 | return false; |
1934 | } |
1935 | |
1936 | // Return true if we should not instrument this function |
1937 | static bool skipPGOGen(const Function &F) { |
1938 | if (skipPGOUse(F)) |
1939 | return true; |
1940 | if (F.hasFnAttribute(Kind: llvm::Attribute::Naked)) |
1941 | return true; |
1942 | if (F.hasFnAttribute(Kind: llvm::Attribute::NoProfile)) |
1943 | return true; |
1944 | if (F.hasFnAttribute(Kind: llvm::Attribute::SkipProfile)) |
1945 | return true; |
1946 | if (F.getInstructionCount() < PGOFunctionSizeThreshold) |
1947 | return true; |
1948 | if (PGOInstrumentColdFunctionOnly) { |
1949 | if (auto EntryCount = F.getEntryCount()) |
1950 | return EntryCount->getCount() > PGOColdInstrumentEntryThreshold; |
1951 | return !PGOTreatUnknownAsCold; |
1952 | } |
1953 | return false; |
1954 | } |
1955 | |
1956 | static bool InstrumentAllFunctions( |
1957 | Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, |
1958 | function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, |
1959 | function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, |
1960 | function_ref<LoopInfo *(Function &)> LookupLI, |
1961 | PGOInstrumentationType InstrumentationType) { |
1962 | // For the context-sensitve instrumentation, we should have a separated pass |
1963 | // (before LTO/ThinLTO linking) to create these variables. |
1964 | if (InstrumentationType == PGOInstrumentationType::FDO) |
1965 | createIRLevelProfileFlagVar(M, InstrumentationType); |
1966 | |
1967 | Triple TT(M.getTargetTriple()); |
1968 | LLVMContext &Ctx = M.getContext(); |
1969 | if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling) |
1970 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1971 | M.getName().data(), |
1972 | Twine("VTable value profiling is presently not " |
1973 | "supported for non-ELF object formats" ), |
1974 | DS_Warning)); |
1975 | std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; |
1976 | collectComdatMembers(M, ComdatMembers); |
1977 | |
1978 | for (auto &F : M) { |
1979 | if (skipPGOGen(F)) |
1980 | continue; |
1981 | TargetLibraryInfo &TLI = LookupTLI(F); |
1982 | BranchProbabilityInfo *BPI = LookupBPI(F); |
1983 | BlockFrequencyInfo *BFI = LookupBFI(F); |
1984 | LoopInfo *LI = LookupLI(F); |
1985 | FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI, |
1986 | InstrumentationType); |
1987 | FI.instrument(); |
1988 | } |
1989 | return true; |
1990 | } |
1991 | |
1992 | PreservedAnalyses |
1993 | PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { |
1994 | createProfileFileNameVar(M, InstrProfileOutput: CSInstrName); |
1995 | // The variable in a comdat may be discarded by LTO. Ensure the declaration |
1996 | // will be retained. |
1997 | appendToCompilerUsed( |
1998 | M, Values: createIRLevelProfileFlagVar(M, InstrumentationType: PGOInstrumentationType::CSFDO)); |
1999 | if (ProfileSampling) |
2000 | createProfileSamplingVar(M); |
2001 | PreservedAnalyses PA; |
2002 | PA.preserve<FunctionAnalysisManagerModuleProxy>(); |
2003 | PA.preserveSet<AllAnalysesOn<Function>>(); |
2004 | return PA; |
2005 | } |
2006 | |
2007 | PreservedAnalyses PGOInstrumentationGen::run(Module &M, |
2008 | ModuleAnalysisManager &MAM) { |
2009 | auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
2010 | auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { |
2011 | return FAM.getResult<TargetLibraryAnalysis>(IR&: F); |
2012 | }; |
2013 | auto LookupBPI = [&FAM](Function &F) { |
2014 | return &FAM.getResult<BranchProbabilityAnalysis>(IR&: F); |
2015 | }; |
2016 | auto LookupBFI = [&FAM](Function &F) { |
2017 | return &FAM.getResult<BlockFrequencyAnalysis>(IR&: F); |
2018 | }; |
2019 | auto LookupLI = [&FAM](Function &F) { |
2020 | return &FAM.getResult<LoopAnalysis>(IR&: F); |
2021 | }; |
2022 | |
2023 | if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI, |
2024 | InstrumentationType)) |
2025 | return PreservedAnalyses::all(); |
2026 | |
2027 | return PreservedAnalyses::none(); |
2028 | } |
2029 | |
2030 | // Using the ratio b/w sums of profile count values and BFI count values to |
2031 | // adjust the func entry count. |
2032 | static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, |
2033 | BranchProbabilityInfo &NBPI) { |
2034 | Function &F = Func.getFunc(); |
2035 | BlockFrequencyInfo NBFI(F, NBPI, LI); |
2036 | #ifndef NDEBUG |
2037 | auto BFIEntryCount = F.getEntryCount(); |
2038 | assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && |
2039 | "Invalid BFI Entrycount" ); |
2040 | #endif |
2041 | auto SumCount = APFloat::getZero(Sem: APFloat::IEEEdouble()); |
2042 | auto SumBFICount = APFloat::getZero(Sem: APFloat::IEEEdouble()); |
2043 | for (auto &BBI : F) { |
2044 | uint64_t CountValue = 0; |
2045 | uint64_t BFICountValue = 0; |
2046 | if (!Func.findBBInfo(BB: &BBI)) |
2047 | continue; |
2048 | auto BFICount = NBFI.getBlockProfileCount(BB: &BBI); |
2049 | CountValue = *Func.getBBInfo(BB: &BBI).Count; |
2050 | BFICountValue = *BFICount; |
2051 | SumCount.add(RHS: APFloat(CountValue * 1.0), RM: APFloat::rmNearestTiesToEven); |
2052 | SumBFICount.add(RHS: APFloat(BFICountValue * 1.0), RM: APFloat::rmNearestTiesToEven); |
2053 | } |
2054 | if (SumCount.isZero()) |
2055 | return; |
2056 | |
2057 | assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && |
2058 | "Incorrect sum of BFI counts" ); |
2059 | if (SumBFICount.compare(RHS: SumCount) == APFloat::cmpEqual) |
2060 | return; |
2061 | double Scale = (SumCount / SumBFICount).convertToDouble(); |
2062 | if (Scale < 1.001 && Scale > 0.999) |
2063 | return; |
2064 | |
2065 | uint64_t FuncEntryCount = *Func.getBBInfo(BB: &*F.begin()).Count; |
2066 | uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; |
2067 | if (NewEntryCount == 0) |
2068 | NewEntryCount = 1; |
2069 | if (NewEntryCount != FuncEntryCount) { |
2070 | F.setEntryCount(Count: ProfileCount(NewEntryCount, Function::PCT_Real)); |
2071 | LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() |
2072 | << ", entry_count " << FuncEntryCount << " --> " |
2073 | << NewEntryCount << "\n" ); |
2074 | } |
2075 | } |
2076 | |
2077 | // Compare the profile count values with BFI count values, and print out |
2078 | // the non-matching ones. |
2079 | static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, |
2080 | BranchProbabilityInfo &NBPI, |
2081 | uint64_t HotCountThreshold, |
2082 | uint64_t ColdCountThreshold) { |
2083 | Function &F = Func.getFunc(); |
2084 | BlockFrequencyInfo NBFI(F, NBPI, LI); |
2085 | // bool PrintFunc = false; |
2086 | bool HotBBOnly = PGOVerifyHotBFI; |
2087 | StringRef Msg; |
2088 | OptimizationRemarkEmitter ORE(&F); |
2089 | |
2090 | unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; |
2091 | for (auto &BBI : F) { |
2092 | PGOUseBBInfo *BBInfo = Func.findBBInfo(BB: &BBI); |
2093 | if (!BBInfo) |
2094 | continue; |
2095 | |
2096 | uint64_t CountValue = BBInfo->Count.value_or(u&: CountValue); |
2097 | uint64_t BFICountValue = 0; |
2098 | |
2099 | BBNum++; |
2100 | if (CountValue) |
2101 | NonZeroBBNum++; |
2102 | auto BFICount = NBFI.getBlockProfileCount(BB: &BBI); |
2103 | if (BFICount) |
2104 | BFICountValue = *BFICount; |
2105 | |
2106 | if (HotBBOnly) { |
2107 | bool rawIsHot = CountValue >= HotCountThreshold; |
2108 | bool BFIIsHot = BFICountValue >= HotCountThreshold; |
2109 | bool rawIsCold = CountValue <= ColdCountThreshold; |
2110 | bool ShowCount = false; |
2111 | if (rawIsHot && !BFIIsHot) { |
2112 | Msg = "raw-Hot to BFI-nonHot" ; |
2113 | ShowCount = true; |
2114 | } else if (rawIsCold && BFIIsHot) { |
2115 | Msg = "raw-Cold to BFI-Hot" ; |
2116 | ShowCount = true; |
2117 | } |
2118 | if (!ShowCount) |
2119 | continue; |
2120 | } else { |
2121 | if ((CountValue < PGOVerifyBFICutoff) && |
2122 | (BFICountValue < PGOVerifyBFICutoff)) |
2123 | continue; |
2124 | uint64_t Diff = (BFICountValue >= CountValue) |
2125 | ? BFICountValue - CountValue |
2126 | : CountValue - BFICountValue; |
2127 | if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) |
2128 | continue; |
2129 | } |
2130 | BBMisMatchNum++; |
2131 | |
2132 | ORE.emit(RemarkBuilder: [&]() { |
2133 | OptimizationRemarkAnalysis (DEBUG_TYPE, "bfi-verify" , |
2134 | F.getSubprogram(), &BBI); |
2135 | Remark << "BB " << ore::NV("Block" , BBI.getName()) |
2136 | << " Count=" << ore::NV("Count" , CountValue) |
2137 | << " BFI_Count=" << ore::NV("Count" , BFICountValue); |
2138 | if (!Msg.empty()) |
2139 | Remark << " (" << Msg << ")" ; |
2140 | return Remark; |
2141 | }); |
2142 | } |
2143 | if (BBMisMatchNum) |
2144 | ORE.emit(RemarkBuilder: [&]() { |
2145 | return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify" , |
2146 | F.getSubprogram(), &F.getEntryBlock()) |
2147 | << "In Func " << ore::NV("Function" , F.getName()) |
2148 | << ": Num_of_BB=" << ore::NV("Count" , BBNum) |
2149 | << ", Num_of_non_zerovalue_BB=" << ore::NV("Count" , NonZeroBBNum) |
2150 | << ", Num_of_mis_matching_BB=" << ore::NV("Count" , BBMisMatchNum); |
2151 | }); |
2152 | } |
2153 | |
2154 | static bool annotateAllFunctions( |
2155 | Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, |
2156 | vfs::FileSystem &FS, |
2157 | function_ref<TargetLibraryInfo &(Function &)> LookupTLI, |
2158 | function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, |
2159 | function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, |
2160 | function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI, |
2161 | bool IsCS) { |
2162 | LLVM_DEBUG(dbgs() << "Read in profile counters: " ); |
2163 | auto &Ctx = M.getContext(); |
2164 | // Read the counter array from file. |
2165 | auto ReaderOrErr = IndexedInstrProfReader::create(Path: ProfileFileName, FS, |
2166 | RemappingPath: ProfileRemappingFileName); |
2167 | if (Error E = ReaderOrErr.takeError()) { |
2168 | handleAllErrors(E: std::move(E), Handlers: [&](const ErrorInfoBase &EI) { |
2169 | Ctx.diagnose( |
2170 | DI: DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); |
2171 | }); |
2172 | return false; |
2173 | } |
2174 | |
2175 | std::unique_ptr<IndexedInstrProfReader> PGOReader = |
2176 | std::move(ReaderOrErr.get()); |
2177 | if (!PGOReader) { |
2178 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile(ProfileFileName.data(), |
2179 | StringRef("Cannot get PGOReader" ))); |
2180 | return false; |
2181 | } |
2182 | if (!PGOReader->hasCSIRLevelProfile() && IsCS) |
2183 | return false; |
2184 | |
2185 | // TODO: might need to change the warning once the clang option is finalized. |
2186 | if (!PGOReader->isIRLevelProfile()) { |
2187 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
2188 | ProfileFileName.data(), "Not an IR level instrumentation profile" )); |
2189 | return false; |
2190 | } |
2191 | if (PGOReader->functionEntryOnly()) { |
2192 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
2193 | ProfileFileName.data(), |
2194 | "Function entry profiles are not yet supported for optimization" )); |
2195 | return false; |
2196 | } |
2197 | |
2198 | if (EnableVTableProfileUse) { |
2199 | for (GlobalVariable &G : M.globals()) { |
2200 | if (!G.hasName() || !G.hasMetadata(KindID: LLVMContext::MD_type)) |
2201 | continue; |
2202 | |
2203 | // Create the PGOFuncName meta data. |
2204 | createPGONameMetadata(GO&: G, PGOName: getPGOName(V: G, InLTO: false /* InLTO*/)); |
2205 | } |
2206 | } |
2207 | |
2208 | // Add the profile summary (read from the header of the indexed summary) here |
2209 | // so that we can use it below when reading counters (which checks if the |
2210 | // function should be marked with a cold or inlinehint attribute). |
2211 | M.setProfileSummary(M: PGOReader->getSummary(UseCS: IsCS).getMD(Context&: M.getContext()), |
2212 | Kind: IsCS ? ProfileSummary::PSK_CSInstr |
2213 | : ProfileSummary::PSK_Instr); |
2214 | PSI->refresh(); |
2215 | |
2216 | std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; |
2217 | collectComdatMembers(M, ComdatMembers); |
2218 | std::vector<Function *> HotFunctions; |
2219 | std::vector<Function *> ColdFunctions; |
2220 | |
2221 | // If the profile marked as always instrument the entry BB, do the |
2222 | // same. Note this can be overwritten by the internal option in CFGMST.h |
2223 | bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); |
2224 | if (PGOInstrumentEntry.getNumOccurrences() > 0) |
2225 | InstrumentFuncEntry = PGOInstrumentEntry; |
2226 | bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled(); |
2227 | if (PGOInstrumentLoopEntries.getNumOccurrences() > 0) |
2228 | InstrumentLoopEntries = PGOInstrumentLoopEntries; |
2229 | |
2230 | bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); |
2231 | for (auto &F : M) { |
2232 | if (skipPGOUse(F)) |
2233 | continue; |
2234 | TargetLibraryInfo &TLI = LookupTLI(F); |
2235 | BranchProbabilityInfo *BPI = LookupBPI(F); |
2236 | BlockFrequencyInfo *BFI = LookupBFI(F); |
2237 | LoopInfo *LI = LookupLI(F); |
2238 | if (!HasSingleByteCoverage) { |
2239 | // Split indirectbr critical edges here before computing the MST rather |
2240 | // than later in getInstrBB() to avoid invalidating it. |
2241 | SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, |
2242 | BFI); |
2243 | } |
2244 | PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS, |
2245 | InstrumentFuncEntry, InstrumentLoopEntries, |
2246 | HasSingleByteCoverage); |
2247 | if (!Func.getRecord(PGOReader: PGOReader.get())) |
2248 | continue; |
2249 | if (HasSingleByteCoverage) { |
2250 | Func.populateCoverage(); |
2251 | continue; |
2252 | } |
2253 | // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo, |
2254 | // it means the profile for the function is unrepresentative and this |
2255 | // function is actually hot / warm. We will reset the function hot / cold |
2256 | // attribute and drop all the profile counters. |
2257 | InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; |
2258 | bool AllZeros = false; |
2259 | if (!Func.readCounters(AllZeros, PseudoKind)) |
2260 | continue; |
2261 | if (AllZeros) { |
2262 | F.setEntryCount(Count: ProfileCount(0, Function::PCT_Real)); |
2263 | if (Func.getProgramMaxCount() != 0) |
2264 | ColdFunctions.push_back(x: &F); |
2265 | continue; |
2266 | } |
2267 | if (PseudoKind != InstrProfRecord::NotPseudo) { |
2268 | // Clear function attribute cold. |
2269 | if (F.hasFnAttribute(Kind: Attribute::Cold)) |
2270 | F.removeFnAttr(Kind: Attribute::Cold); |
2271 | // Set function attribute as hot. |
2272 | if (PseudoKind == InstrProfRecord::PseudoHot) |
2273 | F.addFnAttr(Kind: Attribute::Hot); |
2274 | continue; |
2275 | } |
2276 | Func.populateCounters(); |
2277 | Func.setBranchWeights(); |
2278 | Func.annotateValueSites(); |
2279 | Func.annotateIrrLoopHeaderWeights(); |
2280 | PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); |
2281 | if (FreqAttr == PGOUseFunc::FFA_Cold) |
2282 | ColdFunctions.push_back(x: &F); |
2283 | else if (FreqAttr == PGOUseFunc::FFA_Hot) |
2284 | HotFunctions.push_back(x: &F); |
2285 | if (PGOViewCounts != PGOVCT_None && |
2286 | (ViewBlockFreqFuncName.empty() || |
2287 | F.getName() == ViewBlockFreqFuncName)) { |
2288 | LoopInfo LI{DominatorTree(F)}; |
2289 | std::unique_ptr<BranchProbabilityInfo> NewBPI = |
2290 | std::make_unique<BranchProbabilityInfo>(args&: F, args&: LI); |
2291 | std::unique_ptr<BlockFrequencyInfo> NewBFI = |
2292 | std::make_unique<BlockFrequencyInfo>(args&: F, args&: *NewBPI, args&: LI); |
2293 | if (PGOViewCounts == PGOVCT_Graph) |
2294 | NewBFI->view(); |
2295 | else if (PGOViewCounts == PGOVCT_Text) { |
2296 | dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n" ; |
2297 | NewBFI->print(OS&: dbgs()); |
2298 | } |
2299 | } |
2300 | if (PGOViewRawCounts != PGOVCT_None && |
2301 | (ViewBlockFreqFuncName.empty() || |
2302 | F.getName() == ViewBlockFreqFuncName)) { |
2303 | if (PGOViewRawCounts == PGOVCT_Graph) |
2304 | if (ViewBlockFreqFuncName.empty()) |
2305 | WriteGraph(G: &Func, Name: Twine("PGORawCounts_" ) + Func.getFunc().getName()); |
2306 | else |
2307 | ViewGraph(G: &Func, Name: Twine("PGORawCounts_" ) + Func.getFunc().getName()); |
2308 | else if (PGOViewRawCounts == PGOVCT_Text) { |
2309 | dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n" ; |
2310 | Func.dumpInfo(); |
2311 | } |
2312 | } |
2313 | |
2314 | if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { |
2315 | LoopInfo LI{DominatorTree(F)}; |
2316 | BranchProbabilityInfo NBPI(F, LI); |
2317 | |
2318 | // Fix func entry count. |
2319 | if (PGOFixEntryCount) |
2320 | fixFuncEntryCount(Func, LI, NBPI); |
2321 | |
2322 | // Verify BlockFrequency information. |
2323 | uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; |
2324 | if (PGOVerifyHotBFI) { |
2325 | HotCountThreshold = PSI->getOrCompHotCountThreshold(); |
2326 | ColdCountThreshold = PSI->getOrCompColdCountThreshold(); |
2327 | } |
2328 | verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); |
2329 | } |
2330 | } |
2331 | |
2332 | // Set function hotness attribute from the profile. |
2333 | // We have to apply these attributes at the end because their presence |
2334 | // can affect the BranchProbabilityInfo of any callers, resulting in an |
2335 | // inconsistent MST between prof-gen and prof-use. |
2336 | for (auto &F : HotFunctions) { |
2337 | F->addFnAttr(Kind: Attribute::InlineHint); |
2338 | LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() |
2339 | << "\n" ); |
2340 | } |
2341 | for (auto &F : ColdFunctions) { |
2342 | // Only set when there is no Attribute::Hot set by the user. For Hot |
2343 | // attribute, user's annotation has the precedence over the profile. |
2344 | if (F->hasFnAttribute(Kind: Attribute::Hot)) { |
2345 | auto &Ctx = M.getContext(); |
2346 | std::string Msg = std::string("Function " ) + F->getName().str() + |
2347 | std::string(" is annotated as a hot function but" |
2348 | " the profile is cold" ); |
2349 | Ctx.diagnose( |
2350 | DI: DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); |
2351 | continue; |
2352 | } |
2353 | F->addFnAttr(Kind: Attribute::Cold); |
2354 | LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() |
2355 | << "\n" ); |
2356 | } |
2357 | return true; |
2358 | } |
2359 | |
2360 | PGOInstrumentationUse::PGOInstrumentationUse( |
2361 | std::string Filename, std::string RemappingFilename, bool IsCS, |
2362 | IntrusiveRefCntPtr<vfs::FileSystem> VFS) |
2363 | : ProfileFileName(std::move(Filename)), |
2364 | ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), |
2365 | FS(std::move(VFS)) { |
2366 | if (!PGOTestProfileFile.empty()) |
2367 | ProfileFileName = PGOTestProfileFile; |
2368 | if (!PGOTestProfileRemappingFile.empty()) |
2369 | ProfileRemappingFileName = PGOTestProfileRemappingFile; |
2370 | if (!FS) |
2371 | FS = vfs::getRealFileSystem(); |
2372 | } |
2373 | |
2374 | PreservedAnalyses PGOInstrumentationUse::run(Module &M, |
2375 | ModuleAnalysisManager &MAM) { |
2376 | |
2377 | auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
2378 | auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { |
2379 | return FAM.getResult<TargetLibraryAnalysis>(IR&: F); |
2380 | }; |
2381 | auto LookupBPI = [&FAM](Function &F) { |
2382 | return &FAM.getResult<BranchProbabilityAnalysis>(IR&: F); |
2383 | }; |
2384 | auto LookupBFI = [&FAM](Function &F) { |
2385 | return &FAM.getResult<BlockFrequencyAnalysis>(IR&: F); |
2386 | }; |
2387 | auto LookupLI = [&FAM](Function &F) { |
2388 | return &FAM.getResult<LoopAnalysis>(IR&: F); |
2389 | }; |
2390 | |
2391 | auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M); |
2392 | if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, FS&: *FS, |
2393 | LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI, |
2394 | IsCS)) |
2395 | return PreservedAnalyses::all(); |
2396 | |
2397 | return PreservedAnalyses::none(); |
2398 | } |
2399 | |
2400 | static std::string getSimpleNodeName(const BasicBlock *Node) { |
2401 | if (!Node->getName().empty()) |
2402 | return Node->getName().str(); |
2403 | |
2404 | std::string SimpleNodeName; |
2405 | raw_string_ostream OS(SimpleNodeName); |
2406 | Node->printAsOperand(O&: OS, PrintType: false); |
2407 | return SimpleNodeName; |
2408 | } |
2409 | |
2410 | void llvm::setProfMetadata(Module *M, Instruction *TI, |
2411 | ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { |
2412 | assert(MaxCount > 0 && "Bad max count" ); |
2413 | uint64_t Scale = calculateCountScale(MaxCount); |
2414 | SmallVector<unsigned, 4> Weights; |
2415 | for (const auto &ECI : EdgeCounts) |
2416 | Weights.push_back(Elt: scaleBranchCount(Count: ECI, Scale)); |
2417 | |
2418 | LLVM_DEBUG(dbgs() << "Weight is: " ; for (const auto &W |
2419 | : Weights) { |
2420 | dbgs() << W << " " ; |
2421 | } dbgs() << "\n" ;); |
2422 | |
2423 | misexpect::checkExpectAnnotations(I&: *TI, ExistingWeights: Weights, /*IsFrontend=*/false); |
2424 | |
2425 | setBranchWeights(I&: *TI, Weights, /*IsExpected=*/false); |
2426 | if (EmitBranchProbability) { |
2427 | std::string BrCondStr = getBranchCondString(TI); |
2428 | if (BrCondStr.empty()) |
2429 | return; |
2430 | |
2431 | uint64_t WSum = |
2432 | std::accumulate(first: Weights.begin(), last: Weights.end(), init: (uint64_t)0, |
2433 | binary_op: [](uint64_t w1, uint64_t w2) { return w1 + w2; }); |
2434 | uint64_t TotalCount = |
2435 | std::accumulate(first: EdgeCounts.begin(), last: EdgeCounts.end(), init: (uint64_t)0, |
2436 | binary_op: [](uint64_t c1, uint64_t c2) { return c1 + c2; }); |
2437 | Scale = calculateCountScale(MaxCount: WSum); |
2438 | BranchProbability BP(scaleBranchCount(Count: Weights[0], Scale), |
2439 | scaleBranchCount(Count: WSum, Scale)); |
2440 | std::string BranchProbStr; |
2441 | raw_string_ostream OS(BranchProbStr); |
2442 | OS << BP; |
2443 | OS << " (total count : " << TotalCount << ")" ; |
2444 | Function *F = TI->getParent()->getParent(); |
2445 | OptimizationRemarkEmitter ORE(F); |
2446 | ORE.emit(RemarkBuilder: [&]() { |
2447 | return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation" , TI) |
2448 | << BrCondStr << " is true with probability : " << BranchProbStr; |
2449 | }); |
2450 | } |
2451 | } |
2452 | |
2453 | namespace llvm { |
2454 | |
2455 | void (Module *M, Instruction *TI, uint64_t Count) { |
2456 | MDBuilder MDB(M->getContext()); |
2457 | TI->setMetadata(KindID: llvm::LLVMContext::MD_irr_loop, |
2458 | Node: MDB.createIrrLoopHeaderWeight(Weight: Count)); |
2459 | } |
2460 | |
2461 | template <> struct GraphTraits<PGOUseFunc *> { |
2462 | using NodeRef = const BasicBlock *; |
2463 | using ChildIteratorType = const_succ_iterator; |
2464 | using nodes_iterator = pointer_iterator<Function::const_iterator>; |
2465 | |
2466 | static NodeRef getEntryNode(const PGOUseFunc *G) { |
2467 | return &G->getFunc().front(); |
2468 | } |
2469 | |
2470 | static ChildIteratorType child_begin(const NodeRef N) { |
2471 | return succ_begin(BB: N); |
2472 | } |
2473 | |
2474 | static ChildIteratorType child_end(const NodeRef N) { return succ_end(BB: N); } |
2475 | |
2476 | static nodes_iterator nodes_begin(const PGOUseFunc *G) { |
2477 | return nodes_iterator(G->getFunc().begin()); |
2478 | } |
2479 | |
2480 | static nodes_iterator nodes_end(const PGOUseFunc *G) { |
2481 | return nodes_iterator(G->getFunc().end()); |
2482 | } |
2483 | }; |
2484 | |
2485 | template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { |
2486 | explicit DOTGraphTraits(bool isSimple = false) |
2487 | : DefaultDOTGraphTraits(isSimple) {} |
2488 | |
2489 | static std::string getGraphName(const PGOUseFunc *G) { |
2490 | return std::string(G->getFunc().getName()); |
2491 | } |
2492 | |
2493 | std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { |
2494 | std::string Result; |
2495 | raw_string_ostream OS(Result); |
2496 | |
2497 | OS << getSimpleNodeName(Node) << ":\\l" ; |
2498 | PGOUseBBInfo *BI = Graph->findBBInfo(BB: Node); |
2499 | OS << "Count : " ; |
2500 | if (BI && BI->Count) |
2501 | OS << *BI->Count << "\\l" ; |
2502 | else |
2503 | OS << "Unknown\\l" ; |
2504 | |
2505 | if (!PGOInstrSelect) |
2506 | return Result; |
2507 | |
2508 | for (const Instruction &I : *Node) { |
2509 | if (!isa<SelectInst>(Val: &I)) |
2510 | continue; |
2511 | // Display scaled counts for SELECT instruction: |
2512 | OS << "SELECT : { T = " ; |
2513 | uint64_t TC, FC; |
2514 | bool HasProf = extractBranchWeights(I, TrueVal&: TC, FalseVal&: FC); |
2515 | if (!HasProf) |
2516 | OS << "Unknown, F = Unknown }\\l" ; |
2517 | else |
2518 | OS << TC << ", F = " << FC << " }\\l" ; |
2519 | } |
2520 | return Result; |
2521 | } |
2522 | }; |
2523 | |
2524 | } // end namespace llvm |
2525 | |