1 | //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements PGO instrumentation using a minimum spanning tree based |
10 | // on the following paper: |
11 | // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points |
12 | // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, |
13 | // Issue 3, pp 313-322 |
14 | // The idea of the algorithm based on the fact that for each node (except for |
15 | // the entry and exit), the sum of incoming edge counts equals the sum of |
16 | // outgoing edge counts. The count of edge on spanning tree can be derived from |
17 | // those edges not on the spanning tree. Knuth proves this method instruments |
18 | // the minimum number of edges. |
19 | // |
20 | // The minimal spanning tree here is actually a maximum weight tree -- on-tree |
21 | // edges have higher frequencies (more likely to execute). The idea is to |
22 | // instrument those less frequently executed edges to reduce the runtime |
23 | // overhead of instrumented binaries. |
24 | // |
25 | // This file contains two passes: |
26 | // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge |
27 | // count profile, and generates the instrumentation for indirect call |
28 | // profiling. |
29 | // (2) Pass PGOInstrumentationUse which reads the edge count profile and |
30 | // annotates the branch weights. It also reads the indirect call value |
31 | // profiling records and annotate the indirect call instructions. |
32 | // |
33 | // To get the precise counter information, These two passes need to invoke at |
34 | // the same compilation point (so they see the same IR). For pass |
35 | // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For |
36 | // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and |
37 | // the profile is opened in module level and passed to each PGOUseFunc instance. |
38 | // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put |
39 | // in class FuncPGOInstrumentation. |
40 | // |
41 | // Class PGOEdge represents a CFG edge and some auxiliary information. Class |
42 | // BBInfo contains auxiliary information for each BB. These two classes are used |
43 | // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived |
44 | // class of PGOEdge and BBInfo, respectively. They contains extra data structure |
45 | // used in populating profile counters. |
46 | // The MST implementation is in Class CFGMST (CFGMST.h). |
47 | // |
48 | //===----------------------------------------------------------------------===// |
49 | |
50 | #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" |
51 | #include "ValueProfileCollector.h" |
52 | #include "llvm/ADT/APInt.h" |
53 | #include "llvm/ADT/ArrayRef.h" |
54 | #include "llvm/ADT/STLExtras.h" |
55 | #include "llvm/ADT/SmallVector.h" |
56 | #include "llvm/ADT/Statistic.h" |
57 | #include "llvm/ADT/StringRef.h" |
58 | #include "llvm/ADT/Twine.h" |
59 | #include "llvm/ADT/iterator.h" |
60 | #include "llvm/ADT/iterator_range.h" |
61 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
62 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
63 | #include "llvm/Analysis/CFG.h" |
64 | #include "llvm/Analysis/LoopInfo.h" |
65 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
66 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
67 | #include "llvm/Analysis/TargetLibraryInfo.h" |
68 | #include "llvm/IR/Attributes.h" |
69 | #include "llvm/IR/BasicBlock.h" |
70 | #include "llvm/IR/CFG.h" |
71 | #include "llvm/IR/Comdat.h" |
72 | #include "llvm/IR/Constant.h" |
73 | #include "llvm/IR/Constants.h" |
74 | #include "llvm/IR/DiagnosticInfo.h" |
75 | #include "llvm/IR/Dominators.h" |
76 | #include "llvm/IR/EHPersonalities.h" |
77 | #include "llvm/IR/Function.h" |
78 | #include "llvm/IR/GlobalAlias.h" |
79 | #include "llvm/IR/GlobalValue.h" |
80 | #include "llvm/IR/GlobalVariable.h" |
81 | #include "llvm/IR/IRBuilder.h" |
82 | #include "llvm/IR/InstVisitor.h" |
83 | #include "llvm/IR/InstrTypes.h" |
84 | #include "llvm/IR/Instruction.h" |
85 | #include "llvm/IR/Instructions.h" |
86 | #include "llvm/IR/IntrinsicInst.h" |
87 | #include "llvm/IR/Intrinsics.h" |
88 | #include "llvm/IR/LLVMContext.h" |
89 | #include "llvm/IR/MDBuilder.h" |
90 | #include "llvm/IR/Module.h" |
91 | #include "llvm/IR/PassManager.h" |
92 | #include "llvm/IR/ProfDataUtils.h" |
93 | #include "llvm/IR/ProfileSummary.h" |
94 | #include "llvm/IR/Type.h" |
95 | #include "llvm/IR/Value.h" |
96 | #include "llvm/ProfileData/InstrProf.h" |
97 | #include "llvm/ProfileData/InstrProfReader.h" |
98 | #include "llvm/Support/BranchProbability.h" |
99 | #include "llvm/Support/CRC.h" |
100 | #include "llvm/Support/Casting.h" |
101 | #include "llvm/Support/CommandLine.h" |
102 | #include "llvm/Support/DOTGraphTraits.h" |
103 | #include "llvm/Support/Debug.h" |
104 | #include "llvm/Support/Error.h" |
105 | #include "llvm/Support/ErrorHandling.h" |
106 | #include "llvm/Support/GraphWriter.h" |
107 | #include "llvm/Support/VirtualFileSystem.h" |
108 | #include "llvm/Support/raw_ostream.h" |
109 | #include "llvm/TargetParser/Triple.h" |
110 | #include "llvm/Transforms/Instrumentation.h" |
111 | #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" |
112 | #include "llvm/Transforms/Instrumentation/CFGMST.h" |
113 | #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" |
114 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
115 | #include "llvm/Transforms/Utils/MisExpect.h" |
116 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
117 | #include <algorithm> |
118 | #include <cassert> |
119 | #include <cstdint> |
120 | #include <memory> |
121 | #include <numeric> |
122 | #include <optional> |
123 | #include <stack> |
124 | #include <string> |
125 | #include <unordered_map> |
126 | #include <utility> |
127 | #include <vector> |
128 | |
129 | using namespace llvm; |
130 | using ProfileCount = Function::ProfileCount; |
131 | using VPCandidateInfo = ValueProfileCollector::CandidateInfo; |
132 | |
133 | #define DEBUG_TYPE "pgo-instrumentation" |
134 | |
135 | STATISTIC(NumOfPGOInstrument, "Number of edges instrumented." ); |
136 | STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented." ); |
137 | STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented." ); |
138 | STATISTIC(NumOfPGOEdge, "Number of edges." ); |
139 | STATISTIC(NumOfPGOBB, "Number of basic-blocks." ); |
140 | STATISTIC(NumOfPGOSplit, "Number of critical edge splits." ); |
141 | STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts." ); |
142 | STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile." ); |
143 | STATISTIC(NumOfPGOMissing, "Number of functions without profile." ); |
144 | STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations." ); |
145 | STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO." ); |
146 | STATISTIC(NumOfCSPGOSelectInsts, |
147 | "Number of select instruction instrumented in CSPGO." ); |
148 | STATISTIC(NumOfCSPGOMemIntrinsics, |
149 | "Number of mem intrinsics instrumented in CSPGO." ); |
150 | STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO." ); |
151 | STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO." ); |
152 | STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO." ); |
153 | STATISTIC(NumOfCSPGOFunc, |
154 | "Number of functions having valid profile counts in CSPGO." ); |
155 | STATISTIC(NumOfCSPGOMismatch, |
156 | "Number of functions having mismatch profile in CSPGO." ); |
157 | STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO." ); |
158 | STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed" ); |
159 | |
160 | // Command line option to specify the file to read profile from. This is |
161 | // mainly used for testing. |
162 | static cl::opt<std::string> |
163 | PGOTestProfileFile("pgo-test-profile-file" , cl::init(Val: "" ), cl::Hidden, |
164 | cl::value_desc("filename" ), |
165 | cl::desc("Specify the path of profile data file. This is" |
166 | "mainly for test purpose." )); |
167 | static cl::opt<std::string> PGOTestProfileRemappingFile( |
168 | "pgo-test-profile-remapping-file" , cl::init(Val: "" ), cl::Hidden, |
169 | cl::value_desc("filename" ), |
170 | cl::desc("Specify the path of profile remapping file. This is mainly for " |
171 | "test purpose." )); |
172 | |
173 | // Command line option to disable value profiling. The default is false: |
174 | // i.e. value profiling is enabled by default. This is for debug purpose. |
175 | static cl::opt<bool> DisableValueProfiling("disable-vp" , cl::init(Val: false), |
176 | cl::Hidden, |
177 | cl::desc("Disable Value Profiling" )); |
178 | |
179 | // Command line option to set the maximum number of VP annotations to write to |
180 | // the metadata for a single indirect call callsite. |
181 | static cl::opt<unsigned> MaxNumAnnotations( |
182 | "icp-max-annotations" , cl::init(Val: 3), cl::Hidden, |
183 | cl::desc("Max number of annotations for a single indirect " |
184 | "call callsite" )); |
185 | |
186 | // Command line option to set the maximum number of value annotations |
187 | // to write to the metadata for a single memop intrinsic. |
188 | static cl::opt<unsigned> MaxNumMemOPAnnotations( |
189 | "memop-max-annotations" , cl::init(Val: 4), cl::Hidden, |
190 | cl::desc("Max number of preicise value annotations for a single memop" |
191 | "intrinsic" )); |
192 | |
193 | // Command line option to control appending FunctionHash to the name of a COMDAT |
194 | // function. This is to avoid the hash mismatch caused by the preinliner. |
195 | static cl::opt<bool> DoComdatRenaming( |
196 | "do-comdat-renaming" , cl::init(Val: false), cl::Hidden, |
197 | cl::desc("Append function hash to the name of COMDAT function to avoid " |
198 | "function hash mismatch due to the preinliner" )); |
199 | |
200 | namespace llvm { |
201 | // Command line option to enable/disable the warning about missing profile |
202 | // information. |
203 | cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function" , cl::init(Val: false), |
204 | cl::Hidden, |
205 | cl::desc("Use this option to turn on/off " |
206 | "warnings about missing profile data for " |
207 | "functions." )); |
208 | |
209 | // Command line option to enable/disable the warning about a hash mismatch in |
210 | // the profile data. |
211 | cl::opt<bool> |
212 | NoPGOWarnMismatch("no-pgo-warn-mismatch" , cl::init(Val: false), cl::Hidden, |
213 | cl::desc("Use this option to turn off/on " |
214 | "warnings about profile cfg mismatch." )); |
215 | |
216 | // Command line option to enable/disable the warning about a hash mismatch in |
217 | // the profile data for Comdat functions, which often turns out to be false |
218 | // positive due to the pre-instrumentation inline. |
219 | cl::opt<bool> NoPGOWarnMismatchComdatWeak( |
220 | "no-pgo-warn-mismatch-comdat-weak" , cl::init(Val: true), cl::Hidden, |
221 | cl::desc("The option is used to turn on/off " |
222 | "warnings about hash mismatch for comdat " |
223 | "or weak functions." )); |
224 | } // namespace llvm |
225 | |
226 | // Command line option to enable/disable select instruction instrumentation. |
227 | static cl::opt<bool> |
228 | PGOInstrSelect("pgo-instr-select" , cl::init(Val: true), cl::Hidden, |
229 | cl::desc("Use this option to turn on/off SELECT " |
230 | "instruction instrumentation. " )); |
231 | |
232 | // Command line option to turn on CFG dot or text dump of raw profile counts |
233 | static cl::opt<PGOViewCountsType> PGOViewRawCounts( |
234 | "pgo-view-raw-counts" , cl::Hidden, |
235 | cl::desc("A boolean option to show CFG dag or text " |
236 | "with raw profile counts from " |
237 | "profile data. See also option " |
238 | "-pgo-view-counts. To limit graph " |
239 | "display to only one function, use " |
240 | "filtering option -view-bfi-func-name." ), |
241 | cl::values(clEnumValN(PGOVCT_None, "none" , "do not show." ), |
242 | clEnumValN(PGOVCT_Graph, "graph" , "show a graph." ), |
243 | clEnumValN(PGOVCT_Text, "text" , "show in text." ))); |
244 | |
245 | // Command line option to enable/disable memop intrinsic call.size profiling. |
246 | static cl::opt<bool> |
247 | PGOInstrMemOP("pgo-instr-memop" , cl::init(Val: true), cl::Hidden, |
248 | cl::desc("Use this option to turn on/off " |
249 | "memory intrinsic size profiling." )); |
250 | |
251 | // Emit branch probability as optimization remarks. |
252 | static cl::opt<bool> |
253 | EmitBranchProbability("pgo-emit-branch-prob" , cl::init(Val: false), cl::Hidden, |
254 | cl::desc("When this option is on, the annotated " |
255 | "branch probability will be emitted as " |
256 | "optimization remarks: -{Rpass|" |
257 | "pass-remarks}=pgo-instrumentation" )); |
258 | |
259 | static cl::opt<bool> PGOInstrumentEntry( |
260 | "pgo-instrument-entry" , cl::init(Val: false), cl::Hidden, |
261 | cl::desc("Force to instrument function entry basicblock." )); |
262 | |
263 | static cl::opt<bool> PGOFunctionEntryCoverage( |
264 | "pgo-function-entry-coverage" , cl::Hidden, |
265 | cl::desc( |
266 | "Use this option to enable function entry coverage instrumentation." )); |
267 | |
268 | static cl::opt<bool> PGOBlockCoverage( |
269 | "pgo-block-coverage" , |
270 | cl::desc("Use this option to enable basic block coverage instrumentation" )); |
271 | |
272 | static cl::opt<bool> |
273 | PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph" , |
274 | cl::desc("Create a dot file of CFGs with block " |
275 | "coverage inference information" )); |
276 | |
277 | static cl::opt<bool> PGOTemporalInstrumentation( |
278 | "pgo-temporal-instrumentation" , |
279 | cl::desc("Use this option to enable temporal instrumentation" )); |
280 | |
281 | static cl::opt<bool> |
282 | PGOFixEntryCount("pgo-fix-entry-count" , cl::init(Val: true), cl::Hidden, |
283 | cl::desc("Fix function entry count in profile use." )); |
284 | |
285 | static cl::opt<bool> PGOVerifyHotBFI( |
286 | "pgo-verify-hot-bfi" , cl::init(Val: false), cl::Hidden, |
287 | cl::desc("Print out the non-match BFI count if a hot raw profile count " |
288 | "becomes non-hot, or a cold raw profile count becomes hot. " |
289 | "The print is enabled under -Rpass-analysis=pgo, or " |
290 | "internal option -pass-remakrs-analysis=pgo." )); |
291 | |
292 | static cl::opt<bool> PGOVerifyBFI( |
293 | "pgo-verify-bfi" , cl::init(Val: false), cl::Hidden, |
294 | cl::desc("Print out mismatched BFI counts after setting profile metadata " |
295 | "The print is enabled under -Rpass-analysis=pgo, or " |
296 | "internal option -pass-remakrs-analysis=pgo." )); |
297 | |
298 | static cl::opt<unsigned> PGOVerifyBFIRatio( |
299 | "pgo-verify-bfi-ratio" , cl::init(Val: 2), cl::Hidden, |
300 | cl::desc("Set the threshold for pgo-verify-bfi: only print out " |
301 | "mismatched BFI if the difference percentage is greater than " |
302 | "this value (in percentage)." )); |
303 | |
304 | static cl::opt<unsigned> PGOVerifyBFICutoff( |
305 | "pgo-verify-bfi-cutoff" , cl::init(Val: 5), cl::Hidden, |
306 | cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " |
307 | "profile count value is below." )); |
308 | |
309 | static cl::opt<std::string> PGOTraceFuncHash( |
310 | "pgo-trace-func-hash" , cl::init(Val: "-" ), cl::Hidden, |
311 | cl::value_desc("function name" ), |
312 | cl::desc("Trace the hash of the function with this name." )); |
313 | |
314 | static cl::opt<unsigned> PGOFunctionSizeThreshold( |
315 | "pgo-function-size-threshold" , cl::Hidden, |
316 | cl::desc("Do not instrument functions smaller than this threshold." )); |
317 | |
318 | static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( |
319 | "pgo-critical-edge-threshold" , cl::init(Val: 20000), cl::Hidden, |
320 | cl::desc("Do not instrument functions with the number of critical edges " |
321 | " greater than this threshold." )); |
322 | |
323 | extern cl::opt<unsigned> MaxNumVTableAnnotations; |
324 | |
325 | namespace llvm { |
326 | // Command line option to turn on CFG dot dump after profile annotation. |
327 | // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts |
328 | extern cl::opt<PGOViewCountsType> PGOViewCounts; |
329 | |
330 | // Command line option to specify the name of the function for CFG dump |
331 | // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= |
332 | extern cl::opt<std::string> ViewBlockFreqFuncName; |
333 | |
334 | // Command line option to enable vtable value profiling. Defined in |
335 | // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= |
336 | extern cl::opt<bool> EnableVTableValueProfiling; |
337 | extern cl::opt<bool> EnableVTableProfileUse; |
338 | extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; |
339 | } // namespace llvm |
340 | |
341 | bool shouldInstrumentEntryBB() { |
342 | return PGOInstrumentEntry || |
343 | PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); |
344 | } |
345 | |
346 | // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx |
347 | // profiling implicitly captures indirect call cases, but not other values. |
348 | // Supporting other values is relatively straight-forward - just another counter |
349 | // range within the context. |
350 | bool isValueProfilingDisabled() { |
351 | return DisableValueProfiling || |
352 | PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); |
353 | } |
354 | |
355 | // Return a string describing the branch condition that can be |
356 | // used in static branch probability heuristics: |
357 | static std::string getBranchCondString(Instruction *TI) { |
358 | BranchInst *BI = dyn_cast<BranchInst>(Val: TI); |
359 | if (!BI || !BI->isConditional()) |
360 | return std::string(); |
361 | |
362 | Value *Cond = BI->getCondition(); |
363 | ICmpInst *CI = dyn_cast<ICmpInst>(Val: Cond); |
364 | if (!CI) |
365 | return std::string(); |
366 | |
367 | std::string result; |
368 | raw_string_ostream OS(result); |
369 | OS << CI->getPredicate() << "_" ; |
370 | CI->getOperand(i_nocapture: 0)->getType()->print(O&: OS, IsForDebug: true); |
371 | |
372 | Value *RHS = CI->getOperand(i_nocapture: 1); |
373 | ConstantInt *CV = dyn_cast<ConstantInt>(Val: RHS); |
374 | if (CV) { |
375 | if (CV->isZero()) |
376 | OS << "_Zero" ; |
377 | else if (CV->isOne()) |
378 | OS << "_One" ; |
379 | else if (CV->isMinusOne()) |
380 | OS << "_MinusOne" ; |
381 | else |
382 | OS << "_Const" ; |
383 | } |
384 | OS.flush(); |
385 | return result; |
386 | } |
387 | |
388 | static const char *ValueProfKindDescr[] = { |
389 | #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, |
390 | #include "llvm/ProfileData/InstrProfData.inc" |
391 | }; |
392 | |
393 | // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime |
394 | // aware this is an ir_level profile so it can set the version flag. |
395 | static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) { |
396 | const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); |
397 | Type *IntTy64 = Type::getInt64Ty(C&: M.getContext()); |
398 | uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); |
399 | if (IsCS) |
400 | ProfileVersion |= VARIANT_MASK_CSIR_PROF; |
401 | if (shouldInstrumentEntryBB()) |
402 | ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; |
403 | if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) |
404 | ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; |
405 | if (PGOFunctionEntryCoverage) |
406 | ProfileVersion |= |
407 | VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY; |
408 | if (PGOBlockCoverage) |
409 | ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE; |
410 | if (PGOTemporalInstrumentation) |
411 | ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF; |
412 | auto IRLevelVersionVariable = new GlobalVariable( |
413 | M, IntTy64, true, GlobalValue::WeakAnyLinkage, |
414 | Constant::getIntegerValue(Ty: IntTy64, V: APInt(64, ProfileVersion)), VarName); |
415 | IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility); |
416 | Triple TT(M.getTargetTriple()); |
417 | if (TT.supportsCOMDAT()) { |
418 | IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); |
419 | IRLevelVersionVariable->setComdat(M.getOrInsertComdat(Name: VarName)); |
420 | } |
421 | return IRLevelVersionVariable; |
422 | } |
423 | |
424 | namespace { |
425 | |
426 | /// The select instruction visitor plays three roles specified |
427 | /// by the mode. In \c VM_counting mode, it simply counts the number of |
428 | /// select instructions. In \c VM_instrument mode, it inserts code to count |
429 | /// the number times TrueValue of select is taken. In \c VM_annotate mode, |
430 | /// it reads the profile data and annotate the select instruction with metadata. |
431 | enum VisitMode { VM_counting, VM_instrument, VM_annotate }; |
432 | class PGOUseFunc; |
433 | |
434 | /// Instruction Visitor class to visit select instructions. |
435 | struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { |
436 | Function &F; |
437 | unsigned NSIs = 0; // Number of select instructions instrumented. |
438 | VisitMode Mode = VM_counting; // Visiting mode. |
439 | unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. |
440 | unsigned TotalNumCtrs = 0; // Total number of counters |
441 | GlobalVariable *FuncNameVar = nullptr; |
442 | uint64_t FuncHash = 0; |
443 | PGOUseFunc *UseFunc = nullptr; |
444 | bool HasSingleByteCoverage; |
445 | |
446 | SelectInstVisitor(Function &Func, bool HasSingleByteCoverage) |
447 | : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {} |
448 | |
449 | void countSelects() { |
450 | NSIs = 0; |
451 | Mode = VM_counting; |
452 | visit(F); |
453 | } |
454 | |
455 | // Visit the IR stream and instrument all select instructions. \p |
456 | // Ind is a pointer to the counter index variable; \p TotalNC |
457 | // is the total number of counters; \p FNV is the pointer to the |
458 | // PGO function name var; \p FHash is the function hash. |
459 | void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV, |
460 | uint64_t FHash) { |
461 | Mode = VM_instrument; |
462 | CurCtrIdx = Ind; |
463 | TotalNumCtrs = TotalNC; |
464 | FuncHash = FHash; |
465 | FuncNameVar = FNV; |
466 | visit(F); |
467 | } |
468 | |
469 | // Visit the IR stream and annotate all select instructions. |
470 | void annotateSelects(PGOUseFunc *UF, unsigned *Ind) { |
471 | Mode = VM_annotate; |
472 | UseFunc = UF; |
473 | CurCtrIdx = Ind; |
474 | visit(F); |
475 | } |
476 | |
477 | void instrumentOneSelectInst(SelectInst &SI); |
478 | void annotateOneSelectInst(SelectInst &SI); |
479 | |
480 | // Visit \p SI instruction and perform tasks according to visit mode. |
481 | void visitSelectInst(SelectInst &SI); |
482 | |
483 | // Return the number of select instructions. This needs be called after |
484 | // countSelects(). |
485 | unsigned getNumOfSelectInsts() const { return NSIs; } |
486 | }; |
487 | |
488 | /// This class implements the CFG edges for the Minimum Spanning Tree (MST) |
489 | /// based instrumentation. |
490 | /// Note that the CFG can be a multi-graph. So there might be multiple edges |
491 | /// with the same SrcBB and DestBB. |
492 | struct PGOEdge { |
493 | BasicBlock *SrcBB; |
494 | BasicBlock *DestBB; |
495 | uint64_t Weight; |
496 | bool InMST = false; |
497 | bool Removed = false; |
498 | bool IsCritical = false; |
499 | |
500 | PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1) |
501 | : SrcBB(Src), DestBB(Dest), Weight(W) {} |
502 | |
503 | /// Return the information string of an edge. |
504 | std::string infoString() const { |
505 | return (Twine(Removed ? "-" : " " ) + (InMST ? " " : "*" ) + |
506 | (IsCritical ? "c" : " " ) + " W=" + Twine(Weight)) |
507 | .str(); |
508 | } |
509 | }; |
510 | |
511 | /// This class stores the auxiliary information for each BB in the MST. |
512 | struct PGOBBInfo { |
513 | PGOBBInfo *Group; |
514 | uint32_t Index; |
515 | uint32_t Rank = 0; |
516 | |
517 | PGOBBInfo(unsigned IX) : Group(this), Index(IX) {} |
518 | |
519 | /// Return the information string of this object. |
520 | std::string infoString() const { |
521 | return (Twine("Index=" ) + Twine(Index)).str(); |
522 | } |
523 | }; |
524 | |
525 | // This class implements the CFG edges. Note the CFG can be a multi-graph. |
526 | template <class Edge, class BBInfo> class FuncPGOInstrumentation { |
527 | private: |
528 | Function &F; |
529 | |
530 | // Is this is context-sensitive instrumentation. |
531 | bool IsCS; |
532 | |
533 | // A map that stores the Comdat group in function F. |
534 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; |
535 | |
536 | ValueProfileCollector VPC; |
537 | |
538 | void computeCFGHash(); |
539 | void renameComdatFunction(); |
540 | |
541 | public: |
542 | const TargetLibraryInfo &TLI; |
543 | std::vector<std::vector<VPCandidateInfo>> ValueSites; |
544 | SelectInstVisitor SIVisitor; |
545 | std::string FuncName; |
546 | std::string DeprecatedFuncName; |
547 | GlobalVariable *FuncNameVar; |
548 | |
549 | // CFG hash value for this function. |
550 | uint64_t FunctionHash = 0; |
551 | |
552 | // The Minimum Spanning Tree of function CFG. |
553 | CFGMST<Edge, BBInfo> MST; |
554 | |
555 | const std::optional<BlockCoverageInference> BCI; |
556 | |
557 | static std::optional<BlockCoverageInference> |
558 | constructBCI(Function &Func, bool HasSingleByteCoverage, |
559 | bool InstrumentFuncEntry) { |
560 | if (HasSingleByteCoverage) |
561 | return BlockCoverageInference(Func, InstrumentFuncEntry); |
562 | return {}; |
563 | } |
564 | |
565 | // Collect all the BBs that will be instrumented, and store them in |
566 | // InstrumentBBs. |
567 | void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); |
568 | |
569 | // Give an edge, find the BB that will be instrumented. |
570 | // Return nullptr if there is no BB to be instrumented. |
571 | BasicBlock *getInstrBB(Edge *E); |
572 | |
573 | // Return the auxiliary BB information. |
574 | BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } |
575 | |
576 | // Return the auxiliary BB information if available. |
577 | BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } |
578 | |
579 | // Dump edges and BB information. |
580 | void dumpInfo(StringRef Str = "" ) const { |
581 | MST.dumpEdges(dbgs(), Twine("Dump Function " ) + FuncName + |
582 | " Hash: " + Twine(FunctionHash) + "\t" + Str); |
583 | } |
584 | |
585 | FuncPGOInstrumentation( |
586 | Function &Func, TargetLibraryInfo &TLI, |
587 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
588 | bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, |
589 | BlockFrequencyInfo *BFI = nullptr, bool IsCS = false, |
590 | bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false) |
591 | : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), |
592 | TLI(TLI), ValueSites(IPVK_Last + 1), |
593 | SIVisitor(Func, HasSingleByteCoverage), |
594 | MST(F, InstrumentFuncEntry, BPI, BFI), |
595 | BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) { |
596 | if (BCI && PGOViewBlockCoverageGraph) |
597 | BCI->viewBlockCoverageGraph(); |
598 | // This should be done before CFG hash computation. |
599 | SIVisitor.countSelects(); |
600 | ValueSites[IPVK_MemOPSize] = VPC.get(Kind: IPVK_MemOPSize); |
601 | if (!IsCS) { |
602 | NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); |
603 | NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); |
604 | NumOfPGOBB += MST.bbInfoSize(); |
605 | ValueSites[IPVK_IndirectCallTarget] = VPC.get(Kind: IPVK_IndirectCallTarget); |
606 | if (EnableVTableValueProfiling) |
607 | ValueSites[IPVK_VTableTarget] = VPC.get(Kind: IPVK_VTableTarget); |
608 | } else { |
609 | NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); |
610 | NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); |
611 | NumOfCSPGOBB += MST.bbInfoSize(); |
612 | } |
613 | |
614 | FuncName = getIRPGOFuncName(F); |
615 | DeprecatedFuncName = getPGOFuncName(F); |
616 | computeCFGHash(); |
617 | if (!ComdatMembers.empty()) |
618 | renameComdatFunction(); |
619 | LLVM_DEBUG(dumpInfo("after CFGMST" )); |
620 | |
621 | for (const auto &E : MST.allEdges()) { |
622 | if (E->Removed) |
623 | continue; |
624 | IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; |
625 | if (!E->InMST) |
626 | IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; |
627 | } |
628 | |
629 | if (CreateGlobalVar) |
630 | FuncNameVar = createPGOFuncNameVar(F, PGOFuncName: FuncName); |
631 | } |
632 | }; |
633 | |
634 | } // end anonymous namespace |
635 | |
636 | // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index |
637 | // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers |
638 | // of selects, indirect calls, mem ops and edges. |
639 | template <class Edge, class BBInfo> |
640 | void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { |
641 | std::vector<uint8_t> Indexes; |
642 | JamCRC JC; |
643 | for (auto &BB : F) { |
644 | for (BasicBlock *Succ : successors(BB: &BB)) { |
645 | auto BI = findBBInfo(BB: Succ); |
646 | if (BI == nullptr) |
647 | continue; |
648 | uint32_t Index = BI->Index; |
649 | for (int J = 0; J < 4; J++) |
650 | Indexes.push_back(x: (uint8_t)(Index >> (J * 8))); |
651 | } |
652 | } |
653 | JC.update(Data: Indexes); |
654 | |
655 | JamCRC JCH; |
656 | // The higher 32 bits. |
657 | auto updateJCH = [&JCH](uint64_t Num) { |
658 | uint8_t Data[8]; |
659 | support::endian::write64le(P: Data, V: Num); |
660 | JCH.update(Data); |
661 | }; |
662 | updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); |
663 | updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); |
664 | updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); |
665 | if (BCI) { |
666 | updateJCH(BCI->getInstrumentedBlocksHash()); |
667 | } else { |
668 | updateJCH((uint64_t)MST.numEdges()); |
669 | } |
670 | |
671 | // Hash format for context sensitive profile. Reserve 4 bits for other |
672 | // information. |
673 | FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); |
674 | |
675 | // Reserve bit 60-63 for other information purpose. |
676 | FunctionHash &= 0x0FFFFFFFFFFFFFFF; |
677 | if (IsCS) |
678 | NamedInstrProfRecord::setCSFlagInHash(FunctionHash); |
679 | LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" |
680 | << " CRC = " << JC.getCRC() |
681 | << ", Selects = " << SIVisitor.getNumOfSelectInsts() |
682 | << ", Edges = " << MST.numEdges() << ", ICSites = " |
683 | << ValueSites[IPVK_IndirectCallTarget].size() |
684 | << ", Memops = " << ValueSites[IPVK_MemOPSize].size() |
685 | << ", High32 CRC = " << JCH.getCRC() |
686 | << ", Hash = " << FunctionHash << "\n" ;); |
687 | |
688 | if (PGOTraceFuncHash != "-" && F.getName().contains(Other: PGOTraceFuncHash)) |
689 | dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash |
690 | << " in building " << F.getParent()->getSourceFileName() << "\n" ; |
691 | } |
692 | |
693 | // Check if we can safely rename this Comdat function. |
694 | static bool canRenameComdat( |
695 | Function &F, |
696 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { |
697 | if (!DoComdatRenaming || !canRenameComdatFunc(F, CheckAddressTaken: true)) |
698 | return false; |
699 | |
700 | // FIXME: Current only handle those Comdat groups that only containing one |
701 | // function. |
702 | // (1) For a Comdat group containing multiple functions, we need to have a |
703 | // unique postfix based on the hashes for each function. There is a |
704 | // non-trivial code refactoring to do this efficiently. |
705 | // (2) Variables can not be renamed, so we can not rename Comdat function in a |
706 | // group including global vars. |
707 | Comdat *C = F.getComdat(); |
708 | for (auto &&CM : make_range(p: ComdatMembers.equal_range(x: C))) { |
709 | assert(!isa<GlobalAlias>(CM.second)); |
710 | Function *FM = dyn_cast<Function>(Val: CM.second); |
711 | if (FM != &F) |
712 | return false; |
713 | } |
714 | return true; |
715 | } |
716 | |
717 | // Append the CFGHash to the Comdat function name. |
718 | template <class Edge, class BBInfo> |
719 | void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { |
720 | if (!canRenameComdat(F, ComdatMembers)) |
721 | return; |
722 | std::string OrigName = F.getName().str(); |
723 | std::string NewFuncName = |
724 | Twine(F.getName() + "." + Twine(FunctionHash)).str(); |
725 | F.setName(Twine(NewFuncName)); |
726 | GlobalAlias::create(Linkage: GlobalValue::WeakAnyLinkage, Name: OrigName, Aliasee: &F); |
727 | FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); |
728 | Comdat *NewComdat; |
729 | Module *M = F.getParent(); |
730 | // For AvailableExternallyLinkage functions, change the linkage to |
731 | // LinkOnceODR and put them into comdat. This is because after renaming, there |
732 | // is no backup external copy available for the function. |
733 | if (!F.hasComdat()) { |
734 | assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); |
735 | NewComdat = M->getOrInsertComdat(Name: StringRef(NewFuncName)); |
736 | F.setLinkage(GlobalValue::LinkOnceODRLinkage); |
737 | F.setComdat(NewComdat); |
738 | return; |
739 | } |
740 | |
741 | // This function belongs to a single function Comdat group. |
742 | Comdat *OrigComdat = F.getComdat(); |
743 | std::string NewComdatName = |
744 | Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); |
745 | NewComdat = M->getOrInsertComdat(Name: StringRef(NewComdatName)); |
746 | NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); |
747 | |
748 | for (auto &&CM : make_range(p: ComdatMembers.equal_range(x: OrigComdat))) { |
749 | // Must be a function. |
750 | cast<Function>(Val: CM.second)->setComdat(NewComdat); |
751 | } |
752 | } |
753 | |
754 | /// Collect all the BBs that will be instruments and add them to |
755 | /// `InstrumentBBs`. |
756 | template <class Edge, class BBInfo> |
757 | void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( |
758 | std::vector<BasicBlock *> &InstrumentBBs) { |
759 | if (BCI) { |
760 | for (auto &BB : F) |
761 | if (BCI->shouldInstrumentBlock(BB)) |
762 | InstrumentBBs.push_back(x: &BB); |
763 | return; |
764 | } |
765 | |
766 | // Use a worklist as we will update the vector during the iteration. |
767 | std::vector<Edge *> EdgeList; |
768 | EdgeList.reserve(MST.numEdges()); |
769 | for (const auto &E : MST.allEdges()) |
770 | EdgeList.push_back(E.get()); |
771 | |
772 | for (auto &E : EdgeList) { |
773 | BasicBlock *InstrBB = getInstrBB(E); |
774 | if (InstrBB) |
775 | InstrumentBBs.push_back(x: InstrBB); |
776 | } |
777 | } |
778 | |
779 | // Given a CFG E to be instrumented, find which BB to place the instrumented |
780 | // code. The function will split the critical edge if necessary. |
781 | template <class Edge, class BBInfo> |
782 | BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { |
783 | if (E->InMST || E->Removed) |
784 | return nullptr; |
785 | |
786 | BasicBlock *SrcBB = E->SrcBB; |
787 | BasicBlock *DestBB = E->DestBB; |
788 | // For a fake edge, instrument the real BB. |
789 | if (SrcBB == nullptr) |
790 | return DestBB; |
791 | if (DestBB == nullptr) |
792 | return SrcBB; |
793 | |
794 | auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { |
795 | // There are basic blocks (such as catchswitch) cannot be instrumented. |
796 | // If the returned first insertion point is the end of BB, skip this BB. |
797 | if (BB->getFirstInsertionPt() == BB->end()) |
798 | return nullptr; |
799 | return BB; |
800 | }; |
801 | |
802 | // Instrument the SrcBB if it has a single successor, |
803 | // otherwise, the DestBB if this is not a critical edge. |
804 | Instruction *TI = SrcBB->getTerminator(); |
805 | if (TI->getNumSuccessors() <= 1) |
806 | return canInstrument(SrcBB); |
807 | if (!E->IsCritical) |
808 | return canInstrument(DestBB); |
809 | |
810 | // Some IndirectBr critical edges cannot be split by the previous |
811 | // SplitIndirectBrCriticalEdges call. Bail out. |
812 | unsigned SuccNum = GetSuccessorNumber(BB: SrcBB, Succ: DestBB); |
813 | BasicBlock *InstrBB = |
814 | isa<IndirectBrInst>(Val: TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); |
815 | if (!InstrBB) { |
816 | LLVM_DEBUG( |
817 | dbgs() << "Fail to split critical edge: not instrument this edge.\n" ); |
818 | return nullptr; |
819 | } |
820 | // For a critical edge, we have to split. Instrument the newly |
821 | // created BB. |
822 | IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; |
823 | LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index |
824 | << " --> " << getBBInfo(DestBB).Index << "\n" ); |
825 | // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. |
826 | MST.addEdge(SrcBB, InstrBB, 0); |
827 | // Second one: Add new edge of InstrBB->DestBB. |
828 | Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); |
829 | NewEdge1.InMST = true; |
830 | E->Removed = true; |
831 | |
832 | return canInstrument(InstrBB); |
833 | } |
834 | |
835 | // When generating value profiling calls on Windows routines that make use of |
836 | // handler funclets for exception processing an operand bundle needs to attached |
837 | // to the called function. This routine will set \p OpBundles to contain the |
838 | // funclet information, if any is needed, that should be placed on the generated |
839 | // value profiling call for the value profile candidate call. |
840 | static void |
841 | populateEHOperandBundle(VPCandidateInfo &Cand, |
842 | DenseMap<BasicBlock *, ColorVector> &BlockColors, |
843 | SmallVectorImpl<OperandBundleDef> &OpBundles) { |
844 | auto *OrigCall = dyn_cast<CallBase>(Val: Cand.AnnotatedInst); |
845 | if (!OrigCall) |
846 | return; |
847 | |
848 | if (!isa<IntrinsicInst>(Val: OrigCall)) { |
849 | // The instrumentation call should belong to the same funclet as a |
850 | // non-intrinsic call, so just copy the operand bundle, if any exists. |
851 | std::optional<OperandBundleUse> ParentFunclet = |
852 | OrigCall->getOperandBundle(ID: LLVMContext::OB_funclet); |
853 | if (ParentFunclet) |
854 | OpBundles.emplace_back(Args: OperandBundleDef(*ParentFunclet)); |
855 | } else { |
856 | // Intrinsics or other instructions do not get funclet information from the |
857 | // front-end. Need to use the BlockColors that was computed by the routine |
858 | // colorEHFunclets to determine whether a funclet is needed. |
859 | if (!BlockColors.empty()) { |
860 | const ColorVector &CV = BlockColors.find(Val: OrigCall->getParent())->second; |
861 | assert(CV.size() == 1 && "non-unique color for block!" ); |
862 | Instruction *EHPad = CV.front()->getFirstNonPHI(); |
863 | if (EHPad->isEHPad()) |
864 | OpBundles.emplace_back(Args: "funclet" , Args&: EHPad); |
865 | } |
866 | } |
867 | } |
868 | |
869 | // Visit all edge and instrument the edges not in MST, and do value profiling. |
870 | // Critical edges will be split. |
871 | static void instrumentOneFunc( |
872 | Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, |
873 | BlockFrequencyInfo *BFI, |
874 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
875 | bool IsCS) { |
876 | if (!PGOBlockCoverage) { |
877 | // Split indirectbr critical edges here before computing the MST rather than |
878 | // later in getInstrBB() to avoid invalidating it. |
879 | SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI); |
880 | } |
881 | |
882 | FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo( |
883 | F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(), |
884 | PGOBlockCoverage); |
885 | |
886 | auto Name = FuncInfo.FuncNameVar; |
887 | auto CFGHash = ConstantInt::get(Ty: Type::getInt64Ty(C&: M->getContext()), |
888 | V: FuncInfo.FunctionHash); |
889 | if (PGOFunctionEntryCoverage) { |
890 | auto &EntryBB = F.getEntryBlock(); |
891 | IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); |
892 | // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, |
893 | // i32 <index>) |
894 | Builder.CreateCall( |
895 | Callee: Intrinsic::getDeclaration(M, id: Intrinsic::instrprof_cover), |
896 | Args: {Name, CFGHash, Builder.getInt32(C: 1), Builder.getInt32(C: 0)}); |
897 | return; |
898 | } |
899 | |
900 | std::vector<BasicBlock *> InstrumentBBs; |
901 | FuncInfo.getInstrumentBBs(InstrumentBBs); |
902 | unsigned NumCounters = |
903 | InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); |
904 | |
905 | if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) { |
906 | auto *CSIntrinsic = |
907 | Intrinsic::getDeclaration(M, id: Intrinsic::instrprof_callsite); |
908 | // We want to count the instrumentable callsites, then instrument them. This |
909 | // is because the llvm.instrprof.callsite intrinsic has an argument (like |
910 | // the other instrprof intrinsics) capturing the total number of |
911 | // instrumented objects (counters, or callsites, in this case). In this |
912 | // case, we want that value so we can readily pass it to the compiler-rt |
913 | // APIs that may have to allocate memory based on the nr of callsites. |
914 | // The traversal logic is the same for both counting and instrumentation, |
915 | // just needs to be done in succession. |
916 | auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) { |
917 | for (auto &BB : F) |
918 | for (auto &Instr : BB) |
919 | if (auto *CS = dyn_cast<CallBase>(Val: &Instr)) { |
920 | if ((CS->getCalledFunction() && |
921 | CS->getCalledFunction()->isIntrinsic()) || |
922 | dyn_cast<InlineAsm>(Val: CS->getCalledOperand())) |
923 | continue; |
924 | Visitor(CS); |
925 | } |
926 | }; |
927 | // First, count callsites. |
928 | uint32_t TotalNrCallsites = 0; |
929 | Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; }); |
930 | |
931 | // Now instrument. |
932 | uint32_t CallsiteIndex = 0; |
933 | Visit([&](auto *CB) { |
934 | IRBuilder<> Builder(CB); |
935 | Builder.CreateCall(CSIntrinsic, |
936 | {Name, CFGHash, Builder.getInt32(C: TotalNrCallsites), |
937 | Builder.getInt32(C: CallsiteIndex++), |
938 | CB->getCalledOperand()}); |
939 | }); |
940 | } |
941 | |
942 | uint32_t I = 0; |
943 | if (PGOTemporalInstrumentation) { |
944 | NumCounters += PGOBlockCoverage ? 8 : 1; |
945 | auto &EntryBB = F.getEntryBlock(); |
946 | IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); |
947 | // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>, |
948 | // i32 <index>) |
949 | Builder.CreateCall( |
950 | Callee: Intrinsic::getDeclaration(M, id: Intrinsic::instrprof_timestamp), |
951 | Args: {Name, CFGHash, Builder.getInt32(C: NumCounters), Builder.getInt32(C: I)}); |
952 | I += PGOBlockCoverage ? 8 : 1; |
953 | } |
954 | |
955 | for (auto *InstrBB : InstrumentBBs) { |
956 | IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); |
957 | assert(Builder.GetInsertPoint() != InstrBB->end() && |
958 | "Cannot get the Instrumentation point" ); |
959 | // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>, |
960 | // i32 <index>) |
961 | Builder.CreateCall( |
962 | Callee: Intrinsic::getDeclaration(M, id: PGOBlockCoverage |
963 | ? Intrinsic::instrprof_cover |
964 | : Intrinsic::instrprof_increment), |
965 | Args: {Name, CFGHash, Builder.getInt32(C: NumCounters), Builder.getInt32(C: I++)}); |
966 | } |
967 | |
968 | // Now instrument select instructions: |
969 | FuncInfo.SIVisitor.instrumentSelects(Ind: &I, TotalNC: NumCounters, FNV: FuncInfo.FuncNameVar, |
970 | FHash: FuncInfo.FunctionHash); |
971 | assert(I == NumCounters); |
972 | |
973 | if (isValueProfilingDisabled()) |
974 | return; |
975 | |
976 | NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); |
977 | |
978 | // Intrinsic function calls do not have funclet operand bundles needed for |
979 | // Windows exception handling attached to them. However, if value profiling is |
980 | // inserted for one of these calls, then a funclet value will need to be set |
981 | // on the instrumentation call based on the funclet coloring. |
982 | DenseMap<BasicBlock *, ColorVector> BlockColors; |
983 | if (F.hasPersonalityFn() && |
984 | isScopedEHPersonality(Pers: classifyEHPersonality(Pers: F.getPersonalityFn()))) |
985 | BlockColors = colorEHFunclets(F); |
986 | |
987 | // For each VP Kind, walk the VP candidates and instrument each one. |
988 | for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { |
989 | unsigned SiteIndex = 0; |
990 | if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) |
991 | continue; |
992 | |
993 | for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { |
994 | LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] |
995 | << " site: CallSite Index = " << SiteIndex << "\n" ); |
996 | |
997 | IRBuilder<> Builder(Cand.InsertPt); |
998 | assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && |
999 | "Cannot get the Instrumentation point" ); |
1000 | |
1001 | Value *ToProfile = nullptr; |
1002 | if (Cand.V->getType()->isIntegerTy()) |
1003 | ToProfile = Builder.CreateZExtOrTrunc(V: Cand.V, DestTy: Builder.getInt64Ty()); |
1004 | else if (Cand.V->getType()->isPointerTy()) |
1005 | ToProfile = Builder.CreatePtrToInt(V: Cand.V, DestTy: Builder.getInt64Ty()); |
1006 | assert(ToProfile && "value profiling Value is of unexpected type" ); |
1007 | |
1008 | SmallVector<OperandBundleDef, 1> OpBundles; |
1009 | populateEHOperandBundle(Cand, BlockColors, OpBundles); |
1010 | Builder.CreateCall( |
1011 | Callee: Intrinsic::getDeclaration(M, id: Intrinsic::instrprof_value_profile), |
1012 | Args: {FuncInfo.FuncNameVar, Builder.getInt64(C: FuncInfo.FunctionHash), |
1013 | ToProfile, Builder.getInt32(C: Kind), Builder.getInt32(C: SiteIndex++)}, |
1014 | OpBundles); |
1015 | } |
1016 | } // IPVK_First <= Kind <= IPVK_Last |
1017 | } |
1018 | |
1019 | namespace { |
1020 | |
1021 | // This class represents a CFG edge in profile use compilation. |
1022 | struct PGOUseEdge : public PGOEdge { |
1023 | using PGOEdge::PGOEdge; |
1024 | |
1025 | std::optional<uint64_t> Count; |
1026 | |
1027 | // Set edge count value |
1028 | void setEdgeCount(uint64_t Value) { Count = Value; } |
1029 | |
1030 | // Return the information string for this object. |
1031 | std::string infoString() const { |
1032 | if (!Count) |
1033 | return PGOEdge::infoString(); |
1034 | return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str(); |
1035 | } |
1036 | }; |
1037 | |
1038 | using DirectEdges = SmallVector<PGOUseEdge *, 2>; |
1039 | |
1040 | // This class stores the auxiliary information for each BB. |
1041 | struct PGOUseBBInfo : public PGOBBInfo { |
1042 | std::optional<uint64_t> Count; |
1043 | int32_t UnknownCountInEdge = 0; |
1044 | int32_t UnknownCountOutEdge = 0; |
1045 | DirectEdges InEdges; |
1046 | DirectEdges OutEdges; |
1047 | |
1048 | PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {} |
1049 | |
1050 | // Set the profile count value for this BB. |
1051 | void setBBInfoCount(uint64_t Value) { Count = Value; } |
1052 | |
1053 | // Return the information string of this object. |
1054 | std::string infoString() const { |
1055 | if (!Count) |
1056 | return PGOBBInfo::infoString(); |
1057 | return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str(); |
1058 | } |
1059 | |
1060 | // Add an OutEdge and update the edge count. |
1061 | void addOutEdge(PGOUseEdge *E) { |
1062 | OutEdges.push_back(Elt: E); |
1063 | UnknownCountOutEdge++; |
1064 | } |
1065 | |
1066 | // Add an InEdge and update the edge count. |
1067 | void addInEdge(PGOUseEdge *E) { |
1068 | InEdges.push_back(Elt: E); |
1069 | UnknownCountInEdge++; |
1070 | } |
1071 | }; |
1072 | |
1073 | } // end anonymous namespace |
1074 | |
1075 | // Sum up the count values for all the edges. |
1076 | static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { |
1077 | uint64_t Total = 0; |
1078 | for (const auto &E : Edges) { |
1079 | if (E->Removed) |
1080 | continue; |
1081 | if (E->Count) |
1082 | Total += *E->Count; |
1083 | } |
1084 | return Total; |
1085 | } |
1086 | |
1087 | namespace { |
1088 | |
1089 | class PGOUseFunc { |
1090 | public: |
1091 | PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, |
1092 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, |
1093 | BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, |
1094 | ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry, |
1095 | bool HasSingleByteCoverage) |
1096 | : F(Func), M(Modu), BFI(BFIin), PSI(PSI), |
1097 | FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS, |
1098 | InstrumentFuncEntry, HasSingleByteCoverage), |
1099 | FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {} |
1100 | |
1101 | void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum); |
1102 | |
1103 | // Read counts for the instrumented BB from profile. |
1104 | bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, |
1105 | InstrProfRecord::CountPseudoKind &PseudoKind); |
1106 | |
1107 | // Populate the counts for all BBs. |
1108 | void populateCounters(); |
1109 | |
1110 | // Set block coverage based on profile coverage values. |
1111 | void populateCoverage(IndexedInstrProfReader *PGOReader); |
1112 | |
1113 | // Set the branch weights based on the count values. |
1114 | void setBranchWeights(); |
1115 | |
1116 | // Annotate the value profile call sites for all value kind. |
1117 | void annotateValueSites(); |
1118 | |
1119 | // Annotate the value profile call sites for one value kind. |
1120 | void annotateValueSites(uint32_t Kind); |
1121 | |
1122 | // Annotate the irreducible loop header weights. |
1123 | void annotateIrrLoopHeaderWeights(); |
1124 | |
1125 | // The hotness of the function from the profile count. |
1126 | enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; |
1127 | |
1128 | // Return the function hotness from the profile. |
1129 | FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } |
1130 | |
1131 | // Return the function hash. |
1132 | uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } |
1133 | |
1134 | // Return the profile record for this function; |
1135 | InstrProfRecord &getProfileRecord() { return ProfileRecord; } |
1136 | |
1137 | // Return the auxiliary BB information. |
1138 | PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const { |
1139 | return FuncInfo.getBBInfo(BB); |
1140 | } |
1141 | |
1142 | // Return the auxiliary BB information if available. |
1143 | PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const { |
1144 | return FuncInfo.findBBInfo(BB); |
1145 | } |
1146 | |
1147 | Function &getFunc() const { return F; } |
1148 | |
1149 | void dumpInfo(StringRef Str = "" ) const { FuncInfo.dumpInfo(Str); } |
1150 | |
1151 | uint64_t getProgramMaxCount() const { return ProgramMaxCount; } |
1152 | |
1153 | private: |
1154 | Function &F; |
1155 | Module *M; |
1156 | BlockFrequencyInfo *BFI; |
1157 | ProfileSummaryInfo *PSI; |
1158 | |
1159 | // This member stores the shared information with class PGOGenFunc. |
1160 | FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo; |
1161 | |
1162 | // The maximum count value in the profile. This is only used in PGO use |
1163 | // compilation. |
1164 | uint64_t ProgramMaxCount; |
1165 | |
1166 | // Position of counter that remains to be read. |
1167 | uint32_t CountPosition = 0; |
1168 | |
1169 | // Total size of the profile count for this function. |
1170 | uint32_t ProfileCountSize = 0; |
1171 | |
1172 | // ProfileRecord for this function. |
1173 | InstrProfRecord ProfileRecord; |
1174 | |
1175 | // Function hotness info derived from profile. |
1176 | FuncFreqAttr FreqAttr; |
1177 | |
1178 | // Is to use the context sensitive profile. |
1179 | bool IsCS; |
1180 | |
1181 | ValueProfileCollector VPC; |
1182 | |
1183 | // Find the Instrumented BB and set the value. Return false on error. |
1184 | bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); |
1185 | |
1186 | // Set the edge counter value for the unknown edge -- there should be only |
1187 | // one unknown edge. |
1188 | void setEdgeCount(DirectEdges &Edges, uint64_t Value); |
1189 | |
1190 | // Set the hot/cold inline hints based on the count values. |
1191 | // FIXME: This function should be removed once the functionality in |
1192 | // the inliner is implemented. |
1193 | void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { |
1194 | if (PSI->isHotCount(C: EntryCount)) |
1195 | FreqAttr = FFA_Hot; |
1196 | else if (PSI->isColdCount(C: MaxCount)) |
1197 | FreqAttr = FFA_Cold; |
1198 | } |
1199 | }; |
1200 | |
1201 | } // end anonymous namespace |
1202 | |
1203 | /// Set up InEdges/OutEdges for all BBs in the MST. |
1204 | static void setupBBInfoEdges( |
1205 | const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) { |
1206 | // This is not required when there is block coverage inference. |
1207 | if (FuncInfo.BCI) |
1208 | return; |
1209 | for (const auto &E : FuncInfo.MST.allEdges()) { |
1210 | if (E->Removed) |
1211 | continue; |
1212 | const BasicBlock *SrcBB = E->SrcBB; |
1213 | const BasicBlock *DestBB = E->DestBB; |
1214 | PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(BB: SrcBB); |
1215 | PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(BB: DestBB); |
1216 | SrcInfo.addOutEdge(E: E.get()); |
1217 | DestInfo.addInEdge(E: E.get()); |
1218 | } |
1219 | } |
1220 | |
1221 | // Visit all the edges and assign the count value for the instrumented |
1222 | // edges and the BB. Return false on error. |
1223 | bool PGOUseFunc::setInstrumentedCounts( |
1224 | const std::vector<uint64_t> &CountFromProfile) { |
1225 | |
1226 | std::vector<BasicBlock *> InstrumentBBs; |
1227 | FuncInfo.getInstrumentBBs(InstrumentBBs); |
1228 | |
1229 | setupBBInfoEdges(FuncInfo); |
1230 | |
1231 | unsigned NumCounters = |
1232 | InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); |
1233 | // The number of counters here should match the number of counters |
1234 | // in profile. Return if they mismatch. |
1235 | if (NumCounters != CountFromProfile.size()) { |
1236 | return false; |
1237 | } |
1238 | auto *FuncEntry = &*F.begin(); |
1239 | |
1240 | // Set the profile count to the Instrumented BBs. |
1241 | uint32_t I = 0; |
1242 | for (BasicBlock *InstrBB : InstrumentBBs) { |
1243 | uint64_t CountValue = CountFromProfile[I++]; |
1244 | PGOUseBBInfo &Info = getBBInfo(BB: InstrBB); |
1245 | // If we reach here, we know that we have some nonzero count |
1246 | // values in this function. The entry count should not be 0. |
1247 | // Fix it if necessary. |
1248 | if (InstrBB == FuncEntry && CountValue == 0) |
1249 | CountValue = 1; |
1250 | Info.setBBInfoCount(CountValue); |
1251 | } |
1252 | ProfileCountSize = CountFromProfile.size(); |
1253 | CountPosition = I; |
1254 | |
1255 | // Set the edge count and update the count of unknown edges for BBs. |
1256 | auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { |
1257 | E->setEdgeCount(Value); |
1258 | this->getBBInfo(BB: E->SrcBB).UnknownCountOutEdge--; |
1259 | this->getBBInfo(BB: E->DestBB).UnknownCountInEdge--; |
1260 | }; |
1261 | |
1262 | // Set the profile count the Instrumented edges. There are BBs that not in |
1263 | // MST but not instrumented. Need to set the edge count value so that we can |
1264 | // populate the profile counts later. |
1265 | for (const auto &E : FuncInfo.MST.allEdges()) { |
1266 | if (E->Removed || E->InMST) |
1267 | continue; |
1268 | const BasicBlock *SrcBB = E->SrcBB; |
1269 | PGOUseBBInfo &SrcInfo = getBBInfo(BB: SrcBB); |
1270 | |
1271 | // If only one out-edge, the edge profile count should be the same as BB |
1272 | // profile count. |
1273 | if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1) |
1274 | setEdgeCount(E.get(), *SrcInfo.Count); |
1275 | else { |
1276 | const BasicBlock *DestBB = E->DestBB; |
1277 | PGOUseBBInfo &DestInfo = getBBInfo(BB: DestBB); |
1278 | // If only one in-edge, the edge profile count should be the same as BB |
1279 | // profile count. |
1280 | if (DestInfo.Count && DestInfo.InEdges.size() == 1) |
1281 | setEdgeCount(E.get(), *DestInfo.Count); |
1282 | } |
1283 | if (E->Count) |
1284 | continue; |
1285 | // E's count should have been set from profile. If not, this meenas E skips |
1286 | // the instrumentation. We set the count to 0. |
1287 | setEdgeCount(E.get(), 0); |
1288 | } |
1289 | return true; |
1290 | } |
1291 | |
1292 | // Set the count value for the unknown edge. There should be one and only one |
1293 | // unknown edge in Edges vector. |
1294 | void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { |
1295 | for (auto &E : Edges) { |
1296 | if (E->Count) |
1297 | continue; |
1298 | E->setEdgeCount(Value); |
1299 | |
1300 | getBBInfo(BB: E->SrcBB).UnknownCountOutEdge--; |
1301 | getBBInfo(BB: E->DestBB).UnknownCountInEdge--; |
1302 | return; |
1303 | } |
1304 | llvm_unreachable("Cannot find the unknown count edge" ); |
1305 | } |
1306 | |
1307 | // Emit function metadata indicating PGO profile mismatch. |
1308 | static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) { |
1309 | const char MetadataName[] = "instr_prof_hash_mismatch" ; |
1310 | SmallVector<Metadata *, 2> Names; |
1311 | // If this metadata already exists, ignore. |
1312 | auto *Existing = F.getMetadata(KindID: LLVMContext::MD_annotation); |
1313 | if (Existing) { |
1314 | MDTuple *Tuple = cast<MDTuple>(Val: Existing); |
1315 | for (const auto &N : Tuple->operands()) { |
1316 | if (N.equalsStr(Str: MetadataName)) |
1317 | return; |
1318 | Names.push_back(Elt: N.get()); |
1319 | } |
1320 | } |
1321 | |
1322 | MDBuilder MDB(ctx); |
1323 | Names.push_back(Elt: MDB.createString(Str: MetadataName)); |
1324 | MDNode *MD = MDTuple::get(Context&: ctx, MDs: Names); |
1325 | F.setMetadata(KindID: LLVMContext::MD_annotation, Node: MD); |
1326 | } |
1327 | |
1328 | void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) { |
1329 | handleAllErrors(E: std::move(Err), Handlers: [&](const InstrProfError &IPE) { |
1330 | auto &Ctx = M->getContext(); |
1331 | auto Err = IPE.get(); |
1332 | bool SkipWarning = false; |
1333 | LLVM_DEBUG(dbgs() << "Error in reading profile for Func " |
1334 | << FuncInfo.FuncName << ": " ); |
1335 | if (Err == instrprof_error::unknown_function) { |
1336 | IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; |
1337 | SkipWarning = !PGOWarnMissing; |
1338 | LLVM_DEBUG(dbgs() << "unknown function" ); |
1339 | } else if (Err == instrprof_error::hash_mismatch || |
1340 | Err == instrprof_error::malformed) { |
1341 | IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; |
1342 | SkipWarning = |
1343 | NoPGOWarnMismatch || |
1344 | (NoPGOWarnMismatchComdatWeak && |
1345 | (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || |
1346 | F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); |
1347 | LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash |
1348 | << " skip=" << SkipWarning << ")" ); |
1349 | // Emit function metadata indicating PGO profile mismatch. |
1350 | annotateFunctionWithHashMismatch(F, ctx&: M->getContext()); |
1351 | } |
1352 | |
1353 | LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n" ); |
1354 | if (SkipWarning) |
1355 | return; |
1356 | |
1357 | std::string Msg = |
1358 | IPE.message() + std::string(" " ) + F.getName().str() + |
1359 | std::string(" Hash = " ) + std::to_string(val: FuncInfo.FunctionHash) + |
1360 | std::string(" up to " ) + std::to_string(val: MismatchedFuncSum) + |
1361 | std::string(" count discarded" ); |
1362 | |
1363 | Ctx.diagnose( |
1364 | DI: DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); |
1365 | }); |
1366 | } |
1367 | |
1368 | // Read the profile from ProfileFileName and assign the value to the |
1369 | // instrumented BB and the edges. This function also updates ProgramMaxCount. |
1370 | // Return true if the profile are successfully read, and false on errors. |
1371 | bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, |
1372 | InstrProfRecord::CountPseudoKind &PseudoKind) { |
1373 | auto &Ctx = M->getContext(); |
1374 | uint64_t MismatchedFuncSum = 0; |
1375 | Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( |
1376 | FuncName: FuncInfo.FuncName, FuncHash: FuncInfo.FunctionHash, DeprecatedFuncName: FuncInfo.DeprecatedFuncName, |
1377 | MismatchedFuncSum: &MismatchedFuncSum); |
1378 | if (Error E = Result.takeError()) { |
1379 | handleInstrProfError(Err: std::move(E), MismatchedFuncSum); |
1380 | return false; |
1381 | } |
1382 | ProfileRecord = std::move(Result.get()); |
1383 | PseudoKind = ProfileRecord.getCountPseudoKind(); |
1384 | if (PseudoKind != InstrProfRecord::NotPseudo) { |
1385 | return true; |
1386 | } |
1387 | std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; |
1388 | |
1389 | IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; |
1390 | LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n" ); |
1391 | |
1392 | uint64_t ValueSum = 0; |
1393 | for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { |
1394 | LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n" ); |
1395 | ValueSum += CountFromProfile[I]; |
1396 | } |
1397 | AllZeros = (ValueSum == 0); |
1398 | |
1399 | LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n" ); |
1400 | |
1401 | getBBInfo(BB: nullptr).UnknownCountOutEdge = 2; |
1402 | getBBInfo(BB: nullptr).UnknownCountInEdge = 2; |
1403 | |
1404 | if (!setInstrumentedCounts(CountFromProfile)) { |
1405 | LLVM_DEBUG( |
1406 | dbgs() << "Inconsistent number of counts, skipping this function" ); |
1407 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1408 | M->getName().data(), |
1409 | Twine("Inconsistent number of counts in " ) + F.getName().str() + |
1410 | Twine(": the profile may be stale or there is a function name " |
1411 | "collision." ), |
1412 | DS_Warning)); |
1413 | return false; |
1414 | } |
1415 | ProgramMaxCount = PGOReader->getMaximumFunctionCount(UseCS: IsCS); |
1416 | return true; |
1417 | } |
1418 | |
1419 | void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) { |
1420 | uint64_t MismatchedFuncSum = 0; |
1421 | Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( |
1422 | FuncName: FuncInfo.FuncName, FuncHash: FuncInfo.FunctionHash, DeprecatedFuncName: FuncInfo.DeprecatedFuncName, |
1423 | MismatchedFuncSum: &MismatchedFuncSum); |
1424 | if (auto Err = Result.takeError()) { |
1425 | handleInstrProfError(Err: std::move(Err), MismatchedFuncSum); |
1426 | return; |
1427 | } |
1428 | IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; |
1429 | |
1430 | std::vector<uint64_t> &CountsFromProfile = Result.get().Counts; |
1431 | DenseMap<const BasicBlock *, bool> Coverage; |
1432 | unsigned Index = 0; |
1433 | for (auto &BB : F) |
1434 | if (FuncInfo.BCI->shouldInstrumentBlock(BB)) |
1435 | Coverage[&BB] = (CountsFromProfile[Index++] != 0); |
1436 | assert(Index == CountsFromProfile.size()); |
1437 | |
1438 | // For each B in InverseDependencies[A], if A is covered then B is covered. |
1439 | DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>> |
1440 | InverseDependencies; |
1441 | for (auto &BB : F) { |
1442 | for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) { |
1443 | // If Dep is covered then BB is covered. |
1444 | InverseDependencies[Dep].insert(V: &BB); |
1445 | } |
1446 | } |
1447 | |
1448 | // Infer coverage of the non-instrumented blocks using a flood-fill algorithm. |
1449 | std::stack<const BasicBlock *> CoveredBlocksToProcess; |
1450 | for (auto &[BB, IsCovered] : Coverage) |
1451 | if (IsCovered) |
1452 | CoveredBlocksToProcess.push(x: BB); |
1453 | |
1454 | while (!CoveredBlocksToProcess.empty()) { |
1455 | auto *CoveredBlock = CoveredBlocksToProcess.top(); |
1456 | assert(Coverage[CoveredBlock]); |
1457 | CoveredBlocksToProcess.pop(); |
1458 | for (auto *BB : InverseDependencies[CoveredBlock]) { |
1459 | // If CoveredBlock is covered then BB is covered. |
1460 | if (Coverage[BB]) |
1461 | continue; |
1462 | Coverage[BB] = true; |
1463 | CoveredBlocksToProcess.push(x: BB); |
1464 | } |
1465 | } |
1466 | |
1467 | // Annotate block coverage. |
1468 | MDBuilder MDB(F.getContext()); |
1469 | // We set the entry count to 10000 if the entry block is covered so that BFI |
1470 | // can propagate a fraction of this count to the other covered blocks. |
1471 | F.setEntryCount(Count: Coverage[&F.getEntryBlock()] ? 10000 : 0); |
1472 | for (auto &BB : F) { |
1473 | // For a block A and its successor B, we set the edge weight as follows: |
1474 | // If A is covered and B is covered, set weight=1. |
1475 | // If A is covered and B is uncovered, set weight=0. |
1476 | // If A is uncovered, set weight=1. |
1477 | // This setup will allow BFI to give nonzero profile counts to only covered |
1478 | // blocks. |
1479 | SmallVector<uint32_t, 4> Weights; |
1480 | for (auto *Succ : successors(BB: &BB)) |
1481 | Weights.push_back(Elt: (Coverage[Succ] || !Coverage[&BB]) ? 1 : 0); |
1482 | if (Weights.size() >= 2) |
1483 | llvm::setBranchWeights(I&: *BB.getTerminator(), Weights, |
1484 | /*IsExpected=*/false); |
1485 | } |
1486 | |
1487 | unsigned NumCorruptCoverage = 0; |
1488 | DominatorTree DT(F); |
1489 | LoopInfo LI(DT); |
1490 | BranchProbabilityInfo BPI(F, LI); |
1491 | BlockFrequencyInfo BFI(F, BPI, LI); |
1492 | auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> { |
1493 | if (auto C = BFI.getBlockProfileCount(BB: &BB)) |
1494 | return C == 0; |
1495 | return {}; |
1496 | }; |
1497 | LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n" ); |
1498 | for (auto &BB : F) { |
1499 | LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " " ) |
1500 | << (Coverage[&BB] ? "X " : " " ) << " " << BB.getName() |
1501 | << "\n" ); |
1502 | // In some cases it is possible to find a covered block that has no covered |
1503 | // successors, e.g., when a block calls a function that may call exit(). In |
1504 | // those cases, BFI could find its successor to be covered while BCI could |
1505 | // find its successor to be dead. |
1506 | if (Coverage[&BB] == IsBlockDead(BB).value_or(u: false)) { |
1507 | LLVM_DEBUG( |
1508 | dbgs() << "Found inconsistent block covearge for " << BB.getName() |
1509 | << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead" ) << " BFI=" |
1510 | << (IsBlockDead(BB).value() ? "Dead" : "Covered" ) << "\n" ); |
1511 | ++NumCorruptCoverage; |
1512 | } |
1513 | if (Coverage[&BB]) |
1514 | ++NumCoveredBlocks; |
1515 | } |
1516 | if (PGOVerifyBFI && NumCorruptCoverage) { |
1517 | auto &Ctx = M->getContext(); |
1518 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1519 | M->getName().data(), |
1520 | Twine("Found inconsistent block coverage for function " ) + F.getName() + |
1521 | " in " + Twine(NumCorruptCoverage) + " blocks." , |
1522 | DS_Warning)); |
1523 | } |
1524 | if (PGOViewBlockCoverageGraph) |
1525 | FuncInfo.BCI->viewBlockCoverageGraph(Coverage: &Coverage); |
1526 | } |
1527 | |
1528 | // Populate the counters from instrumented BBs to all BBs. |
1529 | // In the end of this operation, all BBs should have a valid count value. |
1530 | void PGOUseFunc::populateCounters() { |
1531 | bool Changes = true; |
1532 | unsigned NumPasses = 0; |
1533 | while (Changes) { |
1534 | NumPasses++; |
1535 | Changes = false; |
1536 | |
1537 | // For efficient traversal, it's better to start from the end as most |
1538 | // of the instrumented edges are at the end. |
1539 | for (auto &BB : reverse(C&: F)) { |
1540 | PGOUseBBInfo *UseBBInfo = findBBInfo(BB: &BB); |
1541 | if (UseBBInfo == nullptr) |
1542 | continue; |
1543 | if (!UseBBInfo->Count) { |
1544 | if (UseBBInfo->UnknownCountOutEdge == 0) { |
1545 | UseBBInfo->Count = sumEdgeCount(Edges: UseBBInfo->OutEdges); |
1546 | Changes = true; |
1547 | } else if (UseBBInfo->UnknownCountInEdge == 0) { |
1548 | UseBBInfo->Count = sumEdgeCount(Edges: UseBBInfo->InEdges); |
1549 | Changes = true; |
1550 | } |
1551 | } |
1552 | if (UseBBInfo->Count) { |
1553 | if (UseBBInfo->UnknownCountOutEdge == 1) { |
1554 | uint64_t Total = 0; |
1555 | uint64_t OutSum = sumEdgeCount(Edges: UseBBInfo->OutEdges); |
1556 | // If the one of the successor block can early terminate (no-return), |
1557 | // we can end up with situation where out edge sum count is larger as |
1558 | // the source BB's count is collected by a post-dominated block. |
1559 | if (*UseBBInfo->Count > OutSum) |
1560 | Total = *UseBBInfo->Count - OutSum; |
1561 | setEdgeCount(Edges&: UseBBInfo->OutEdges, Value: Total); |
1562 | Changes = true; |
1563 | } |
1564 | if (UseBBInfo->UnknownCountInEdge == 1) { |
1565 | uint64_t Total = 0; |
1566 | uint64_t InSum = sumEdgeCount(Edges: UseBBInfo->InEdges); |
1567 | if (*UseBBInfo->Count > InSum) |
1568 | Total = *UseBBInfo->Count - InSum; |
1569 | setEdgeCount(Edges&: UseBBInfo->InEdges, Value: Total); |
1570 | Changes = true; |
1571 | } |
1572 | } |
1573 | } |
1574 | } |
1575 | |
1576 | LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n" ); |
1577 | (void)NumPasses; |
1578 | #ifndef NDEBUG |
1579 | // Assert every BB has a valid counter. |
1580 | for (auto &BB : F) { |
1581 | auto BI = findBBInfo(&BB); |
1582 | if (BI == nullptr) |
1583 | continue; |
1584 | assert(BI->Count && "BB count is not valid" ); |
1585 | } |
1586 | #endif |
1587 | uint64_t FuncEntryCount = *getBBInfo(BB: &*F.begin()).Count; |
1588 | uint64_t FuncMaxCount = FuncEntryCount; |
1589 | for (auto &BB : F) { |
1590 | auto BI = findBBInfo(BB: &BB); |
1591 | if (BI == nullptr) |
1592 | continue; |
1593 | FuncMaxCount = std::max(a: FuncMaxCount, b: *BI->Count); |
1594 | } |
1595 | |
1596 | // Fix the obviously inconsistent entry count. |
1597 | if (FuncMaxCount > 0 && FuncEntryCount == 0) |
1598 | FuncEntryCount = 1; |
1599 | F.setEntryCount(Count: ProfileCount(FuncEntryCount, Function::PCT_Real)); |
1600 | markFunctionAttributes(EntryCount: FuncEntryCount, MaxCount: FuncMaxCount); |
1601 | |
1602 | // Now annotate select instructions |
1603 | FuncInfo.SIVisitor.annotateSelects(UF: this, Ind: &CountPosition); |
1604 | assert(CountPosition == ProfileCountSize); |
1605 | |
1606 | LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile." )); |
1607 | } |
1608 | |
1609 | // Assign the scaled count values to the BB with multiple out edges. |
1610 | void PGOUseFunc::setBranchWeights() { |
1611 | // Generate MD_prof metadata for every branch instruction. |
1612 | LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() |
1613 | << " IsCS=" << IsCS << "\n" ); |
1614 | for (auto &BB : F) { |
1615 | Instruction *TI = BB.getTerminator(); |
1616 | if (TI->getNumSuccessors() < 2) |
1617 | continue; |
1618 | if (!(isa<BranchInst>(Val: TI) || isa<SwitchInst>(Val: TI) || |
1619 | isa<IndirectBrInst>(Val: TI) || isa<InvokeInst>(Val: TI) || |
1620 | isa<CallBrInst>(Val: TI))) |
1621 | continue; |
1622 | |
1623 | const PGOUseBBInfo &BBCountInfo = getBBInfo(BB: &BB); |
1624 | if (!*BBCountInfo.Count) |
1625 | continue; |
1626 | |
1627 | // We have a non-zero Branch BB. |
1628 | |
1629 | // SuccessorCount can be greater than OutEdgesCount, because |
1630 | // removed edges don't appear in OutEdges. |
1631 | unsigned OutEdgesCount = BBCountInfo.OutEdges.size(); |
1632 | unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors(); |
1633 | assert(OutEdgesCount <= SuccessorCount); |
1634 | |
1635 | SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0); |
1636 | uint64_t MaxCount = 0; |
1637 | for (unsigned It = 0; It < OutEdgesCount; It++) { |
1638 | const PGOUseEdge *E = BBCountInfo.OutEdges[It]; |
1639 | const BasicBlock *SrcBB = E->SrcBB; |
1640 | const BasicBlock *DestBB = E->DestBB; |
1641 | if (DestBB == nullptr) |
1642 | continue; |
1643 | unsigned SuccNum = GetSuccessorNumber(BB: SrcBB, Succ: DestBB); |
1644 | uint64_t EdgeCount = *E->Count; |
1645 | if (EdgeCount > MaxCount) |
1646 | MaxCount = EdgeCount; |
1647 | EdgeCounts[SuccNum] = EdgeCount; |
1648 | } |
1649 | |
1650 | if (MaxCount) |
1651 | setProfMetadata(M, TI, EdgeCounts, MaxCount); |
1652 | else { |
1653 | // A zero MaxCount can come about when we have a BB with a positive |
1654 | // count, and whose successor blocks all have 0 count. This can happen |
1655 | // when there is no exit block and the code exits via a noreturn function. |
1656 | auto &Ctx = M->getContext(); |
1657 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1658 | M->getName().data(), |
1659 | Twine("Profile in " ) + F.getName().str() + |
1660 | Twine(" partially ignored" ) + |
1661 | Twine(", possibly due to the lack of a return path." ), |
1662 | DS_Warning)); |
1663 | } |
1664 | } |
1665 | } |
1666 | |
1667 | static bool isIndirectBrTarget(BasicBlock *BB) { |
1668 | for (BasicBlock *Pred : predecessors(BB)) { |
1669 | if (isa<IndirectBrInst>(Val: Pred->getTerminator())) |
1670 | return true; |
1671 | } |
1672 | return false; |
1673 | } |
1674 | |
1675 | void PGOUseFunc::() { |
1676 | LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n" ); |
1677 | // Find irr loop headers |
1678 | for (auto &BB : F) { |
1679 | // As a heuristic also annotate indrectbr targets as they have a high chance |
1680 | // to become an irreducible loop header after the indirectbr tail |
1681 | // duplication. |
1682 | if (BFI->isIrrLoopHeader(BB: &BB) || isIndirectBrTarget(BB: &BB)) { |
1683 | Instruction *TI = BB.getTerminator(); |
1684 | const PGOUseBBInfo &BBCountInfo = getBBInfo(BB: &BB); |
1685 | setIrrLoopHeaderMetadata(M, TI, Count: *BBCountInfo.Count); |
1686 | } |
1687 | } |
1688 | } |
1689 | |
1690 | void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { |
1691 | Module *M = F.getParent(); |
1692 | IRBuilder<> Builder(&SI); |
1693 | Type *Int64Ty = Builder.getInt64Ty(); |
1694 | auto *Step = Builder.CreateZExt(V: SI.getCondition(), DestTy: Int64Ty); |
1695 | Builder.CreateCall( |
1696 | Callee: Intrinsic::getDeclaration(M, id: Intrinsic::instrprof_increment_step), |
1697 | Args: {FuncNameVar, Builder.getInt64(C: FuncHash), Builder.getInt32(C: TotalNumCtrs), |
1698 | Builder.getInt32(C: *CurCtrIdx), Step}); |
1699 | ++(*CurCtrIdx); |
1700 | } |
1701 | |
1702 | void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { |
1703 | std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; |
1704 | assert(*CurCtrIdx < CountFromProfile.size() && |
1705 | "Out of bound access of counters" ); |
1706 | uint64_t SCounts[2]; |
1707 | SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count |
1708 | ++(*CurCtrIdx); |
1709 | uint64_t TotalCount = 0; |
1710 | auto BI = UseFunc->findBBInfo(BB: SI.getParent()); |
1711 | if (BI != nullptr) |
1712 | TotalCount = *BI->Count; |
1713 | // False Count |
1714 | SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); |
1715 | uint64_t MaxCount = std::max(a: SCounts[0], b: SCounts[1]); |
1716 | if (MaxCount) |
1717 | setProfMetadata(M: F.getParent(), TI: &SI, EdgeCounts: SCounts, MaxCount); |
1718 | } |
1719 | |
1720 | void SelectInstVisitor::visitSelectInst(SelectInst &SI) { |
1721 | if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage) |
1722 | return; |
1723 | // FIXME: do not handle this yet. |
1724 | if (SI.getCondition()->getType()->isVectorTy()) |
1725 | return; |
1726 | |
1727 | switch (Mode) { |
1728 | case VM_counting: |
1729 | NSIs++; |
1730 | return; |
1731 | case VM_instrument: |
1732 | instrumentOneSelectInst(SI); |
1733 | return; |
1734 | case VM_annotate: |
1735 | annotateOneSelectInst(SI); |
1736 | return; |
1737 | } |
1738 | |
1739 | llvm_unreachable("Unknown visiting mode" ); |
1740 | } |
1741 | |
1742 | static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) { |
1743 | if (ValueProfKind == IPVK_MemOPSize) |
1744 | return MaxNumMemOPAnnotations; |
1745 | if (ValueProfKind == llvm::IPVK_VTableTarget) |
1746 | return MaxNumVTableAnnotations; |
1747 | return MaxNumAnnotations; |
1748 | } |
1749 | |
1750 | // Traverse all valuesites and annotate the instructions for all value kind. |
1751 | void PGOUseFunc::annotateValueSites() { |
1752 | if (isValueProfilingDisabled()) |
1753 | return; |
1754 | |
1755 | // Create the PGOFuncName meta data. |
1756 | createPGOFuncNameMetadata(F, PGOFuncName: FuncInfo.FuncName); |
1757 | |
1758 | for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) |
1759 | annotateValueSites(Kind); |
1760 | } |
1761 | |
1762 | // Annotate the instructions for a specific value kind. |
1763 | void PGOUseFunc::annotateValueSites(uint32_t Kind) { |
1764 | assert(Kind <= IPVK_Last); |
1765 | unsigned ValueSiteIndex = 0; |
1766 | |
1767 | unsigned NumValueSites = ProfileRecord.getNumValueSites(ValueKind: Kind); |
1768 | |
1769 | // Since there isn't a reliable or fast way for profile reader to tell if a |
1770 | // profile is generated with `-enable-vtable-value-profiling` on, we run the |
1771 | // value profile collector over the function IR to find the instrumented sites |
1772 | // iff function profile records shows the number of instrumented vtable sites |
1773 | // is not zero. Function cfg already takes the number of instrumented |
1774 | // indirect call sites into account so it doesn't hash the number of |
1775 | // instrumented vtables; as a side effect it makes it easier to enable |
1776 | // profiling and profile use in two steps if needed. |
1777 | // TODO: Remove this if/when -enable-vtable-value-profiling is on by default. |
1778 | if (NumValueSites > 0 && Kind == IPVK_VTableTarget && |
1779 | NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() && |
1780 | MaxNumVTableAnnotations != 0) |
1781 | FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(Kind: IPVK_VTableTarget); |
1782 | auto &ValueSites = FuncInfo.ValueSites[Kind]; |
1783 | if (NumValueSites != ValueSites.size()) { |
1784 | auto &Ctx = M->getContext(); |
1785 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1786 | M->getName().data(), |
1787 | Twine("Inconsistent number of value sites for " ) + |
1788 | Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"" ) + |
1789 | F.getName().str() + |
1790 | Twine("\", possibly due to the use of a stale profile." ), |
1791 | DS_Warning)); |
1792 | return; |
1793 | } |
1794 | |
1795 | for (VPCandidateInfo &I : ValueSites) { |
1796 | LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind |
1797 | << "): Index = " << ValueSiteIndex << " out of " |
1798 | << NumValueSites << "\n" ); |
1799 | annotateValueSite( |
1800 | M&: *M, Inst&: *I.AnnotatedInst, InstrProfR: ProfileRecord, |
1801 | ValueKind: static_cast<InstrProfValueKind>(Kind), SiteIndx: ValueSiteIndex, |
1802 | MaxMDCount: getMaxNumAnnotations(ValueProfKind: static_cast<InstrProfValueKind>(Kind))); |
1803 | ValueSiteIndex++; |
1804 | } |
1805 | } |
1806 | |
1807 | // Collect the set of members for each Comdat in module M and store |
1808 | // in ComdatMembers. |
1809 | static void collectComdatMembers( |
1810 | Module &M, |
1811 | std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { |
1812 | if (!DoComdatRenaming) |
1813 | return; |
1814 | for (Function &F : M) |
1815 | if (Comdat *C = F.getComdat()) |
1816 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &F)); |
1817 | for (GlobalVariable &GV : M.globals()) |
1818 | if (Comdat *C = GV.getComdat()) |
1819 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &GV)); |
1820 | for (GlobalAlias &GA : M.aliases()) |
1821 | if (Comdat *C = GA.getComdat()) |
1822 | ComdatMembers.insert(x: std::make_pair(x&: C, y: &GA)); |
1823 | } |
1824 | |
1825 | // Return true if we should not find instrumentation data for this function |
1826 | static bool skipPGOUse(const Function &F) { |
1827 | if (F.isDeclaration()) |
1828 | return true; |
1829 | // If there are too many critical edges, PGO might cause |
1830 | // compiler time problem. Skip PGO if the number of |
1831 | // critical edges execeed the threshold. |
1832 | unsigned NumCriticalEdges = 0; |
1833 | for (auto &BB : F) { |
1834 | const Instruction *TI = BB.getTerminator(); |
1835 | for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { |
1836 | if (isCriticalEdge(TI, SuccNum: I)) |
1837 | NumCriticalEdges++; |
1838 | } |
1839 | } |
1840 | if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { |
1841 | LLVM_DEBUG(dbgs() << "In func " << F.getName() |
1842 | << ", NumCriticalEdges=" << NumCriticalEdges |
1843 | << " exceed the threshold. Skip PGO.\n" ); |
1844 | return true; |
1845 | } |
1846 | return false; |
1847 | } |
1848 | |
1849 | // Return true if we should not instrument this function |
1850 | static bool skipPGOGen(const Function &F) { |
1851 | if (skipPGOUse(F)) |
1852 | return true; |
1853 | if (F.hasFnAttribute(Kind: llvm::Attribute::Naked)) |
1854 | return true; |
1855 | if (F.hasFnAttribute(Kind: llvm::Attribute::NoProfile)) |
1856 | return true; |
1857 | if (F.hasFnAttribute(Kind: llvm::Attribute::SkipProfile)) |
1858 | return true; |
1859 | if (F.getInstructionCount() < PGOFunctionSizeThreshold) |
1860 | return true; |
1861 | return false; |
1862 | } |
1863 | |
1864 | static bool InstrumentAllFunctions( |
1865 | Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, |
1866 | function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, |
1867 | function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { |
1868 | // For the context-sensitve instrumentation, we should have a separated pass |
1869 | // (before LTO/ThinLTO linking) to create these variables. |
1870 | if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) |
1871 | createIRLevelProfileFlagVar(M, /*IsCS=*/false); |
1872 | |
1873 | Triple TT(M.getTargetTriple()); |
1874 | LLVMContext &Ctx = M.getContext(); |
1875 | if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling) |
1876 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
1877 | M.getName().data(), |
1878 | Twine("VTable value profiling is presently not " |
1879 | "supported for non-ELF object formats" ), |
1880 | DS_Warning)); |
1881 | std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; |
1882 | collectComdatMembers(M, ComdatMembers); |
1883 | |
1884 | for (auto &F : M) { |
1885 | if (skipPGOGen(F)) |
1886 | continue; |
1887 | auto &TLI = LookupTLI(F); |
1888 | auto *BPI = LookupBPI(F); |
1889 | auto *BFI = LookupBFI(F); |
1890 | instrumentOneFunc(F, M: &M, TLI, BPI, BFI, ComdatMembers, IsCS); |
1891 | } |
1892 | return true; |
1893 | } |
1894 | |
1895 | PreservedAnalyses |
1896 | PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { |
1897 | createProfileFileNameVar(M, InstrProfileOutput: CSInstrName); |
1898 | // The variable in a comdat may be discarded by LTO. Ensure the declaration |
1899 | // will be retained. |
1900 | appendToCompilerUsed(M, Values: createIRLevelProfileFlagVar(M, /*IsCS=*/true)); |
1901 | if (ProfileSampling) |
1902 | createProfileSamplingVar(M); |
1903 | PreservedAnalyses PA; |
1904 | PA.preserve<FunctionAnalysisManagerModuleProxy>(); |
1905 | PA.preserveSet<AllAnalysesOn<Function>>(); |
1906 | return PA; |
1907 | } |
1908 | |
1909 | PreservedAnalyses PGOInstrumentationGen::run(Module &M, |
1910 | ModuleAnalysisManager &MAM) { |
1911 | auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
1912 | auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { |
1913 | return FAM.getResult<TargetLibraryAnalysis>(IR&: F); |
1914 | }; |
1915 | auto LookupBPI = [&FAM](Function &F) { |
1916 | return &FAM.getResult<BranchProbabilityAnalysis>(IR&: F); |
1917 | }; |
1918 | auto LookupBFI = [&FAM](Function &F) { |
1919 | return &FAM.getResult<BlockFrequencyAnalysis>(IR&: F); |
1920 | }; |
1921 | |
1922 | if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) |
1923 | return PreservedAnalyses::all(); |
1924 | |
1925 | return PreservedAnalyses::none(); |
1926 | } |
1927 | |
1928 | // Using the ratio b/w sums of profile count values and BFI count values to |
1929 | // adjust the func entry count. |
1930 | static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, |
1931 | BranchProbabilityInfo &NBPI) { |
1932 | Function &F = Func.getFunc(); |
1933 | BlockFrequencyInfo NBFI(F, NBPI, LI); |
1934 | #ifndef NDEBUG |
1935 | auto BFIEntryCount = F.getEntryCount(); |
1936 | assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && |
1937 | "Invalid BFI Entrycount" ); |
1938 | #endif |
1939 | auto SumCount = APFloat::getZero(Sem: APFloat::IEEEdouble()); |
1940 | auto SumBFICount = APFloat::getZero(Sem: APFloat::IEEEdouble()); |
1941 | for (auto &BBI : F) { |
1942 | uint64_t CountValue = 0; |
1943 | uint64_t BFICountValue = 0; |
1944 | if (!Func.findBBInfo(BB: &BBI)) |
1945 | continue; |
1946 | auto BFICount = NBFI.getBlockProfileCount(BB: &BBI); |
1947 | CountValue = *Func.getBBInfo(BB: &BBI).Count; |
1948 | BFICountValue = *BFICount; |
1949 | SumCount.add(RHS: APFloat(CountValue * 1.0), RM: APFloat::rmNearestTiesToEven); |
1950 | SumBFICount.add(RHS: APFloat(BFICountValue * 1.0), RM: APFloat::rmNearestTiesToEven); |
1951 | } |
1952 | if (SumCount.isZero()) |
1953 | return; |
1954 | |
1955 | assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan && |
1956 | "Incorrect sum of BFI counts" ); |
1957 | if (SumBFICount.compare(RHS: SumCount) == APFloat::cmpEqual) |
1958 | return; |
1959 | double Scale = (SumCount / SumBFICount).convertToDouble(); |
1960 | if (Scale < 1.001 && Scale > 0.999) |
1961 | return; |
1962 | |
1963 | uint64_t FuncEntryCount = *Func.getBBInfo(BB: &*F.begin()).Count; |
1964 | uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale; |
1965 | if (NewEntryCount == 0) |
1966 | NewEntryCount = 1; |
1967 | if (NewEntryCount != FuncEntryCount) { |
1968 | F.setEntryCount(Count: ProfileCount(NewEntryCount, Function::PCT_Real)); |
1969 | LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() |
1970 | << ", entry_count " << FuncEntryCount << " --> " |
1971 | << NewEntryCount << "\n" ); |
1972 | } |
1973 | } |
1974 | |
1975 | // Compare the profile count values with BFI count values, and print out |
1976 | // the non-matching ones. |
1977 | static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI, |
1978 | BranchProbabilityInfo &NBPI, |
1979 | uint64_t HotCountThreshold, |
1980 | uint64_t ColdCountThreshold) { |
1981 | Function &F = Func.getFunc(); |
1982 | BlockFrequencyInfo NBFI(F, NBPI, LI); |
1983 | // bool PrintFunc = false; |
1984 | bool HotBBOnly = PGOVerifyHotBFI; |
1985 | StringRef Msg; |
1986 | OptimizationRemarkEmitter ORE(&F); |
1987 | |
1988 | unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0; |
1989 | for (auto &BBI : F) { |
1990 | uint64_t CountValue = 0; |
1991 | uint64_t BFICountValue = 0; |
1992 | |
1993 | CountValue = Func.getBBInfo(BB: &BBI).Count.value_or(u&: CountValue); |
1994 | |
1995 | BBNum++; |
1996 | if (CountValue) |
1997 | NonZeroBBNum++; |
1998 | auto BFICount = NBFI.getBlockProfileCount(BB: &BBI); |
1999 | if (BFICount) |
2000 | BFICountValue = *BFICount; |
2001 | |
2002 | if (HotBBOnly) { |
2003 | bool rawIsHot = CountValue >= HotCountThreshold; |
2004 | bool BFIIsHot = BFICountValue >= HotCountThreshold; |
2005 | bool rawIsCold = CountValue <= ColdCountThreshold; |
2006 | bool ShowCount = false; |
2007 | if (rawIsHot && !BFIIsHot) { |
2008 | Msg = "raw-Hot to BFI-nonHot" ; |
2009 | ShowCount = true; |
2010 | } else if (rawIsCold && BFIIsHot) { |
2011 | Msg = "raw-Cold to BFI-Hot" ; |
2012 | ShowCount = true; |
2013 | } |
2014 | if (!ShowCount) |
2015 | continue; |
2016 | } else { |
2017 | if ((CountValue < PGOVerifyBFICutoff) && |
2018 | (BFICountValue < PGOVerifyBFICutoff)) |
2019 | continue; |
2020 | uint64_t Diff = (BFICountValue >= CountValue) |
2021 | ? BFICountValue - CountValue |
2022 | : CountValue - BFICountValue; |
2023 | if (Diff <= CountValue / 100 * PGOVerifyBFIRatio) |
2024 | continue; |
2025 | } |
2026 | BBMisMatchNum++; |
2027 | |
2028 | ORE.emit(RemarkBuilder: [&]() { |
2029 | OptimizationRemarkAnalysis (DEBUG_TYPE, "bfi-verify" , |
2030 | F.getSubprogram(), &BBI); |
2031 | Remark << "BB " << ore::NV("Block" , BBI.getName()) |
2032 | << " Count=" << ore::NV("Count" , CountValue) |
2033 | << " BFI_Count=" << ore::NV("Count" , BFICountValue); |
2034 | if (!Msg.empty()) |
2035 | Remark << " (" << Msg << ")" ; |
2036 | return Remark; |
2037 | }); |
2038 | } |
2039 | if (BBMisMatchNum) |
2040 | ORE.emit(RemarkBuilder: [&]() { |
2041 | return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify" , |
2042 | F.getSubprogram(), &F.getEntryBlock()) |
2043 | << "In Func " << ore::NV("Function" , F.getName()) |
2044 | << ": Num_of_BB=" << ore::NV("Count" , BBNum) |
2045 | << ", Num_of_non_zerovalue_BB=" << ore::NV("Count" , NonZeroBBNum) |
2046 | << ", Num_of_mis_matching_BB=" << ore::NV("Count" , BBMisMatchNum); |
2047 | }); |
2048 | } |
2049 | |
2050 | static bool annotateAllFunctions( |
2051 | Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, |
2052 | vfs::FileSystem &FS, |
2053 | function_ref<TargetLibraryInfo &(Function &)> LookupTLI, |
2054 | function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, |
2055 | function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, |
2056 | ProfileSummaryInfo *PSI, bool IsCS) { |
2057 | LLVM_DEBUG(dbgs() << "Read in profile counters: " ); |
2058 | auto &Ctx = M.getContext(); |
2059 | // Read the counter array from file. |
2060 | auto ReaderOrErr = IndexedInstrProfReader::create(Path: ProfileFileName, FS, |
2061 | RemappingPath: ProfileRemappingFileName); |
2062 | if (Error E = ReaderOrErr.takeError()) { |
2063 | handleAllErrors(E: std::move(E), Handlers: [&](const ErrorInfoBase &EI) { |
2064 | Ctx.diagnose( |
2065 | DI: DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); |
2066 | }); |
2067 | return false; |
2068 | } |
2069 | |
2070 | std::unique_ptr<IndexedInstrProfReader> PGOReader = |
2071 | std::move(ReaderOrErr.get()); |
2072 | if (!PGOReader) { |
2073 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile(ProfileFileName.data(), |
2074 | StringRef("Cannot get PGOReader" ))); |
2075 | return false; |
2076 | } |
2077 | if (!PGOReader->hasCSIRLevelProfile() && IsCS) |
2078 | return false; |
2079 | |
2080 | // TODO: might need to change the warning once the clang option is finalized. |
2081 | if (!PGOReader->isIRLevelProfile()) { |
2082 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
2083 | ProfileFileName.data(), "Not an IR level instrumentation profile" )); |
2084 | return false; |
2085 | } |
2086 | if (PGOReader->functionEntryOnly()) { |
2087 | Ctx.diagnose(DI: DiagnosticInfoPGOProfile( |
2088 | ProfileFileName.data(), |
2089 | "Function entry profiles are not yet supported for optimization" )); |
2090 | return false; |
2091 | } |
2092 | |
2093 | if (EnableVTableProfileUse) { |
2094 | for (GlobalVariable &G : M.globals()) { |
2095 | if (!G.hasName() || !G.hasMetadata(KindID: LLVMContext::MD_type)) |
2096 | continue; |
2097 | |
2098 | // Create the PGOFuncName meta data. |
2099 | createPGONameMetadata(GO&: G, PGOName: getPGOName(V: G, InLTO: false /* InLTO*/)); |
2100 | } |
2101 | } |
2102 | |
2103 | // Add the profile summary (read from the header of the indexed summary) here |
2104 | // so that we can use it below when reading counters (which checks if the |
2105 | // function should be marked with a cold or inlinehint attribute). |
2106 | M.setProfileSummary(M: PGOReader->getSummary(UseCS: IsCS).getMD(Context&: M.getContext()), |
2107 | Kind: IsCS ? ProfileSummary::PSK_CSInstr |
2108 | : ProfileSummary::PSK_Instr); |
2109 | PSI->refresh(); |
2110 | |
2111 | std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; |
2112 | collectComdatMembers(M, ComdatMembers); |
2113 | std::vector<Function *> HotFunctions; |
2114 | std::vector<Function *> ColdFunctions; |
2115 | |
2116 | // If the profile marked as always instrument the entry BB, do the |
2117 | // same. Note this can be overwritten by the internal option in CFGMST.h |
2118 | bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled(); |
2119 | if (PGOInstrumentEntry.getNumOccurrences() > 0) |
2120 | InstrumentFuncEntry = PGOInstrumentEntry; |
2121 | InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled(); |
2122 | |
2123 | bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage(); |
2124 | for (auto &F : M) { |
2125 | if (skipPGOUse(F)) |
2126 | continue; |
2127 | auto &TLI = LookupTLI(F); |
2128 | auto *BPI = LookupBPI(F); |
2129 | auto *BFI = LookupBFI(F); |
2130 | if (!HasSingleByteCoverage) { |
2131 | // Split indirectbr critical edges here before computing the MST rather |
2132 | // than later in getInstrBB() to avoid invalidating it. |
2133 | SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, |
2134 | BFI); |
2135 | } |
2136 | PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS, |
2137 | InstrumentFuncEntry, HasSingleByteCoverage); |
2138 | if (HasSingleByteCoverage) { |
2139 | Func.populateCoverage(PGOReader: PGOReader.get()); |
2140 | continue; |
2141 | } |
2142 | // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo, |
2143 | // it means the profile for the function is unrepresentative and this |
2144 | // function is actually hot / warm. We will reset the function hot / cold |
2145 | // attribute and drop all the profile counters. |
2146 | InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo; |
2147 | bool AllZeros = false; |
2148 | if (!Func.readCounters(PGOReader: PGOReader.get(), AllZeros, PseudoKind)) |
2149 | continue; |
2150 | if (AllZeros) { |
2151 | F.setEntryCount(Count: ProfileCount(0, Function::PCT_Real)); |
2152 | if (Func.getProgramMaxCount() != 0) |
2153 | ColdFunctions.push_back(x: &F); |
2154 | continue; |
2155 | } |
2156 | if (PseudoKind != InstrProfRecord::NotPseudo) { |
2157 | // Clear function attribute cold. |
2158 | if (F.hasFnAttribute(Kind: Attribute::Cold)) |
2159 | F.removeFnAttr(Kind: Attribute::Cold); |
2160 | // Set function attribute as hot. |
2161 | if (PseudoKind == InstrProfRecord::PseudoHot) |
2162 | F.addFnAttr(Kind: Attribute::Hot); |
2163 | continue; |
2164 | } |
2165 | Func.populateCounters(); |
2166 | Func.setBranchWeights(); |
2167 | Func.annotateValueSites(); |
2168 | Func.annotateIrrLoopHeaderWeights(); |
2169 | PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); |
2170 | if (FreqAttr == PGOUseFunc::FFA_Cold) |
2171 | ColdFunctions.push_back(x: &F); |
2172 | else if (FreqAttr == PGOUseFunc::FFA_Hot) |
2173 | HotFunctions.push_back(x: &F); |
2174 | if (PGOViewCounts != PGOVCT_None && |
2175 | (ViewBlockFreqFuncName.empty() || |
2176 | F.getName() == ViewBlockFreqFuncName)) { |
2177 | LoopInfo LI{DominatorTree(F)}; |
2178 | std::unique_ptr<BranchProbabilityInfo> NewBPI = |
2179 | std::make_unique<BranchProbabilityInfo>(args&: F, args&: LI); |
2180 | std::unique_ptr<BlockFrequencyInfo> NewBFI = |
2181 | std::make_unique<BlockFrequencyInfo>(args&: F, args&: *NewBPI, args&: LI); |
2182 | if (PGOViewCounts == PGOVCT_Graph) |
2183 | NewBFI->view(); |
2184 | else if (PGOViewCounts == PGOVCT_Text) { |
2185 | dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n" ; |
2186 | NewBFI->print(OS&: dbgs()); |
2187 | } |
2188 | } |
2189 | if (PGOViewRawCounts != PGOVCT_None && |
2190 | (ViewBlockFreqFuncName.empty() || |
2191 | F.getName() == ViewBlockFreqFuncName)) { |
2192 | if (PGOViewRawCounts == PGOVCT_Graph) |
2193 | if (ViewBlockFreqFuncName.empty()) |
2194 | WriteGraph(G: &Func, Name: Twine("PGORawCounts_" ) + Func.getFunc().getName()); |
2195 | else |
2196 | ViewGraph(G: &Func, Name: Twine("PGORawCounts_" ) + Func.getFunc().getName()); |
2197 | else if (PGOViewRawCounts == PGOVCT_Text) { |
2198 | dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n" ; |
2199 | Func.dumpInfo(); |
2200 | } |
2201 | } |
2202 | |
2203 | if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) { |
2204 | LoopInfo LI{DominatorTree(F)}; |
2205 | BranchProbabilityInfo NBPI(F, LI); |
2206 | |
2207 | // Fix func entry count. |
2208 | if (PGOFixEntryCount) |
2209 | fixFuncEntryCount(Func, LI, NBPI); |
2210 | |
2211 | // Verify BlockFrequency information. |
2212 | uint64_t HotCountThreshold = 0, ColdCountThreshold = 0; |
2213 | if (PGOVerifyHotBFI) { |
2214 | HotCountThreshold = PSI->getOrCompHotCountThreshold(); |
2215 | ColdCountThreshold = PSI->getOrCompColdCountThreshold(); |
2216 | } |
2217 | verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold); |
2218 | } |
2219 | } |
2220 | |
2221 | // Set function hotness attribute from the profile. |
2222 | // We have to apply these attributes at the end because their presence |
2223 | // can affect the BranchProbabilityInfo of any callers, resulting in an |
2224 | // inconsistent MST between prof-gen and prof-use. |
2225 | for (auto &F : HotFunctions) { |
2226 | F->addFnAttr(Kind: Attribute::InlineHint); |
2227 | LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() |
2228 | << "\n" ); |
2229 | } |
2230 | for (auto &F : ColdFunctions) { |
2231 | // Only set when there is no Attribute::Hot set by the user. For Hot |
2232 | // attribute, user's annotation has the precedence over the profile. |
2233 | if (F->hasFnAttribute(Kind: Attribute::Hot)) { |
2234 | auto &Ctx = M.getContext(); |
2235 | std::string Msg = std::string("Function " ) + F->getName().str() + |
2236 | std::string(" is annotated as a hot function but" |
2237 | " the profile is cold" ); |
2238 | Ctx.diagnose( |
2239 | DI: DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); |
2240 | continue; |
2241 | } |
2242 | F->addFnAttr(Kind: Attribute::Cold); |
2243 | LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() |
2244 | << "\n" ); |
2245 | } |
2246 | return true; |
2247 | } |
2248 | |
2249 | PGOInstrumentationUse::PGOInstrumentationUse( |
2250 | std::string Filename, std::string RemappingFilename, bool IsCS, |
2251 | IntrusiveRefCntPtr<vfs::FileSystem> VFS) |
2252 | : ProfileFileName(std::move(Filename)), |
2253 | ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS), |
2254 | FS(std::move(VFS)) { |
2255 | if (!PGOTestProfileFile.empty()) |
2256 | ProfileFileName = PGOTestProfileFile; |
2257 | if (!PGOTestProfileRemappingFile.empty()) |
2258 | ProfileRemappingFileName = PGOTestProfileRemappingFile; |
2259 | if (!FS) |
2260 | FS = vfs::getRealFileSystem(); |
2261 | } |
2262 | |
2263 | PreservedAnalyses PGOInstrumentationUse::run(Module &M, |
2264 | ModuleAnalysisManager &MAM) { |
2265 | |
2266 | auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
2267 | auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { |
2268 | return FAM.getResult<TargetLibraryAnalysis>(IR&: F); |
2269 | }; |
2270 | auto LookupBPI = [&FAM](Function &F) { |
2271 | return &FAM.getResult<BranchProbabilityAnalysis>(IR&: F); |
2272 | }; |
2273 | auto LookupBFI = [&FAM](Function &F) { |
2274 | return &FAM.getResult<BlockFrequencyAnalysis>(IR&: F); |
2275 | }; |
2276 | |
2277 | auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(IR&: M); |
2278 | if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, FS&: *FS, |
2279 | LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) |
2280 | return PreservedAnalyses::all(); |
2281 | |
2282 | return PreservedAnalyses::none(); |
2283 | } |
2284 | |
2285 | static std::string getSimpleNodeName(const BasicBlock *Node) { |
2286 | if (!Node->getName().empty()) |
2287 | return Node->getName().str(); |
2288 | |
2289 | std::string SimpleNodeName; |
2290 | raw_string_ostream OS(SimpleNodeName); |
2291 | Node->printAsOperand(O&: OS, PrintType: false); |
2292 | return SimpleNodeName; |
2293 | } |
2294 | |
2295 | void llvm::setProfMetadata(Module *M, Instruction *TI, |
2296 | ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) { |
2297 | assert(MaxCount > 0 && "Bad max count" ); |
2298 | uint64_t Scale = calculateCountScale(MaxCount); |
2299 | SmallVector<unsigned, 4> Weights; |
2300 | for (const auto &ECI : EdgeCounts) |
2301 | Weights.push_back(Elt: scaleBranchCount(Count: ECI, Scale)); |
2302 | |
2303 | LLVM_DEBUG(dbgs() << "Weight is: " ; for (const auto &W |
2304 | : Weights) { |
2305 | dbgs() << W << " " ; |
2306 | } dbgs() << "\n" ;); |
2307 | |
2308 | misexpect::checkExpectAnnotations(I&: *TI, ExistingWeights: Weights, /*IsFrontend=*/false); |
2309 | |
2310 | setBranchWeights(I&: *TI, Weights, /*IsExpected=*/false); |
2311 | if (EmitBranchProbability) { |
2312 | std::string BrCondStr = getBranchCondString(TI); |
2313 | if (BrCondStr.empty()) |
2314 | return; |
2315 | |
2316 | uint64_t WSum = |
2317 | std::accumulate(first: Weights.begin(), last: Weights.end(), init: (uint64_t)0, |
2318 | binary_op: [](uint64_t w1, uint64_t w2) { return w1 + w2; }); |
2319 | uint64_t TotalCount = |
2320 | std::accumulate(first: EdgeCounts.begin(), last: EdgeCounts.end(), init: (uint64_t)0, |
2321 | binary_op: [](uint64_t c1, uint64_t c2) { return c1 + c2; }); |
2322 | Scale = calculateCountScale(MaxCount: WSum); |
2323 | BranchProbability BP(scaleBranchCount(Count: Weights[0], Scale), |
2324 | scaleBranchCount(Count: WSum, Scale)); |
2325 | std::string BranchProbStr; |
2326 | raw_string_ostream OS(BranchProbStr); |
2327 | OS << BP; |
2328 | OS << " (total count : " << TotalCount << ")" ; |
2329 | OS.flush(); |
2330 | Function *F = TI->getParent()->getParent(); |
2331 | OptimizationRemarkEmitter ORE(F); |
2332 | ORE.emit(RemarkBuilder: [&]() { |
2333 | return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation" , TI) |
2334 | << BrCondStr << " is true with probability : " << BranchProbStr; |
2335 | }); |
2336 | } |
2337 | } |
2338 | |
2339 | namespace llvm { |
2340 | |
2341 | void (Module *M, Instruction *TI, uint64_t Count) { |
2342 | MDBuilder MDB(M->getContext()); |
2343 | TI->setMetadata(KindID: llvm::LLVMContext::MD_irr_loop, |
2344 | Node: MDB.createIrrLoopHeaderWeight(Weight: Count)); |
2345 | } |
2346 | |
2347 | template <> struct GraphTraits<PGOUseFunc *> { |
2348 | using NodeRef = const BasicBlock *; |
2349 | using ChildIteratorType = const_succ_iterator; |
2350 | using nodes_iterator = pointer_iterator<Function::const_iterator>; |
2351 | |
2352 | static NodeRef getEntryNode(const PGOUseFunc *G) { |
2353 | return &G->getFunc().front(); |
2354 | } |
2355 | |
2356 | static ChildIteratorType child_begin(const NodeRef N) { |
2357 | return succ_begin(BB: N); |
2358 | } |
2359 | |
2360 | static ChildIteratorType child_end(const NodeRef N) { return succ_end(BB: N); } |
2361 | |
2362 | static nodes_iterator nodes_begin(const PGOUseFunc *G) { |
2363 | return nodes_iterator(G->getFunc().begin()); |
2364 | } |
2365 | |
2366 | static nodes_iterator nodes_end(const PGOUseFunc *G) { |
2367 | return nodes_iterator(G->getFunc().end()); |
2368 | } |
2369 | }; |
2370 | |
2371 | template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { |
2372 | explicit DOTGraphTraits(bool isSimple = false) |
2373 | : DefaultDOTGraphTraits(isSimple) {} |
2374 | |
2375 | static std::string getGraphName(const PGOUseFunc *G) { |
2376 | return std::string(G->getFunc().getName()); |
2377 | } |
2378 | |
2379 | std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { |
2380 | std::string Result; |
2381 | raw_string_ostream OS(Result); |
2382 | |
2383 | OS << getSimpleNodeName(Node) << ":\\l" ; |
2384 | PGOUseBBInfo *BI = Graph->findBBInfo(BB: Node); |
2385 | OS << "Count : " ; |
2386 | if (BI && BI->Count) |
2387 | OS << *BI->Count << "\\l" ; |
2388 | else |
2389 | OS << "Unknown\\l" ; |
2390 | |
2391 | if (!PGOInstrSelect) |
2392 | return Result; |
2393 | |
2394 | for (const Instruction &I : *Node) { |
2395 | if (!isa<SelectInst>(Val: &I)) |
2396 | continue; |
2397 | // Display scaled counts for SELECT instruction: |
2398 | OS << "SELECT : { T = " ; |
2399 | uint64_t TC, FC; |
2400 | bool HasProf = extractBranchWeights(I, TrueVal&: TC, FalseVal&: FC); |
2401 | if (!HasProf) |
2402 | OS << "Unknown, F = Unknown }\\l" ; |
2403 | else |
2404 | OS << TC << ", F = " << FC << " }\\l" ; |
2405 | } |
2406 | return Result; |
2407 | } |
2408 | }; |
2409 | |
2410 | } // end namespace llvm |
2411 | |