1//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// llvm-profdata merges .profdata files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/ScopeExit.h"
15#include "llvm/ADT/SmallSet.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/HTTP/HTTPClient.h"
19#include "llvm/IR/LLVMContext.h"
20#include "llvm/Object/Binary.h"
21#include "llvm/ProfileData/DataAccessProf.h"
22#include "llvm/ProfileData/InstrProfCorrelator.h"
23#include "llvm/ProfileData/InstrProfReader.h"
24#include "llvm/ProfileData/InstrProfWriter.h"
25#include "llvm/ProfileData/MemProf.h"
26#include "llvm/ProfileData/MemProfReader.h"
27#include "llvm/ProfileData/MemProfSummaryBuilder.h"
28#include "llvm/ProfileData/MemProfYAML.h"
29#include "llvm/ProfileData/ProfileCommon.h"
30#include "llvm/ProfileData/SampleProfReader.h"
31#include "llvm/ProfileData/SampleProfWriter.h"
32#include "llvm/Support/BalancedPartitioning.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/Discriminator.h"
35#include "llvm/Support/Errc.h"
36#include "llvm/Support/FileSystem.h"
37#include "llvm/Support/Format.h"
38#include "llvm/Support/FormattedStream.h"
39#include "llvm/Support/InitLLVM.h"
40#include "llvm/Support/MD5.h"
41#include "llvm/Support/MemoryBuffer.h"
42#include "llvm/Support/Path.h"
43#include "llvm/Support/Regex.h"
44#include "llvm/Support/ThreadPool.h"
45#include "llvm/Support/Threading.h"
46#include "llvm/Support/VirtualFileSystem.h"
47#include "llvm/Support/WithColor.h"
48#include "llvm/Support/raw_ostream.h"
49#include <algorithm>
50#include <cmath>
51#include <optional>
52
53using namespace llvm;
54using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
55
// https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentation
// on each subcommand.
58cl::SubCommand ShowSubcommand(
59 "show",
60 "Takes a profile data file and displays the profiles. See detailed "
61 "documentation in "
62 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
63cl::SubCommand OrderSubcommand(
64 "order",
65 "Reads temporal profiling traces from a profile and outputs a function "
66 "order that reduces the number of page faults for those traces. See "
67 "detailed documentation in "
68 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
69cl::SubCommand OverlapSubcommand(
70 "overlap",
71 "Computes and displays the overlap between two profiles. See detailed "
72 "documentation in "
73 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
74cl::SubCommand MergeSubcommand(
75 "merge",
76 "Takes several profiles and merge them together. See detailed "
77 "documentation in "
78 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
79
namespace {
/// Kind of profile being processed. The enumerator names double as
/// command-line flag names (see the clEnumVal uses below).
enum ProfileKinds {
  instr,  // Instrumentation profile.
  sample, // Sample profile.
  memory  // MemProf memory access profile.
};

/// Policy for dealing with invalid input profiles.
enum FailureMode {
  warnOnly,            // Do not fail and just print warnings.
  failIfAnyAreInvalid, // Fail if any profile is invalid.
  failIfAllAreInvalid  // Fail only if all profiles are invalid.
};

/// Encoding of an output profile.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary
};

/// Output format selected via --show-format.
enum class ShowFormat { Text, Json, Yaml };
} // namespace
95
96// Common options.
97cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
98 cl::init(Val: "-"), cl::desc("Output file"),
99 cl::sub(ShowSubcommand),
100 cl::sub(OrderSubcommand),
101 cl::sub(OverlapSubcommand),
102 cl::sub(MergeSubcommand));
103// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
104// will be used. llvm::cl::alias::done() method asserts this condition.
105static cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
106 cl::aliasopt(OutputFilename));
107
108// Options common to at least two commands.
109static cl::opt<ProfileKinds> ProfileKind(
110 cl::desc("Profile kind:"), cl::sub(MergeSubcommand),
111 cl::sub(OverlapSubcommand), cl::init(Val: instr),
112 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
113 clEnumVal(sample, "Sample profile")));
114static cl::opt<std::string> Filename(cl::Positional,
115 cl::desc("<profdata-file>"),
116 cl::sub(ShowSubcommand),
117 cl::sub(OrderSubcommand));
118static cl::opt<unsigned> MaxDbgCorrelationWarnings(
119 "max-debug-info-correlation-warnings",
120 cl::desc("The maximum number of warnings to emit when correlating "
121 "profile from debug info (0 = no limit)"),
122 cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(Val: 5));
123static cl::opt<std::string> ProfiledBinary(
124 "profiled-binary", cl::init(Val: ""),
125 cl::desc("Path to binary from which the profile was collected."),
126 cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
127static cl::opt<std::string> DebugInfoFilename(
128 "debug-info", cl::init(Val: ""),
129 cl::desc(
130 "For show, read and extract profile metadata from debug info and show "
131 "the functions it found. For merge, use the provided debug info to "
132 "correlate the raw profile."),
133 cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
134static cl::opt<std::string>
135 BinaryFilename("binary-file", cl::init(Val: ""),
136 cl::desc("For merge, use the provided unstripped binary to "
137 "correlate the raw profile."),
138 cl::sub(MergeSubcommand));
139static cl::list<std::string> DebugFileDirectory(
140 "debug-file-directory",
141 cl::desc("Directories to search for object files by build ID"));
142static cl::opt<bool> DebugInfod("debuginfod", cl::init(Val: false), cl::Hidden,
143 cl::sub(MergeSubcommand),
144 cl::desc("Enable debuginfod"));
145static cl::opt<ProfCorrelatorKind> BIDFetcherProfileCorrelate(
146 "correlate",
147 cl::desc("Use debug-info or binary correlation to correlate profiles with "
148 "build id fetcher"),
149 cl::init(Val: InstrProfCorrelator::NONE),
150 cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
151 "No profile correlation"),
152 clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
153 "Use debug info to correlate"),
154 clEnumValN(InstrProfCorrelator::BINARY, "binary",
155 "Use binary to correlate")));
156static cl::opt<std::string> FuncNameFilter(
157 "function",
158 cl::desc("Only functions matching the filter are shown in the output. For "
159 "overlapping CSSPGO, this takes a function name with calling "
160 "context."),
161 cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
162 cl::sub(MergeSubcommand));
163
164// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
165// factor out the common cl::sub in cl::opt constructor for subcommand-specific
166// options.
167
168// Options specific to merge subcommand.
169static cl::list<std::string> InputFilenames(cl::Positional,
170 cl::sub(MergeSubcommand),
171 cl::desc("<filename...>"));
172static cl::list<std::string>
173 WeightedInputFilenames("weighted-input", cl::sub(MergeSubcommand),
174 cl::desc("<weight>,<filename>"));
175static cl::opt<ProfileFormat> OutputFormat(
176 cl::desc("Format of output profile"), cl::sub(MergeSubcommand),
177 cl::init(Val: PF_Ext_Binary),
178 cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
179 clEnumValN(PF_Ext_Binary, "extbinary",
180 "Extensible binary encoding "
181 "(default)"),
182 clEnumValN(PF_Text, "text", "Text encoding"),
183 clEnumValN(PF_GCC, "gcc",
184 "GCC encoding (only meaningful for -sample)")));
185static cl::opt<std::string>
186 InputFilenamesFile("input-files", cl::init(Val: ""), cl::sub(MergeSubcommand),
187 cl::desc("Path to file containing newline-separated "
188 "[<weight>,]<filename> entries"));
189static cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
190 cl::aliasopt(InputFilenamesFile));
191static cl::opt<bool> DumpInputFileList(
192 "dump-input-file-list", cl::init(Val: false), cl::Hidden,
193 cl::sub(MergeSubcommand),
194 cl::desc("Dump the list of input files and their weights, then exit"));
195static cl::opt<std::string> RemappingFile("remapping-file",
196 cl::value_desc("file"),
197 cl::sub(MergeSubcommand),
198 cl::desc("Symbol remapping file"));
199static cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
200 cl::aliasopt(RemappingFile));
201static cl::opt<bool>
202 UseMD5("use-md5", cl::init(Val: false), cl::Hidden,
203 cl::desc("Choose to use MD5 to represent string in name table (only "
204 "meaningful for -extbinary)"),
205 cl::sub(MergeSubcommand));
206static cl::opt<bool> CompressAllSections(
207 "compress-all-sections", cl::init(Val: false), cl::Hidden,
208 cl::sub(MergeSubcommand),
209 cl::desc("Compress all sections when writing the profile (only "
210 "meaningful for -extbinary)"));
211static cl::opt<bool> SampleMergeColdContext(
212 "sample-merge-cold-context", cl::init(Val: false), cl::Hidden,
213 cl::sub(MergeSubcommand),
214 cl::desc(
215 "Merge context sample profiles whose count is below cold threshold"));
216static cl::opt<bool> SampleTrimColdContext(
217 "sample-trim-cold-context", cl::init(Val: false), cl::Hidden,
218 cl::sub(MergeSubcommand),
219 cl::desc(
220 "Trim context sample profiles whose count is below cold threshold"));
221static cl::opt<uint32_t> SampleColdContextFrameDepth(
222 "sample-frame-depth-for-cold-context", cl::init(Val: 1),
223 cl::sub(MergeSubcommand),
224 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
225 "context-less base profile"));
226static cl::opt<size_t> OutputSizeLimit(
227 "output-size-limit", cl::init(Val: 0), cl::Hidden, cl::sub(MergeSubcommand),
228 cl::desc("Trim cold functions until profile size is below specified "
229 "limit in bytes. This uses a heursitic and functions may be "
230 "excessively trimmed"));
231static cl::opt<bool> GenPartialProfile(
232 "gen-partial-profile", cl::init(Val: false), cl::Hidden,
233 cl::sub(MergeSubcommand),
234 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
235static cl::opt<bool> SplitLayout(
236 "split-layout", cl::init(Val: false), cl::Hidden, cl::sub(MergeSubcommand),
237 cl::desc("Split the profile to two sections with one containing sample "
238 "profiles with inlined functions and the other without (only "
239 "meaningful for -extbinary)"));
240static cl::opt<std::string> SupplInstrWithSample(
241 "supplement-instr-with-sample", cl::init(Val: ""), cl::Hidden,
242 cl::sub(MergeSubcommand),
243 cl::desc("Supplement an instr profile with sample profile, to correct "
244 "the profile unrepresentativeness issue. The sample "
245 "profile is the input of the flag. Output will be in instr "
246 "format (The flag only works with -instr)"));
247static cl::opt<float> ZeroCounterThreshold(
248 "zero-counter-threshold", cl::init(Val: 0.7), cl::Hidden,
249 cl::sub(MergeSubcommand),
250 cl::desc("For the function which is cold in instr profile but hot in "
251 "sample profile, if the ratio of the number of zero counters "
252 "divided by the total number of counters is above the "
253 "threshold, the profile of the function will be regarded as "
254 "being harmful for performance and will be dropped."));
255static cl::opt<unsigned> SupplMinSizeThreshold(
256 "suppl-min-size-threshold", cl::init(Val: 10), cl::Hidden,
257 cl::sub(MergeSubcommand),
258 cl::desc("If the size of a function is smaller than the threshold, "
259 "assume it can be inlined by PGO early inliner and it won't "
260 "be adjusted based on sample profile."));
261static cl::opt<unsigned> InstrProfColdThreshold(
262 "instr-prof-cold-threshold", cl::init(Val: 0), cl::Hidden,
263 cl::sub(MergeSubcommand),
264 cl::desc("User specified cold threshold for instr profile which will "
265 "override the cold threshold got from profile summary. "));
266// WARNING: This reservoir size value is propagated to any input indexed
267// profiles for simplicity. Changing this value between invocations could
268// result in sample bias.
269static cl::opt<uint64_t> TemporalProfTraceReservoirSize(
270 "temporal-profile-trace-reservoir-size", cl::init(Val: 100),
271 cl::sub(MergeSubcommand),
272 cl::desc("The maximum number of stored temporal profile traces (default: "
273 "100)"));
274static cl::opt<uint64_t> TemporalProfMaxTraceLength(
275 "temporal-profile-max-trace-length", cl::init(Val: 10000),
276 cl::sub(MergeSubcommand),
277 cl::desc("The maximum length of a single temporal profile trace "
278 "(default: 10000)"));
279static cl::opt<std::string> FuncNameNegativeFilter(
280 "no-function", cl::init(Val: ""), cl::sub(MergeSubcommand),
281 cl::desc("Exclude functions matching the filter from the output."));
282
283static cl::opt<FailureMode>
284 FailMode("failure-mode", cl::init(Val: failIfAnyAreInvalid),
285 cl::desc("Failure mode:"), cl::sub(MergeSubcommand),
286 cl::values(clEnumValN(warnOnly, "warn",
287 "Do not fail and just print warnings."),
288 clEnumValN(failIfAnyAreInvalid, "any",
289 "Fail if any profile is invalid."),
290 clEnumValN(failIfAllAreInvalid, "all",
291 "Fail only if all profiles are invalid.")));
292
293static cl::opt<bool> OutputSparse(
294 "sparse", cl::init(Val: false), cl::sub(MergeSubcommand),
295 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
296static cl::opt<unsigned> NumThreads(
297 "num-threads", cl::init(Val: 0), cl::sub(MergeSubcommand),
298 cl::desc("Number of merge threads to use (default: autodetect)"));
299static cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
300 cl::aliasopt(NumThreads));
301
302static cl::opt<std::string> ProfileSymbolListFile(
303 "prof-sym-list", cl::init(Val: ""), cl::sub(MergeSubcommand),
304 cl::desc("Path to file containing the list of function symbols "
305 "used to populate profile symbol list"));
306
307static cl::opt<SampleProfileLayout> ProfileLayout(
308 "convert-sample-profile-layout",
309 cl::desc("Convert the generated profile to a profile with a new layout"),
310 cl::sub(MergeSubcommand), cl::init(Val: SPL_None),
311 cl::values(
312 clEnumValN(SPL_Nest, "nest",
313 "Nested profile, the input should be CS flat profile"),
314 clEnumValN(SPL_Flat, "flat",
315 "Profile with nested inlinee flatten out")));
316
317static cl::opt<bool> DropProfileSymbolList(
318 "drop-profile-symbol-list", cl::init(Val: false), cl::Hidden,
319 cl::sub(MergeSubcommand),
320 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
321 "(only meaningful for -sample)"));
322
323static cl::opt<bool> KeepVTableSymbols(
324 "keep-vtable-symbols", cl::init(Val: false), cl::Hidden,
325 cl::sub(MergeSubcommand),
326 cl::desc("If true, keep the vtable symbols in indexed profiles"));
327
328// Temporary support for writing the previous version of the format, to enable
329// some forward compatibility.
330// TODO: Consider enabling this with future version changes as well, to ease
331// deployment of newer versions of llvm-profdata.
332static cl::opt<bool> DoWritePrevVersion(
333 "write-prev-version", cl::init(Val: false), cl::Hidden,
334 cl::desc("Write the previous version of indexed format, to enable "
335 "some forward compatibility."));
336
337static cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
338 "memprof-version", cl::Hidden, cl::sub(MergeSubcommand),
339 cl::desc("Specify the version of the memprof format to use"),
340 cl::init(Val: memprof::Version3),
341 cl::values(clEnumValN(memprof::Version2, "2", "version 2"),
342 clEnumValN(memprof::Version3, "3", "version 3"),
343 clEnumValN(memprof::Version4, "4", "version 4")));
344
345static cl::opt<bool> MemProfFullSchema(
346 "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
347 cl::desc("Use the full schema for serialization"), cl::init(Val: false));
348
349static cl::opt<bool>
350 MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(Val: false),
351 cl::Hidden, cl::sub(MergeSubcommand),
352 cl::desc("Generate random hotness values"));
353static cl::opt<unsigned> MemprofGenerateRandomHotnessSeed(
354 "memprof-random-hotness-seed", cl::init(Val: 0), cl::Hidden,
355 cl::sub(MergeSubcommand),
356 cl::desc("Random hotness seed to use (0 to generate new seed)"));
357
358// Options specific to overlap subcommand.
359static cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
360 cl::desc("<base profile file>"),
361 cl::sub(OverlapSubcommand));
362static cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
363 cl::desc("<test profile file>"),
364 cl::sub(OverlapSubcommand));
365
366static cl::opt<unsigned long long> SimilarityCutoff(
367 "similarity-cutoff", cl::init(Val: 0),
368 cl::desc("For sample profiles, list function names (with calling context "
369 "for csspgo) for overlapped functions "
370 "with similarities below the cutoff (percentage times 10000)."),
371 cl::sub(OverlapSubcommand));
372
373static cl::opt<bool> IsCS(
374 "cs", cl::init(Val: false),
375 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
376 cl::sub(OverlapSubcommand));
377
378static cl::opt<unsigned long long> OverlapValueCutoff(
379 "value-cutoff", cl::init(Val: -1),
380 cl::desc(
381 "Function level overlap information for every function (with calling "
382 "context for csspgo) in test "
383 "profile with max count value greater than the parameter value"),
384 cl::sub(OverlapSubcommand));
385
386// Options specific to show subcommand.
387static cl::opt<bool>
388 ShowCounts("counts", cl::init(Val: false),
389 cl::desc("Show counter values for shown functions"),
390 cl::sub(ShowSubcommand));
391static cl::opt<ShowFormat>
392 SFormat("show-format", cl::init(Val: ShowFormat::Text),
393 cl::desc("Emit output in the selected format if supported"),
394 cl::sub(ShowSubcommand),
395 cl::values(clEnumValN(ShowFormat::Text, "text",
396 "emit normal text output (default)"),
397 clEnumValN(ShowFormat::Json, "json", "emit JSON"),
398 clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
399// TODO: Consider replacing this with `--show-format=text-encoding`.
400static cl::opt<bool>
401 TextFormat("text", cl::init(Val: false),
402 cl::desc("Show instr profile data in text dump format"),
403 cl::sub(ShowSubcommand));
404static cl::opt<bool>
405 JsonFormat("json",
406 cl::desc("Show sample profile data in the JSON format "
407 "(deprecated, please use --show-format=json)"),
408 cl::sub(ShowSubcommand));
409static cl::opt<bool> ShowIndirectCallTargets(
410 "ic-targets", cl::init(Val: false),
411 cl::desc("Show indirect call site target values for shown functions"),
412 cl::sub(ShowSubcommand));
413static cl::opt<bool>
414 ShowVTables("show-vtables", cl::init(Val: false),
415 cl::desc("Show vtable names for shown functions"),
416 cl::sub(ShowSubcommand));
417static cl::opt<bool> ShowMemOPSizes(
418 "memop-sizes", cl::init(Val: false),
419 cl::desc("Show the profiled sizes of the memory intrinsic calls "
420 "for shown functions"),
421 cl::sub(ShowSubcommand));
422static cl::opt<bool>
423 ShowDetailedSummary("detailed-summary", cl::init(Val: false),
424 cl::desc("Show detailed profile summary"),
425 cl::sub(ShowSubcommand));
426static cl::list<uint32_t> DetailedSummaryCutoffs(
427 cl::CommaSeparated, "detailed-summary-cutoffs",
428 cl::desc(
429 "Cutoff percentages (times 10000) for generating detailed summary"),
430 cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand));
431static cl::opt<bool>
432 ShowHotFuncList("hot-func-list", cl::init(Val: false),
433 cl::desc("Show profile summary of a list of hot functions"),
434 cl::sub(ShowSubcommand));
435static cl::opt<bool>
436 ShowAllFunctions("all-functions", cl::init(Val: false),
437 cl::desc("Details for each and every function"),
438 cl::sub(ShowSubcommand));
439static cl::opt<bool> ShowCS("showcs", cl::init(Val: false),
440 cl::desc("Show context sensitive counts"),
441 cl::sub(ShowSubcommand));
442static cl::opt<ProfileKinds> ShowProfileKind(
443 cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand),
444 cl::init(Val: instr),
445 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
446 clEnumVal(sample, "Sample profile"),
447 clEnumVal(memory, "MemProf memory access profile")));
448static cl::opt<uint32_t> TopNFunctions(
449 "topn", cl::init(Val: 0),
450 cl::desc("Show the list of functions with the largest internal counts"),
451 cl::sub(ShowSubcommand));
452static cl::opt<uint32_t> ShowValueCutoff(
453 "value-cutoff", cl::init(Val: 0),
454 cl::desc("Set the count value cutoff. Functions with the maximum count "
455 "less than this value will not be printed out. (Default is 0)"),
456 cl::sub(ShowSubcommand));
457static cl::opt<bool> OnlyListBelow(
458 "list-below-cutoff", cl::init(Val: false),
459 cl::desc("Only output names of functions whose max count values are "
460 "below the cutoff value"),
461 cl::sub(ShowSubcommand));
462static cl::opt<bool> ShowProfileSymbolList(
463 "show-prof-sym-list", cl::init(Val: false),
464 cl::desc("Show profile symbol list if it exists in the profile. "),
465 cl::sub(ShowSubcommand));
466static cl::opt<bool> ShowSectionInfoOnly(
467 "show-sec-info-only", cl::init(Val: false),
468 cl::desc("Show the information of each section in the sample profile. "
469 "The flag is only usable when the sample profile is in "
470 "extbinary format"),
471 cl::sub(ShowSubcommand));
472static cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(Val: false),
473 cl::desc("Show binary ids in the profile. "),
474 cl::sub(ShowSubcommand));
475static cl::opt<bool> ShowTemporalProfTraces(
476 "temporal-profile-traces",
477 cl::desc("Show temporal profile traces in the profile."),
478 cl::sub(ShowSubcommand));
479
480static cl::opt<bool>
481 ShowCovered("covered", cl::init(Val: false),
482 cl::desc("Show only the functions that have been executed."),
483 cl::sub(ShowSubcommand));
484
485static cl::opt<bool> ShowProfileVersion("profile-version", cl::init(Val: false),
486 cl::desc("Show profile version. "),
487 cl::sub(ShowSubcommand));
488
489// Options specific to order subcommand.
490static cl::opt<unsigned>
491 NumTestTraces("num-test-traces", cl::init(Val: 0),
492 cl::desc("Keep aside the last <num-test-traces> traces in "
493 "the profile when computing the function order and "
494 "instead use them to evaluate that order"),
495 cl::sub(OrderSubcommand));
496
// Marker string used to indicate that multiple static functions map to the
// same name.
const std::string DuplicateNameStr{"----"};
500
501static void warn(Twine Message, StringRef Whence = "", StringRef Hint = "") {
502 WithColor::warning();
503 if (!Whence.empty())
504 errs() << Whence << ": ";
505 errs() << Message << "\n";
506 if (!Hint.empty())
507 WithColor::note() << Hint << "\n";
508}
509
510static void warn(Error E, StringRef Whence = "") {
511 if (E.isA<InstrProfError>()) {
512 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
513 warn(Message: IPE.message(), Whence);
514 });
515 }
516}
517
518static void exitWithError(Twine Message, StringRef Whence = "",
519 StringRef Hint = "") {
520 WithColor::error();
521 if (!Whence.empty())
522 errs() << Whence << ": ";
523 errs() << Message << "\n";
524 if (!Hint.empty())
525 WithColor::note() << Hint << "\n";
526 ::exit(status: 1);
527}
528
529static void exitWithError(Error E, StringRef Whence = "") {
530 if (E.isA<InstrProfError>()) {
531 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
532 instrprof_error instrError = IPE.get();
533 StringRef Hint = "";
534 if (instrError == instrprof_error::unrecognized_format) {
535 // Hint in case user missed specifying the profile type.
536 Hint = "Perhaps you forgot to use the --sample or --memory option?";
537 }
538 exitWithError(Message: IPE.message(), Whence, Hint);
539 });
540 return;
541 }
542
543 exitWithError(Message: toString(E: std::move(E)), Whence);
544}
545
546static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
547 exitWithError(Message: EC.message(), Whence);
548}
549
550static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
551 StringRef Whence = "") {
552 if (FailMode == failIfAnyAreInvalid)
553 exitWithErrorCode(EC, Whence);
554 else
555 warn(Message: EC.message(), Whence);
556}
557
558static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
559 StringRef WhenceFunction = "",
560 bool ShowHint = true) {
561 if (!WhenceFile.empty())
562 errs() << WhenceFile << ": ";
563 if (!WhenceFunction.empty())
564 errs() << WhenceFunction << ": ";
565
566 auto IPE = instrprof_error::success;
567 E = handleErrors(E: std::move(E),
568 Hs: [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
569 IPE = E->get();
570 return Error(std::move(E));
571 });
572 errs() << toString(E: std::move(E)) << "\n";
573
574 if (ShowHint) {
575 StringRef Hint = "";
576 if (IPE != instrprof_error::success) {
577 switch (IPE) {
578 case instrprof_error::hash_mismatch:
579 case instrprof_error::count_mismatch:
580 case instrprof_error::value_site_count_mismatch:
581 Hint = "Make sure that all profile data to be merged is generated "
582 "from the same binary.";
583 break;
584 default:
585 break;
586 }
587 }
588
589 if (!Hint.empty())
590 errs() << Hint << "\n";
591 }
592}
593
594namespace {
595/// A remapper from original symbol names to new symbol names based on a file
596/// containing a list of mappings from old name to new name.
597class SymbolRemapper {
598 std::unique_ptr<MemoryBuffer> File;
599 DenseMap<StringRef, StringRef> RemappingTable;
600
601public:
602 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
603 static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
604 auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
605 if (!BufOrError)
606 exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
607
608 auto Remapper = std::make_unique<SymbolRemapper>();
609 Remapper->File = std::move(BufOrError.get());
610
611 for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
612 !LineIt.is_at_eof(); ++LineIt) {
613 std::pair<StringRef, StringRef> Parts = LineIt->split(Separator: ' ');
614 if (Parts.first.empty() || Parts.second.empty() ||
615 Parts.second.count(C: ' ')) {
616 exitWithError(Message: "unexpected line in remapping file",
617 Whence: (InputFile + ":" + Twine(LineIt.line_number())).str(),
618 Hint: "expected 'old_symbol new_symbol'");
619 }
620 Remapper->RemappingTable.insert(KV: Parts);
621 }
622 return Remapper;
623 }
624
625 /// Attempt to map the given old symbol into a new symbol.
626 ///
627 /// \return The new symbol, or \p Name if no such symbol was found.
628 StringRef operator()(StringRef Name) {
629 StringRef New = RemappingTable.lookup(Val: Name);
630 return New.empty() ? Name : New;
631 }
632
633 FunctionId operator()(FunctionId Name) {
634 // MD5 name cannot be remapped.
635 if (!Name.isStringRef())
636 return Name;
637 StringRef New = RemappingTable.lookup(Val: Name.stringRef());
638 return New.empty() ? Name : FunctionId(New);
639 }
640};
641}
642
643struct WeightedFile {
644 std::string Filename;
645 uint64_t Weight;
646};
647typedef SmallVector<WeightedFile, 5> WeightedFileVector;
648
649/// Keep track of merged data and reported errors.
650struct WriterContext {
651 std::mutex Lock;
652 InstrProfWriter Writer;
653 std::vector<std::pair<Error, std::string>> Errors;
654 std::mutex &ErrLock;
655 SmallSet<instrprof_error, 4> &WriterErrorCodes;
656
657 WriterContext(bool IsSparse, std::mutex &ErrLock,
658 SmallSet<instrprof_error, 4> &WriterErrorCodes,
659 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
660 : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
661 MemProfVersionRequested, MemProfFullSchema,
662 MemprofGenerateRandomHotness, MemprofGenerateRandomHotnessSeed),
663 ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
664};
665
666/// Computer the overlap b/w profile BaseFilename and TestFileName,
667/// and store the program level result to Overlap.
668static void overlapInput(const std::string &BaseFilename,
669 const std::string &TestFilename, WriterContext *WC,
670 OverlapStats &Overlap,
671 const OverlapFuncFilters &FuncFilter,
672 raw_fd_ostream &OS, bool IsCS) {
673 auto FS = vfs::getRealFileSystem();
674 auto ReaderOrErr = InstrProfReader::create(Path: TestFilename, FS&: *FS);
675 if (Error E = ReaderOrErr.takeError()) {
676 // Skip the empty profiles by returning sliently.
677 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
678 if (ErrorCode != instrprof_error::empty_raw_profile)
679 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
680 args: TestFilename);
681 return;
682 }
683
684 auto Reader = std::move(ReaderOrErr.get());
685 for (auto &I : *Reader) {
686 OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
687 FuncOverlap.setFuncInfo(Name: I.Name, Hash: I.Hash);
688
689 WC->Writer.overlapRecord(Other: std::move(I), Overlap, FuncLevelOverlap&: FuncOverlap, FuncFilter);
690 FuncOverlap.dump(OS);
691 }
692}
693
694/// Load an input into a writer context.
695static void
696loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
697 const InstrProfCorrelator *Correlator, const StringRef ProfiledBinary,
698 WriterContext *WC, const object::BuildIDFetcher *BIDFetcher = nullptr,
699 const ProfCorrelatorKind *BIDFetcherCorrelatorKind = nullptr) {
700 std::unique_lock<std::mutex> CtxGuard{WC->Lock};
701
702 // Copy the filename, because llvm::ThreadPool copied the input "const
703 // WeightedFile &" by value, making a reference to the filename within it
704 // invalid outside of this packaged task.
705 std::string Filename = Input.Filename;
706
707 using ::llvm::memprof::RawMemProfReader;
708 if (RawMemProfReader::hasFormat(Path: Input.Filename)) {
709 auto ReaderOrErr = RawMemProfReader::create(Path: Input.Filename, ProfiledBinary);
710 if (!ReaderOrErr) {
711 exitWithError(E: ReaderOrErr.takeError(), Whence: Input.Filename);
712 }
713 std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
714 // Check if the profile types can be merged, e.g. clang frontend profiles
715 // should not be merged with memprof profiles.
716 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
717 consumeError(Err: std::move(E));
718 WC->Errors.emplace_back(
719 args: make_error<StringError>(
720 Args: "Cannot merge MemProf profile with Clang generated profile.",
721 Args: std::error_code()),
722 args&: Filename);
723 return;
724 }
725
726 auto MemProfError = [&](Error E) {
727 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
728 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
729 args&: Filename);
730 };
731
732 WC->Writer.addMemProfData(Incoming: Reader->takeMemProfData(), Warn: MemProfError);
733 return;
734 }
735
736 using ::llvm::memprof::YAMLMemProfReader;
737 if (YAMLMemProfReader::hasFormat(Path: Input.Filename)) {
738 auto ReaderOrErr = YAMLMemProfReader::create(Path: Input.Filename);
739 if (!ReaderOrErr)
740 exitWithError(E: ReaderOrErr.takeError(), Whence: Input.Filename);
741 std::unique_ptr<YAMLMemProfReader> Reader = std::move(ReaderOrErr.get());
742 // Check if the profile types can be merged, e.g. clang frontend profiles
743 // should not be merged with memprof profiles.
744 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
745 consumeError(Err: std::move(E));
746 WC->Errors.emplace_back(
747 args: make_error<StringError>(
748 Args: "Cannot merge MemProf profile with incompatible profile.",
749 Args: std::error_code()),
750 args&: Filename);
751 return;
752 }
753
754 auto MemProfError = [&](Error E) {
755 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
756 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
757 args&: Filename);
758 };
759
760 auto MemProfData = Reader->takeMemProfData();
761
762 auto DataAccessProfData = Reader->takeDataAccessProfData();
763
764 // Check for the empty input in case the YAML file is invalid.
765 if (MemProfData.Records.empty() &&
766 (!DataAccessProfData || DataAccessProfData->empty())) {
767 WC->Errors.emplace_back(
768 args: make_error<StringError>(Args: "The profile is empty.", Args: std::error_code()),
769 args&: Filename);
770 }
771
772 WC->Writer.addMemProfData(Incoming: std::move(MemProfData), Warn: MemProfError);
773 WC->Writer.addDataAccessProfData(DataAccessProfile: std::move(DataAccessProfData));
774 return;
775 }
776
777 auto FS = vfs::getRealFileSystem();
778 // TODO: This only saves the first non-fatal error from InstrProfReader, and
779 // then added to WriterContext::Errors. However, this is not extensible, if
780 // we have more non-fatal errors from InstrProfReader in the future. How
781 // should this interact with different -failure-mode?
782 std::optional<std::pair<Error, std::string>> ReaderWarning;
783 llvm::scope_exit ReaderWarningScope([&] {
784 // If we hit a different error we may still have an error in ReaderWarning.
785 // Consume it now to avoid an assert
786 if (ReaderWarning)
787 consumeError(Err: std::move(ReaderWarning->first));
788 });
789 auto Warn = [&](Error E) {
790 if (ReaderWarning) {
791 consumeError(Err: std::move(E));
792 return;
793 }
794 // Only show the first time an error occurs in this file.
795 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
796 ReaderWarning = {make_error<InstrProfError>(Args&: ErrCode, Args&: Msg), Filename};
797 };
798
799 const ProfCorrelatorKind CorrelatorKind = BIDFetcherCorrelatorKind
800 ? *BIDFetcherCorrelatorKind
801 : ProfCorrelatorKind::NONE;
802 auto ReaderOrErr = InstrProfReader::create(Path: Input.Filename, FS&: *FS, Correlator,
803 BIDFetcher, BIDFetcherCorrelatorKind: CorrelatorKind, Warn);
804 if (Error E = ReaderOrErr.takeError()) {
805 // Skip the empty profiles by returning silently.
806 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
807 if (ErrCode != instrprof_error::empty_raw_profile)
808 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
809 args&: Filename);
810 return;
811 }
812
813 auto Reader = std::move(ReaderOrErr.get());
814 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
815 consumeError(Err: std::move(E));
816 WC->Errors.emplace_back(
817 args: make_error<StringError>(
818 Args: "Merge IR generated profile with Clang generated profile.",
819 Args: std::error_code()),
820 args&: Filename);
821 return;
822 }
823
824 for (auto &I : *Reader) {
825 if (Remapper)
826 I.Name = (*Remapper)(I.Name);
827 const StringRef FuncName = I.Name;
828 bool Reported = false;
829
830 WC->Writer.addRecord(I: std::move(I), Weight: Input.Weight, Warn: [&](Error E) {
831 if (Reported) {
832 consumeError(Err: std::move(E));
833 return;
834 }
835 Reported = true;
836 // Only show hint the first time an error occurs.
837 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
838 std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
839 bool firstTime = WC->WriterErrorCodes.insert(V: ErrCode).second;
840 handleMergeWriterError(E: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
841 WhenceFile: Input.Filename, WhenceFunction: FuncName, ShowHint: firstTime);
842 });
843 }
844
845 if (KeepVTableSymbols) {
846 const InstrProfSymtab &symtab = Reader->getSymtab();
847 const auto &VTableNames = symtab.getVTableNames();
848
849 for (const auto &kv : VTableNames)
850 WC->Writer.addVTableName(VTableName: kv.getKey());
851 }
852
853 if (Reader->hasTemporalProfile()) {
854 auto &Traces = Reader->getTemporalProfTraces(Weight: Input.Weight);
855 if (!Traces.empty())
856 WC->Writer.addTemporalProfileTraces(
857 SrcTraces&: Traces, SrcStreamSize: Reader->getTemporalProfTraceStreamSize());
858 }
859 if (Reader->hasError()) {
860 if (Error E = Reader->getError()) {
861 WC->Errors.emplace_back(args: std::move(E), args&: Filename);
862 return;
863 }
864 }
865
866 std::vector<llvm::object::BuildID> BinaryIds;
867 if (Error E = Reader->readBinaryIds(BinaryIds)) {
868 WC->Errors.emplace_back(args: std::move(E), args&: Filename);
869 return;
870 }
871 WC->Writer.addBinaryIds(BIs: BinaryIds);
872
873 if (ReaderWarning) {
874 WC->Errors.emplace_back(args: std::move(ReaderWarning->first),
875 args&: ReaderWarning->second);
876 }
877}
878
879/// Merge the \p Src writer context into \p Dst.
880static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
881 for (auto &ErrorPair : Src->Errors)
882 Dst->Errors.push_back(x: std::move(ErrorPair));
883 Src->Errors.clear();
884
885 if (Error E = Dst->Writer.mergeProfileKind(Other: Src->Writer.getProfileKind()))
886 exitWithError(E: std::move(E));
887
888 Dst->Writer.mergeRecordsFromWriter(IPW: std::move(Src->Writer), Warn: [&](Error E) {
889 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
890 std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
891 bool firstTime = Dst->WriterErrorCodes.insert(V: ErrorCode).second;
892 if (firstTime)
893 warn(Message: toString(E: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg)));
894 });
895}
896
897static StringRef
898getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
899 return Val.first();
900}
901
902static std::string
903getFuncName(const SampleProfileMap::value_type &Val) {
904 return Val.second.getContext().toString();
905}
906
907template <typename T>
908static void filterFunctions(T &ProfileMap) {
909 bool hasFilter = !FuncNameFilter.empty();
910 bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
911 if (!hasFilter && !hasNegativeFilter)
912 return;
913
914 // If filter starts with '?' it is MSVC mangled name, not a regex.
915 llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
916 if (hasFilter && FuncNameFilter[0] == '?' &&
917 ProbablyMSVCMangledName.match(String: FuncNameFilter))
918 FuncNameFilter = llvm::Regex::escape(String: FuncNameFilter);
919 if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
920 ProbablyMSVCMangledName.match(String: FuncNameNegativeFilter))
921 FuncNameNegativeFilter = llvm::Regex::escape(String: FuncNameNegativeFilter);
922
923 size_t Count = ProfileMap.size();
924 llvm::Regex Pattern(FuncNameFilter);
925 llvm::Regex NegativePattern(FuncNameNegativeFilter);
926 std::string Error;
927 if (hasFilter && !Pattern.isValid(Error))
928 exitWithError(Message: Error);
929 if (hasNegativeFilter && !NegativePattern.isValid(Error))
930 exitWithError(Message: Error);
931
932 // Handle MD5 profile, so it is still able to match using the original name.
933 std::string MD5Name = std::to_string(val: llvm::MD5Hash(Str: FuncNameFilter));
934 std::string NegativeMD5Name =
935 std::to_string(val: llvm::MD5Hash(Str: FuncNameNegativeFilter));
936
937 ProfileMap.remove_if([&](const auto &Entry) {
938 const auto &FuncName = getFuncName(Entry);
939 // Negative filter has higher precedence than positive filter.
940 return (hasNegativeFilter &&
941 (NegativePattern.match(String: FuncName) ||
942 (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
943 (hasFilter && !(Pattern.match(String: FuncName) ||
944 (FunctionSamples::UseMD5 && MD5Name == FuncName)));
945 });
946
947 llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
948 << "in the original profile are filtered.\n";
949}
950
951static void writeInstrProfile(StringRef OutputFilename,
952 ProfileFormat OutputFormat,
953 InstrProfWriter &Writer) {
954 std::error_code EC;
955 raw_fd_ostream Output(OutputFilename.data(), EC,
956 OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
957 : sys::fs::OF_None);
958 if (EC)
959 exitWithErrorCode(EC, Whence: OutputFilename);
960
961 if (OutputFormat == PF_Text) {
962 if (Error E = Writer.writeText(OS&: Output))
963 warn(E: std::move(E));
964 } else {
965 if (Output.is_displayed())
966 exitWithError(Message: "cannot write a non-text format profile to the terminal");
967 if (Error E = Writer.write(OS&: Output))
968 warn(E: std::move(E));
969 }
970}
971
972static void mergeInstrProfile(const WeightedFileVector &Inputs,
973 SymbolRemapper *Remapper,
974 int MaxDbgCorrelationWarnings,
975 const StringRef ProfiledBinary) {
976 const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
977 const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
978 if (OutputFormat == PF_Compact_Binary)
979 exitWithError(Message: "Compact Binary is deprecated");
980 if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
981 OutputFormat != PF_Text)
982 exitWithError(Message: "unknown format is specified");
983
984 // TODO: Maybe we should support correlation with mixture of different
985 // correlation modes(w/wo debug-info/object correlation).
986 if (DebugInfoFilename.empty()) {
987 if (!BinaryFilename.empty() && (DebugInfod || !DebugFileDirectory.empty()))
988 exitWithError(Message: "Expected only one of -binary-file, -debuginfod or "
989 "-debug-file-directory");
990 } else if (!BinaryFilename.empty() || DebugInfod ||
991 !DebugFileDirectory.empty()) {
992 exitWithError(Message: "Expected only one of -debug-info, -binary-file, -debuginfod "
993 "or -debug-file-directory");
994 }
995 std::string CorrelateFilename;
996 ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
997 if (!DebugInfoFilename.empty()) {
998 CorrelateFilename = DebugInfoFilename;
999 CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
1000 } else if (!BinaryFilename.empty()) {
1001 CorrelateFilename = BinaryFilename;
1002 CorrelateKind = ProfCorrelatorKind::BINARY;
1003 }
1004
1005 std::unique_ptr<InstrProfCorrelator> Correlator;
1006 if (CorrelateKind != InstrProfCorrelator::NONE) {
1007 if (auto Err = InstrProfCorrelator::get(Filename: CorrelateFilename, FileKind: CorrelateKind)
1008 .moveInto(Value&: Correlator))
1009 exitWithError(E: std::move(Err), Whence: CorrelateFilename);
1010 if (auto Err = Correlator->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
1011 exitWithError(E: std::move(Err), Whence: CorrelateFilename);
1012 }
1013
1014 ProfCorrelatorKind BIDFetcherCorrelateKind = ProfCorrelatorKind::NONE;
1015 std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
1016 if (DebugInfod) {
1017 llvm::HTTPClient::initialize();
1018 BIDFetcher = std::make_unique<DebuginfodFetcher>(args&: DebugFileDirectory);
1019 if (!BIDFetcherProfileCorrelate)
1020 exitWithError(Message: "Expected --correlate when --debuginfod is provided");
1021 BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
1022 } else if (!DebugFileDirectory.empty()) {
1023 BIDFetcher = std::make_unique<object::BuildIDFetcher>(args&: DebugFileDirectory);
1024 if (!BIDFetcherProfileCorrelate)
1025 exitWithError(Message: "Expected --correlate when --debug-file-directory "
1026 "is provided");
1027 BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
1028 } else if (BIDFetcherProfileCorrelate) {
1029 exitWithError(Message: "Expected --debuginfod or --debug-file-directory when "
1030 "--correlate is provided");
1031 }
1032
1033 std::mutex ErrorLock;
1034 SmallSet<instrprof_error, 4> WriterErrorCodes;
1035
1036 // If NumThreads is not specified, auto-detect a good default.
1037 if (NumThreads == 0)
1038 NumThreads = std::min(a: hardware_concurrency().compute_thread_count(),
1039 b: unsigned((Inputs.size() + 1) / 2));
1040
1041 // Initialize the writer contexts.
1042 SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
1043 for (unsigned I = 0; I < NumThreads; ++I)
1044 Contexts.emplace_back(Args: std::make_unique<WriterContext>(
1045 args&: OutputSparse, args&: ErrorLock, args&: WriterErrorCodes, args: TraceReservoirSize,
1046 args: MaxTraceLength));
1047
1048 if (NumThreads == 1) {
1049 for (const auto &Input : Inputs)
1050 loadInput(Input, Remapper, Correlator: Correlator.get(), ProfiledBinary,
1051 WC: Contexts[0].get(), BIDFetcher: BIDFetcher.get(), BIDFetcherCorrelatorKind: &BIDFetcherCorrelateKind);
1052 } else {
1053 DefaultThreadPool Pool(hardware_concurrency(ThreadCount: NumThreads));
1054
1055 // Load the inputs in parallel (N/NumThreads serial steps).
1056 unsigned Ctx = 0;
1057 for (const auto &Input : Inputs) {
1058 Pool.async(F&: loadInput, ArgList: Input, ArgList&: Remapper, ArgList: Correlator.get(), ArgList: ProfiledBinary,
1059 ArgList: Contexts[Ctx].get(), ArgList: BIDFetcher.get(),
1060 ArgList: &BIDFetcherCorrelateKind);
1061 Ctx = (Ctx + 1) % NumThreads;
1062 }
1063 Pool.wait();
1064
1065 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
1066 unsigned Mid = Contexts.size() / 2;
1067 unsigned End = Contexts.size();
1068 assert(Mid > 0 && "Expected more than one context");
1069 do {
1070 for (unsigned I = 0; I < Mid; ++I)
1071 Pool.async(F&: mergeWriterContexts, ArgList: Contexts[I].get(),
1072 ArgList: Contexts[I + Mid].get());
1073 Pool.wait();
1074 if (End & 1) {
1075 Pool.async(F&: mergeWriterContexts, ArgList: Contexts[0].get(),
1076 ArgList: Contexts[End - 1].get());
1077 Pool.wait();
1078 }
1079 End = Mid;
1080 Mid /= 2;
1081 } while (Mid > 0);
1082 }
1083
1084 // Handle deferred errors encountered during merging. If the number of errors
1085 // is equal to the number of inputs the merge failed.
1086 unsigned NumErrors = 0;
1087 for (std::unique_ptr<WriterContext> &WC : Contexts) {
1088 for (auto &ErrorPair : WC->Errors) {
1089 ++NumErrors;
1090 warn(Message: toString(E: std::move(ErrorPair.first)), Whence: ErrorPair.second);
1091 }
1092 }
1093 if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
1094 (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
1095 exitWithError(Message: "no profile can be merged");
1096
1097 filterFunctions(ProfileMap&: Contexts[0]->Writer.getProfileData());
1098
1099 writeInstrProfile(OutputFilename, OutputFormat, Writer&: Contexts[0]->Writer);
1100}
1101
1102/// The profile entry for a function in instrumentation profile.
1103struct InstrProfileEntry {
1104 uint64_t MaxCount = 0;
1105 uint64_t NumEdgeCounters = 0;
1106 float ZeroCounterRatio = 0.0;
1107 InstrProfRecord *ProfRecord;
1108 InstrProfileEntry(InstrProfRecord *Record);
1109 InstrProfileEntry() = default;
1110};
1111
1112InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
1113 ProfRecord = Record;
1114 uint64_t CntNum = Record->Counts.size();
1115 uint64_t ZeroCntNum = 0;
1116 for (size_t I = 0; I < CntNum; ++I) {
1117 MaxCount = std::max(a: MaxCount, b: Record->Counts[I]);
1118 ZeroCntNum += !Record->Counts[I];
1119 }
1120 ZeroCounterRatio = (float)ZeroCntNum / CntNum;
1121 NumEdgeCounters = CntNum;
1122}
1123
1124/// Either set all the counters in the instr profile entry \p IFE to
1125/// -1 / -2 /in order to drop the profile or scale up the
1126/// counters in \p IFP to be above hot / cold threshold. We use
1127/// the ratio of zero counters in the profile of a function to
1128/// decide the profile is helpful or harmful for performance,
1129/// and to choose whether to scale up or drop it.
1130static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
1131 uint64_t HotInstrThreshold,
1132 uint64_t ColdInstrThreshold,
1133 float ZeroCounterThreshold) {
1134 InstrProfRecord *ProfRecord = IFE.ProfRecord;
1135 if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
1136 // If all or most of the counters of the function are zero, the
1137 // profile is unaccountable and should be dropped. Reset all the
1138 // counters to be -1 / -2 and PGO profile-use will drop the profile.
1139 // All counters being -1 also implies that the function is hot so
1140 // PGO profile-use will also set the entry count metadata to be
1141 // above hot threshold.
1142 // All counters being -2 implies that the function is warm so
1143 // PGO profile-use will also set the entry count metadata to be
1144 // above cold threshold.
1145 auto Kind =
1146 (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
1147 ProfRecord->setPseudoCount(Kind);
1148 return;
1149 }
1150
1151 // Scale up the MaxCount to be multiple times above hot / cold threshold.
1152 const unsigned MultiplyFactor = 3;
1153 uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1154 uint64_t Numerator = Threshold * MultiplyFactor;
1155
1156 // Make sure Threshold for warm counters is below the HotInstrThreshold.
1157 if (!SetToHot && Threshold >= HotInstrThreshold) {
1158 Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
1159 }
1160
1161 uint64_t Denominator = IFE.MaxCount;
1162 if (Numerator <= Denominator)
1163 return;
1164 ProfRecord->scale(N: Numerator, D: Denominator, Warn: [&](instrprof_error E) {
1165 warn(Message: toString(E: make_error<InstrProfError>(Args&: E)));
1166 });
1167}
1168
// Positions inside ProfileSummaryBuilder::DefaultCutoffs of the cutoffs that
// are looked up to obtain the cold and the hot count thresholds.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
1171
1172using sampleprof::FSDiscriminatorPass;
1173
1174// Internal options to set FSDiscriminatorPass. Used in merge and show
1175// commands.
1176static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1177 "fs-discriminator-pass", cl::init(Val: PassLast), cl::Hidden,
1178 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1179 "pass beyond this value. The enum values are defined in "
1180 "Support/Discriminator.h"),
1181 cl::values(clEnumVal(Base, "Use base discriminators only"),
1182 clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1183 clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1184 clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1185 clEnumVal(PassLast, "Use all discriminator bits (default)")));
1186
1187static unsigned getDiscriminatorMask() {
1188 return getN1Bits(N: getFSPassBitEnd(P: FSDiscriminatorPassOption.getValue()));
1189}
1190
1191/// Adjust the instr profile in \p WC based on the sample profile in
1192/// \p Reader.
1193static void
1194adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1195 std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1196 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1197 unsigned InstrProfColdThreshold) {
1198 // Function to its entry in instr profile.
1199 StringMap<InstrProfileEntry> InstrProfileMap;
1200 StringMap<StringRef> StaticFuncMap;
1201 InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1202
1203 auto checkSampleProfileHasFUnique = [&Reader]() {
1204 for (const auto &PD : Reader->getProfiles()) {
1205 auto &FContext = PD.second.getContext();
1206 if (FContext.toString().find(s: FunctionSamples::UniqSuffix) !=
1207 std::string::npos) {
1208 return true;
1209 }
1210 }
1211 return false;
1212 };
1213
1214 bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
1215
1216 auto buildStaticFuncMap = [&StaticFuncMap,
1217 SampleProfileHasFUnique](const StringRef Name) {
1218 std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1219 size_t PrefixPos = StringRef::npos;
1220 for (auto &FilePrefix : FilePrefixes) {
1221 std::string NamePrefix = FilePrefix + GlobalIdentifierDelimiter;
1222 PrefixPos = Name.find_insensitive(Str: NamePrefix);
1223 if (PrefixPos == StringRef::npos)
1224 continue;
1225 PrefixPos += NamePrefix.size();
1226 break;
1227 }
1228
1229 if (PrefixPos == StringRef::npos) {
1230 return;
1231 }
1232
1233 StringRef NewName = Name.drop_front(N: PrefixPos);
1234 StringRef FName = Name.substr(Start: 0, N: PrefixPos - 1);
1235 if (NewName.size() == 0) {
1236 return;
1237 }
1238
1239 // This name should have a static linkage.
1240 size_t PostfixPos = NewName.find(Str: FunctionSamples::UniqSuffix);
1241 bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1242
1243 // If sample profile and instrumented profile do not agree on symbol
1244 // uniqification.
1245 if (SampleProfileHasFUnique != ProfileHasFUnique) {
1246 // If instrumented profile uses -funique-internal-linkage-symbols,
1247 // we need to trim the name.
1248 if (ProfileHasFUnique) {
1249 NewName = NewName.substr(Start: 0, N: PostfixPos);
1250 } else {
1251 // If sample profile uses -funique-internal-linkage-symbols,
1252 // we build the map.
1253 std::string NStr =
1254 NewName.str() + getUniqueInternalLinkagePostfix(FName);
1255 NewName = StringRef(NStr);
1256 StaticFuncMap[NewName] = Name;
1257 return;
1258 }
1259 }
1260
1261 auto [It, Inserted] = StaticFuncMap.try_emplace(Key: NewName, Args: Name);
1262 if (!Inserted)
1263 It->second = DuplicateNameStr;
1264 };
1265
1266 // We need to flatten the SampleFDO profile as the InstrFDO
1267 // profile does not have inlined callsite profiles.
1268 // One caveat is the pre-inlined function -- their samples
1269 // should be collapsed into the caller function.
1270 // Here we do a DFS traversal to get the flatten profile
1271 // info: the sum of entrycount and the max of maxcount.
1272 // Here is the algorithm:
1273 // recursive (FS, root_name) {
1274 // name = FS->getName();
1275 // get samples for FS;
1276 // if (InstrProf.find(name) {
1277 // root_name = name;
1278 // } else {
1279 // if (name is in static_func map) {
1280 // root_name = static_name;
1281 // }
1282 // }
1283 // update the Map entry for root_name;
1284 // for (subfs: FS) {
1285 // recursive(subfs, root_name);
1286 // }
1287 // }
1288 //
1289 // Here is an example.
1290 //
1291 // SampleProfile:
1292 // foo:12345:1000
1293 // 1: 1000
1294 // 2.1: 1000
1295 // 15: 5000
1296 // 4: bar:1000
1297 // 1: 1000
1298 // 2: goo:3000
1299 // 1: 3000
1300 // 8: bar:40000
1301 // 1: 10000
1302 // 2: goo:30000
1303 // 1: 30000
1304 //
1305 // InstrProfile has two entries:
1306 // foo
1307 // bar.cc;bar
1308 //
1309 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1310 // {"foo", {1000, 5000}}
1311 // {"bar.cc;bar", {11000, 30000}}
1312 //
1313 // foo's has an entry count of 1000, and max body count of 5000.
1314 // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1315 // 10000), and max count of 30000 (from the callsite in line 8).
1316 //
1317 // Note that goo's count will remain in bar.cc;bar() as it does not have an
1318 // entry in InstrProfile.
1319 llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1320 auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1321 &InstrProfileMap](const FunctionSamples &FS,
1322 const StringRef &RootName) {
1323 auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1324 const StringRef &RootName,
1325 auto &BuildImpl) -> void {
1326 std::string NameStr = FS.getFunction().str();
1327 const StringRef Name = NameStr;
1328 const StringRef *NewRootName = &RootName;
1329 uint64_t EntrySample = FS.getHeadSamplesEstimate();
1330 uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
1331
1332 auto It = InstrProfileMap.find(Key: Name);
1333 if (It != InstrProfileMap.end()) {
1334 NewRootName = &Name;
1335 } else {
1336 auto NewName = StaticFuncMap.find(Key: Name);
1337 if (NewName != StaticFuncMap.end()) {
1338 It = InstrProfileMap.find(Key: NewName->second);
1339 if (NewName->second != DuplicateNameStr) {
1340 NewRootName = &NewName->second;
1341 }
1342 } else {
1343 // Here the EntrySample is of an inlined function, so we should not
1344 // update the EntrySample in the map.
1345 EntrySample = 0;
1346 }
1347 }
1348 EntrySample += FlattenSampleMap[*NewRootName].first;
1349 MaxBodySample =
1350 std::max(a: FlattenSampleMap[*NewRootName].second, b: MaxBodySample);
1351 FlattenSampleMap[*NewRootName] =
1352 std::make_pair(x&: EntrySample, y&: MaxBodySample);
1353
1354 for (const auto &C : FS.getCallsiteSamples())
1355 for (const auto &F : C.second)
1356 BuildImpl(F.second, *NewRootName, BuildImpl);
1357 };
1358 BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1359 };
1360
1361 for (auto &PD : WC->Writer.getProfileData()) {
1362 // Populate IPBuilder.
1363 for (const auto &PDV : PD.getValue()) {
1364 InstrProfRecord Record = PDV.second;
1365 IPBuilder.addRecord(Record);
1366 }
1367
1368 // If a function has multiple entries in instr profile, skip it.
1369 if (PD.getValue().size() != 1)
1370 continue;
1371
1372 // Initialize InstrProfileMap.
1373 InstrProfRecord *R = &PD.getValue().begin()->second;
1374 StringRef FullName = PD.getKey();
1375 InstrProfileMap[FullName] = InstrProfileEntry(R);
1376 buildStaticFuncMap(FullName);
1377 }
1378
1379 for (auto &PD : Reader->getProfiles()) {
1380 sampleprof::FunctionSamples &FS = PD.second;
1381 std::string Name = FS.getFunction().str();
1382 BuildMaxSampleMap(FS, Name);
1383 }
1384
1385 ProfileSummary InstrPS = *IPBuilder.getSummary();
1386 ProfileSummary SamplePS = Reader->getSummary();
1387
1388 // Compute cold thresholds for instr profile and sample profile.
1389 uint64_t HotSampleThreshold =
1390 ProfileSummaryBuilder::getEntryForPercentile(
1391 DS: SamplePS.getDetailedSummary(),
1392 Percentile: ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1393 .MinCount;
1394 uint64_t ColdSampleThreshold =
1395 ProfileSummaryBuilder::getEntryForPercentile(
1396 DS: SamplePS.getDetailedSummary(),
1397 Percentile: ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1398 .MinCount;
1399 uint64_t HotInstrThreshold =
1400 ProfileSummaryBuilder::getEntryForPercentile(
1401 DS: InstrPS.getDetailedSummary(),
1402 Percentile: ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1403 .MinCount;
1404 uint64_t ColdInstrThreshold =
1405 InstrProfColdThreshold
1406 ? InstrProfColdThreshold
1407 : ProfileSummaryBuilder::getEntryForPercentile(
1408 DS: InstrPS.getDetailedSummary(),
1409 Percentile: ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1410 .MinCount;
1411
1412 // Find hot/warm functions in sample profile which is cold in instr profile
1413 // and adjust the profiles of those functions in the instr profile.
1414 for (const auto &E : FlattenSampleMap) {
1415 uint64_t SampleMaxCount = std::max(a: E.second.first, b: E.second.second);
1416 if (SampleMaxCount < ColdSampleThreshold)
1417 continue;
1418 StringRef Name = E.first();
1419 auto It = InstrProfileMap.find(Key: Name);
1420 if (It == InstrProfileMap.end()) {
1421 auto NewName = StaticFuncMap.find(Key: Name);
1422 if (NewName != StaticFuncMap.end()) {
1423 It = InstrProfileMap.find(Key: NewName->second);
1424 if (NewName->second == DuplicateNameStr) {
1425 WithColor::warning()
1426 << "Static function " << Name
1427 << " has multiple promoted names, cannot adjust profile.\n";
1428 }
1429 }
1430 }
1431 if (It == InstrProfileMap.end() ||
1432 It->second.MaxCount > ColdInstrThreshold ||
1433 It->second.NumEdgeCounters < SupplMinSizeThreshold)
1434 continue;
1435 bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1436 updateInstrProfileEntry(IFE&: It->second, SetToHot, HotInstrThreshold,
1437 ColdInstrThreshold, ZeroCounterThreshold);
1438 }
1439}
1440
1441/// The main function to supplement instr profile with sample profile.
1442/// \Inputs contains the instr profile. \p SampleFilename specifies the
1443/// sample profile. \p OutputFilename specifies the output profile name.
1444/// \p OutputFormat specifies the output profile format. \p OutputSparse
1445/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1446/// specifies the minimal size for the functions whose profile will be
1447/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1448/// a function contains too many zero counters and whether its profile
1449/// should be dropped. \p InstrProfColdThreshold is the user specified
1450/// cold threshold which will override the cold threshold got from the
1451/// instr profile summary.
1452static void supplementInstrProfile(const WeightedFileVector &Inputs,
1453 StringRef SampleFilename, bool OutputSparse,
1454 unsigned SupplMinSizeThreshold,
1455 float ZeroCounterThreshold,
1456 unsigned InstrProfColdThreshold) {
1457 if (OutputFilename == "-")
1458 exitWithError(Message: "cannot write indexed profdata format to stdout");
1459 if (Inputs.size() != 1)
1460 exitWithError(Message: "expect one input to be an instr profile");
1461 if (Inputs[0].Weight != 1)
1462 exitWithError(Message: "expect instr profile doesn't have weight");
1463
1464 StringRef InstrFilename = Inputs[0].Filename;
1465
1466 // Read sample profile.
1467 LLVMContext Context;
1468 auto FS = vfs::getRealFileSystem();
1469 auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1470 Filename: SampleFilename.str(), C&: Context, FS&: *FS, P: FSDiscriminatorPassOption);
1471 if (std::error_code EC = ReaderOrErr.getError())
1472 exitWithErrorCode(EC, Whence: SampleFilename);
1473 auto Reader = std::move(ReaderOrErr.get());
1474 if (std::error_code EC = Reader->read())
1475 exitWithErrorCode(EC, Whence: SampleFilename);
1476
1477 // Read instr profile.
1478 std::mutex ErrorLock;
1479 SmallSet<instrprof_error, 4> WriterErrorCodes;
1480 auto WC = std::make_unique<WriterContext>(args&: OutputSparse, args&: ErrorLock,
1481 args&: WriterErrorCodes);
1482 loadInput(Input: Inputs[0], Remapper: nullptr, Correlator: nullptr, /*ProfiledBinary=*/"", WC: WC.get());
1483 if (WC->Errors.size() > 0)
1484 exitWithError(E: std::move(WC->Errors[0].first), Whence: InstrFilename);
1485
1486 adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1487 InstrProfColdThreshold);
1488 writeInstrProfile(OutputFilename, OutputFormat, Writer&: WC->Writer);
1489}
1490
1491/// Make a copy of the given function samples with all symbol names remapped
1492/// by the provided symbol remapper.
1493static sampleprof::FunctionSamples
1494remapSamples(const sampleprof::FunctionSamples &Samples,
1495 SymbolRemapper &Remapper, sampleprof_error &Error) {
1496 sampleprof::FunctionSamples Result;
1497 Result.setFunction(Remapper(Samples.getFunction()));
1498 Result.addTotalSamples(Num: Samples.getTotalSamples());
1499 Result.addHeadSamples(Num: Samples.getHeadSamples());
1500 for (const auto &BodySample : Samples.getBodySamples()) {
1501 uint32_t MaskedDiscriminator =
1502 BodySample.first.Discriminator & getDiscriminatorMask();
1503 Result.addBodySamples(LineOffset: BodySample.first.LineOffset, Discriminator: MaskedDiscriminator,
1504 Num: BodySample.second.getSamples());
1505 for (const auto &Target : BodySample.second.getCallTargets()) {
1506 Result.addCalledTargetSamples(LineOffset: BodySample.first.LineOffset,
1507 Discriminator: MaskedDiscriminator,
1508 Func: Remapper(Target.first), Num: Target.second);
1509 }
1510 }
1511 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1512 sampleprof::FunctionSamplesMap &Target =
1513 Result.functionSamplesAt(Loc: CallsiteSamples.first);
1514 for (const auto &Callsite : CallsiteSamples.second) {
1515 sampleprof::FunctionSamples Remapped =
1516 remapSamples(Samples: Callsite.second, Remapper, Error);
1517 mergeSampleProfErrors(Accumulator&: Error,
1518 Result: Target[Remapped.getFunction()].merge(Other: Remapped));
1519 }
1520 }
1521 return Result;
1522}
1523
1524static sampleprof::SampleProfileFormat FormatMap[] = {
1525 sampleprof::SPF_None,
1526 sampleprof::SPF_Text,
1527 sampleprof::SPF_None,
1528 sampleprof::SPF_Ext_Binary,
1529 sampleprof::SPF_GCC,
1530 sampleprof::SPF_Binary};
1531
1532static std::unique_ptr<MemoryBuffer>
1533getInputFileBuf(const StringRef &InputFile) {
1534 if (InputFile == "")
1535 return {};
1536
1537 auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
1538 if (!BufOrError)
1539 exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
1540
1541 return std::move(*BufOrError);
1542}
1543
1544static void populateProfileSymbolList(MemoryBuffer *Buffer,
1545 sampleprof::ProfileSymbolList &PSL) {
1546 if (!Buffer)
1547 return;
1548
1549 SmallVector<StringRef, 32> SymbolVec;
1550 StringRef Data = Buffer->getBuffer();
1551 Data.split(A&: SymbolVec, Separator: '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1552
1553 for (StringRef SymbolStr : SymbolVec)
1554 PSL.add(Name: SymbolStr.trim());
1555}
1556
1557static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1558 ProfileFormat OutputFormat,
1559 MemoryBuffer *Buffer,
1560 sampleprof::ProfileSymbolList &WriterList,
1561 bool CompressAllSections, bool UseMD5,
1562 bool GenPartialProfile) {
1563 if (SplitLayout) {
1564 if (OutputFormat == PF_Binary)
1565 warn(Message: "-split-layout is ignored. Specify -extbinary to enable it");
1566 else
1567 Writer.setUseCtxSplitLayout();
1568 }
1569
1570 populateProfileSymbolList(Buffer, PSL&: WriterList);
1571 if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
1572 warn(Message: "Profile Symbol list is not empty but the output format is not "
1573 "ExtBinary format. The list will be lost in the output. ");
1574
1575 Writer.setProfileSymbolList(&WriterList);
1576
1577 if (CompressAllSections) {
1578 if (OutputFormat != PF_Ext_Binary)
1579 warn(Message: "-compress-all-section is ignored. Specify -extbinary to enable it");
1580 else
1581 Writer.setToCompressAllSections();
1582 }
1583 if (UseMD5) {
1584 if (OutputFormat != PF_Ext_Binary)
1585 warn(Message: "-use-md5 is ignored. Specify -extbinary to enable it");
1586 else
1587 Writer.setUseMD5();
1588 }
1589 if (GenPartialProfile) {
1590 if (OutputFormat != PF_Ext_Binary)
1591 warn(Message: "-gen-partial-profile is ignored. Specify -extbinary to enable it");
1592 else
1593 Writer.setPartialProfile();
1594 }
1595}
1596
1597static void mergeSampleProfile(const WeightedFileVector &Inputs,
1598 SymbolRemapper *Remapper,
1599 StringRef ProfileSymbolListFile,
1600 size_t OutputSizeLimit) {
1601 using namespace sampleprof;
1602 SampleProfileMap ProfileMap;
1603 SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1604 LLVMContext Context;
1605 sampleprof::ProfileSymbolList WriterList;
1606 std::optional<bool> ProfileIsProbeBased;
1607 std::optional<bool> ProfileIsCS;
1608 for (const auto &Input : Inputs) {
1609 auto FS = vfs::getRealFileSystem();
1610 auto ReaderOrErr = SampleProfileReader::create(Filename: Input.Filename, C&: Context, FS&: *FS,
1611 P: FSDiscriminatorPassOption);
1612 if (std::error_code EC = ReaderOrErr.getError()) {
1613 warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1614 continue;
1615 }
1616
1617 // We need to keep the readers around until after all the files are
1618 // read so that we do not lose the function names stored in each
1619 // reader's memory. The function names are needed to write out the
1620 // merged profile map.
1621 Readers.push_back(Elt: std::move(ReaderOrErr.get()));
1622 const auto Reader = Readers.back().get();
1623 if (std::error_code EC = Reader->read()) {
1624 warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1625 Readers.pop_back();
1626 continue;
1627 }
1628
1629 SampleProfileMap &Profiles = Reader->getProfiles();
1630 if (ProfileIsProbeBased &&
1631 ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1632 exitWithError(
1633 Message: "cannot merge probe-based profile with non-probe-based profile");
1634 ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1635 if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1636 exitWithError(Message: "cannot merge CS profile with non-CS profile");
1637 ProfileIsCS = FunctionSamples::ProfileIsCS;
1638 for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1639 I != E; ++I) {
1640 sampleprof_error Result = sampleprof_error::success;
1641 FunctionSamples Remapped =
1642 Remapper ? remapSamples(Samples: I->second, Remapper&: *Remapper, Error&: Result)
1643 : FunctionSamples();
1644 FunctionSamples &Samples = Remapper ? Remapped : I->second;
1645 SampleContext FContext = Samples.getContext();
1646 mergeSampleProfErrors(Accumulator&: Result,
1647 Result: ProfileMap[FContext].merge(Other: Samples, Weight: Input.Weight));
1648 if (Result != sampleprof_error::success) {
1649 std::error_code EC = make_error_code(E: Result);
1650 handleMergeWriterError(E: errorCodeToError(EC), WhenceFile: Input.Filename,
1651 WhenceFunction: FContext.toString());
1652 }
1653 }
1654
1655 if (!DropProfileSymbolList) {
1656 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1657 Reader->getProfileSymbolList();
1658 if (ReaderList)
1659 WriterList.merge(List: *ReaderList);
1660 }
1661 }
1662
1663 if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1664 // Use threshold calculated from profile summary unless specified.
1665 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1666 auto Summary = Builder.computeSummaryForProfiles(Profiles: ProfileMap);
1667 uint64_t SampleProfColdThreshold =
1668 ProfileSummaryBuilder::getColdCountThreshold(
1669 DS: (Summary->getDetailedSummary()));
1670
1671 // Trim and merge cold context profile using cold threshold above;
1672 SampleContextTrimmer(ProfileMap)
1673 .trimAndMergeColdContextProfiles(
1674 ColdCountThreshold: SampleProfColdThreshold, TrimColdContext: SampleTrimColdContext,
1675 MergeColdContext: SampleMergeColdContext, ColdContextFrameLength: SampleColdContextFrameDepth, TrimBaseProfileOnly: false);
1676 }
1677
1678 if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1679 ProfileConverter::flattenProfile(ProfileMap, ProfileIsCS: FunctionSamples::ProfileIsCS);
1680 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1681 } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1682 ProfileConverter CSConverter(ProfileMap);
1683 CSConverter.convertCSProfiles();
1684 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1685 }
1686
1687 filterFunctions(ProfileMap);
1688
1689 auto WriterOrErr =
1690 SampleProfileWriter::create(Filename: OutputFilename, Format: FormatMap[OutputFormat]);
1691 if (std::error_code EC = WriterOrErr.getError())
1692 exitWithErrorCode(EC, Whence: OutputFilename);
1693
1694 auto Writer = std::move(WriterOrErr.get());
1695 // WriterList will have StringRef refering to string in Buffer.
1696 // Make sure Buffer lives as long as WriterList.
1697 auto Buffer = getInputFileBuf(InputFile: ProfileSymbolListFile);
1698 handleExtBinaryWriter(Writer&: *Writer, OutputFormat, Buffer: Buffer.get(), WriterList,
1699 CompressAllSections, UseMD5, GenPartialProfile);
1700
1701 // If OutputSizeLimit is 0 (default), it is the same as write().
1702 if (std::error_code EC =
1703 Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1704 exitWithErrorCode(EC);
1705}
1706
1707static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1708 StringRef WeightStr, FileName;
1709 std::tie(args&: WeightStr, args&: FileName) = WeightedFilename.split(Separator: ',');
1710
1711 uint64_t Weight;
1712 if (WeightStr.getAsInteger(Radix: 10, Result&: Weight) || Weight < 1)
1713 exitWithError(Message: "input weight must be a positive integer");
1714
1715 llvm::SmallString<128> ResolvedFileName;
1716 llvm::sys::fs::expand_tilde(path: FileName, output&: ResolvedFileName);
1717
1718 return {.Filename: std::string(ResolvedFileName), .Weight: Weight};
1719}
1720
1721static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1722 StringRef Filename = WF.Filename;
1723 uint64_t Weight = WF.Weight;
1724
1725 // If it's STDIN just pass it on.
1726 if (Filename == "-") {
1727 WNI.push_back(Elt: {.Filename: std::string(Filename), .Weight: Weight});
1728 return;
1729 }
1730
1731 llvm::sys::fs::file_status Status;
1732 llvm::sys::fs::status(path: Filename, result&: Status);
1733 if (!llvm::sys::fs::exists(status: Status))
1734 exitWithErrorCode(EC: make_error_code(E: errc::no_such_file_or_directory),
1735 Whence: Filename);
1736 // If it's a source file, collect it.
1737 if (llvm::sys::fs::is_regular_file(status: Status)) {
1738 WNI.push_back(Elt: {.Filename: std::string(Filename), .Weight: Weight});
1739 return;
1740 }
1741
1742 if (llvm::sys::fs::is_directory(status: Status)) {
1743 std::error_code EC;
1744 for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1745 F != E && !EC; F.increment(ec&: EC)) {
1746 if (llvm::sys::fs::is_regular_file(Path: F->path())) {
1747 addWeightedInput(WNI, WF: {.Filename: F->path(), .Weight: Weight});
1748 }
1749 }
1750 if (EC)
1751 exitWithErrorCode(EC, Whence: Filename);
1752 }
1753}
1754
1755static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1756 WeightedFileVector &WFV) {
1757 if (!Buffer)
1758 return;
1759
1760 SmallVector<StringRef, 8> Entries;
1761 StringRef Data = Buffer->getBuffer();
1762 Data.split(A&: Entries, Separator: '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1763 for (const StringRef &FileWeightEntry : Entries) {
1764 StringRef SanitizedEntry = FileWeightEntry.trim(Chars: " \t\v\f\r");
1765 // Skip comments.
1766 if (SanitizedEntry.starts_with(Prefix: "#"))
1767 continue;
1768 // If there's no comma, it's an unweighted profile.
1769 else if (!SanitizedEntry.contains(C: ','))
1770 addWeightedInput(WNI&: WFV, WF: {.Filename: std::string(SanitizedEntry), .Weight: 1});
1771 else
1772 addWeightedInput(WNI&: WFV, WF: parseWeightedFile(WeightedFilename: SanitizedEntry));
1773 }
1774}
1775
1776static int merge_main(StringRef ProgName) {
1777 WeightedFileVector WeightedInputs;
1778 for (StringRef Filename : InputFilenames)
1779 addWeightedInput(WNI&: WeightedInputs, WF: {.Filename: std::string(Filename), .Weight: 1});
1780 for (StringRef WeightedFilename : WeightedInputFilenames)
1781 addWeightedInput(WNI&: WeightedInputs, WF: parseWeightedFile(WeightedFilename));
1782
1783 // Make sure that the file buffer stays alive for the duration of the
1784 // weighted input vector's lifetime.
1785 auto Buffer = getInputFileBuf(InputFile: InputFilenamesFile);
1786 parseInputFilenamesFile(Buffer: Buffer.get(), WFV&: WeightedInputs);
1787
1788 if (WeightedInputs.empty())
1789 exitWithError(Message: "no input files specified. See " + ProgName + " merge -help");
1790
1791 if (DumpInputFileList) {
1792 for (auto &WF : WeightedInputs)
1793 outs() << WF.Weight << "," << WF.Filename << "\n";
1794 return 0;
1795 }
1796
1797 std::unique_ptr<SymbolRemapper> Remapper;
1798 if (!RemappingFile.empty())
1799 Remapper = SymbolRemapper::create(InputFile: RemappingFile);
1800
1801 if (!SupplInstrWithSample.empty()) {
1802 if (ProfileKind != instr)
1803 exitWithError(
1804 Message: "-supplement-instr-with-sample can only work with -instr. ");
1805
1806 supplementInstrProfile(Inputs: WeightedInputs, SampleFilename: SupplInstrWithSample, OutputSparse,
1807 SupplMinSizeThreshold, ZeroCounterThreshold,
1808 InstrProfColdThreshold);
1809 return 0;
1810 }
1811
1812 if (ProfileKind == instr)
1813 mergeInstrProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), MaxDbgCorrelationWarnings,
1814 ProfiledBinary);
1815 else
1816 mergeSampleProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), ProfileSymbolListFile,
1817 OutputSizeLimit);
1818 return 0;
1819}
1820
1821/// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1822static void overlapInstrProfile(const std::string &BaseFilename,
1823 const std::string &TestFilename,
1824 const OverlapFuncFilters &FuncFilter,
1825 raw_fd_ostream &OS, bool IsCS) {
1826 std::mutex ErrorLock;
1827 SmallSet<instrprof_error, 4> WriterErrorCodes;
1828 WriterContext Context(false, ErrorLock, WriterErrorCodes);
1829 WeightedFile WeightedInput{.Filename: BaseFilename, .Weight: 1};
1830 OverlapStats Overlap;
1831 Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1832 if (E)
1833 exitWithError(E: std::move(E), Whence: "error in getting profile count sums");
1834 if (Overlap.Base.CountSum < 1.0f) {
1835 OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1836 exit(status: 0);
1837 }
1838 if (Overlap.Test.CountSum < 1.0f) {
1839 OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1840 exit(status: 0);
1841 }
1842 loadInput(Input: WeightedInput, Remapper: nullptr, Correlator: nullptr, /*ProfiledBinary=*/"", WC: &Context);
1843 overlapInput(BaseFilename, TestFilename, WC: &Context, Overlap, FuncFilter, OS,
1844 IsCS);
1845 Overlap.dump(OS);
1846}
1847
1848namespace {
1849struct SampleOverlapStats {
1850 SampleContext BaseName;
1851 SampleContext TestName;
1852 // Number of overlap units
1853 uint64_t OverlapCount = 0;
1854 // Total samples of overlap units
1855 uint64_t OverlapSample = 0;
1856 // Number of and total samples of units that only present in base or test
1857 // profile
1858 uint64_t BaseUniqueCount = 0;
1859 uint64_t BaseUniqueSample = 0;
1860 uint64_t TestUniqueCount = 0;
1861 uint64_t TestUniqueSample = 0;
1862 // Number of units and total samples in base or test profile
1863 uint64_t BaseCount = 0;
1864 uint64_t BaseSample = 0;
1865 uint64_t TestCount = 0;
1866 uint64_t TestSample = 0;
1867 // Number of and total samples of units that present in at least one profile
1868 uint64_t UnionCount = 0;
1869 uint64_t UnionSample = 0;
1870 // Weighted similarity
1871 double Similarity = 0.0;
1872 // For SampleOverlapStats instances representing functions, weights of the
1873 // function in base and test profiles
1874 double BaseWeight = 0.0;
1875 double TestWeight = 0.0;
1876
1877 SampleOverlapStats() = default;
1878};
1879} // end anonymous namespace
1880
namespace {
/// Per-function sample summary: total samples, largest single sample count,
/// and the number of hot blocks.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  /// All counters start at zero.
  FuncSampleStats() = default;

  /// Build a summary from already-computed counters.
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum(SampleSum), MaxSample(MaxSample),
        HotBlockCount(HotBlockCount) {}
};
} // end anonymous namespace
1893
namespace {
/// Outcome of comparing the current elements of two sorted sequences:
/// both keys are equal (MS_Match), only the first or only the second sequence
/// has the key (MS_FirstUnique / MS_SecondUnique), or no comparison has been
/// made yet (MS_None).
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Cursor for walking two sorted maps in lock-step (sort-merge join).
/// Instantiate it with the maps' iterator type.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  /// Start at \p FirstIter / \p SecondIter with no comparison made yet.
  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd) {}

  /// True once the first container has been fully consumed.
  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  /// True once the second container has been fully consumed.
  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// True once both containers have been fully consumed.
  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Advance one step based on the previous match status unless the previous
  /// status is MS_None. Then update Status based on the comparison between two
  /// container iterators at the current step. If the previous status is
  /// MS_None, it means two iterators are at the beginning and no comparison has
  /// been made, so we simply update Status without advancing the iterators.
  void updateOneStep();

  /// Current position in the first container.
  T getFirstIter() const { return FirstIter; }

  /// Current position in the second container.
  T getSecondIter() const { return SecondIter; }

  /// Result of the most recent comparison.
  MatchStatus getMatchStatus() const { return Status; }

private:
  // Cursor and end of the first container.
  T FirstIter;
  T FirstEnd;
  // Cursor and end of the second container.
  T SecondIter;
  T SecondEnd;
  // Match status of the current step; MS_None until the first update.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace
1939
1940template <class T> void MatchStep<T>::updateOneStep() {
1941 switch (Status) {
1942 case MS_Match:
1943 ++FirstIter;
1944 ++SecondIter;
1945 break;
1946 case MS_FirstUnique:
1947 ++FirstIter;
1948 break;
1949 case MS_SecondUnique:
1950 ++SecondIter;
1951 break;
1952 case MS_None:
1953 break;
1954 }
1955
1956 // Update Status according to iterators at the current step.
1957 if (areBothFinished())
1958 return;
1959 if (FirstIter != FirstEnd &&
1960 (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
1961 Status = MS_FirstUnique;
1962 else if (SecondIter != SecondEnd &&
1963 (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
1964 Status = MS_SecondUnique;
1965 else
1966 Status = MS_Match;
1967}
1968
1969// Return the sum of line/block samples, the max line/block sample, and the
1970// number of line/block samples above the given threshold in a function
1971// including its inlinees.
1972static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1973 FuncSampleStats &FuncStats,
1974 uint64_t HotThreshold) {
1975 for (const auto &L : Func.getBodySamples()) {
1976 uint64_t Sample = L.second.getSamples();
1977 FuncStats.SampleSum += Sample;
1978 FuncStats.MaxSample = std::max(a: FuncStats.MaxSample, b: Sample);
1979 if (Sample >= HotThreshold)
1980 ++FuncStats.HotBlockCount;
1981 }
1982
1983 for (const auto &C : Func.getCallsiteSamples()) {
1984 for (const auto &F : C.second)
1985 getFuncSampleStats(Func: F.second, FuncStats, HotThreshold);
1986 }
1987}
1988
1989/// Predicate that determines if a function is hot with a given threshold. We
1990/// keep it separate from its callsites for possible extension in the future.
1991static bool isFunctionHot(const FuncSampleStats &FuncStats,
1992 uint64_t HotThreshold) {
1993 // We intentionally compare the maximum sample count in a function with the
1994 // HotThreshold to get an approximate determination on hot functions.
1995 return (FuncStats.MaxSample >= HotThreshold);
1996}
1997
1998namespace {
1999class SampleOverlapAggregator {
2000public:
2001 SampleOverlapAggregator(const std::string &BaseFilename,
2002 const std::string &TestFilename,
2003 double LowSimilarityThreshold, double Epsilon,
2004 const OverlapFuncFilters &FuncFilter)
2005 : BaseFilename(BaseFilename), TestFilename(TestFilename),
2006 LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
2007 FuncFilter(FuncFilter) {}
2008
2009 /// Detect 0-sample input profile and report to output stream. This interface
2010 /// should be called after loadProfiles().
2011 bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
2012
2013 /// Write out function-level similarity statistics for functions specified by
2014 /// options --function, --value-cutoff, and --similarity-cutoff.
2015 void dumpFuncSimilarity(raw_fd_ostream &OS) const;
2016
2017 /// Write out program-level similarity and overlap statistics.
2018 void dumpProgramSummary(raw_fd_ostream &OS) const;
2019
2020 /// Write out hot-function and hot-block statistics for base_profile,
2021 /// test_profile, and their overlap. For both cases, the overlap HO is
2022 /// calculated as follows:
2023 /// Given the number of functions (or blocks) that are hot in both profiles
2024 /// HCommon and the number of functions (or blocks) that are hot in at
2025 /// least one profile HUnion, HO = HCommon / HUnion.
2026 void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
2027
2028 /// This function tries matching functions in base and test profiles. For each
2029 /// pair of matched functions, it aggregates the function-level
2030 /// similarity into a profile-level similarity. It also dump function-level
2031 /// similarity information of functions specified by --function,
2032 /// --value-cutoff, and --similarity-cutoff options. The program-level
2033 /// similarity PS is computed as follows:
2034 /// Given function-level similarity FS(A) for all function A, the
2035 /// weight of function A in base profile WB(A), and the weight of function
2036 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
2037 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
2038 /// meaning no-overlap.
2039 void computeSampleProfileOverlap(raw_fd_ostream &OS);
2040
2041 /// Initialize ProfOverlap with the sum of samples in base and test
2042 /// profiles. This function also computes and keeps the sum of samples and
2043 /// max sample counts of each function in BaseStats and TestStats for later
2044 /// use to avoid re-computations.
2045 void initializeSampleProfileOverlap();
2046
2047 /// Load profiles specified by BaseFilename and TestFilename.
2048 std::error_code loadProfiles();
2049
2050 using FuncSampleStatsMap = DenseMap<SampleContext, FuncSampleStats>;
2051
2052private:
2053 SampleOverlapStats ProfOverlap;
2054 SampleOverlapStats HotFuncOverlap;
2055 SampleOverlapStats HotBlockOverlap;
2056 std::string BaseFilename;
2057 std::string TestFilename;
2058 std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
2059 std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
2060 // BaseStats and TestStats hold FuncSampleStats for each function, with
2061 // function name as the key.
2062 FuncSampleStatsMap BaseStats;
2063 FuncSampleStatsMap TestStats;
2064 // Low similarity threshold in floating point number
2065 double LowSimilarityThreshold;
2066 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
2067 // for tracking hot blocks.
2068 uint64_t BaseHotThreshold;
2069 uint64_t TestHotThreshold;
2070 // A small threshold used to round the results of floating point accumulations
2071 // to resolve imprecision.
2072 const double Epsilon;
2073 std::multimap<double, SampleOverlapStats, std::greater<double>>
2074 FuncSimilarityDump;
2075 // FuncFilter carries specifications in options --value-cutoff and
2076 // --function.
2077 OverlapFuncFilters FuncFilter;
2078 // Column offsets for printing the function-level details table.
2079 static const unsigned int TestWeightCol = 15;
2080 static const unsigned int SimilarityCol = 30;
2081 static const unsigned int OverlapCol = 43;
2082 static const unsigned int BaseUniqueCol = 53;
2083 static const unsigned int TestUniqueCol = 67;
2084 static const unsigned int BaseSampleCol = 81;
2085 static const unsigned int TestSampleCol = 96;
2086 static const unsigned int FuncNameCol = 111;
2087
2088 /// Return a similarity of two line/block sample counters in the same
2089 /// function in base and test profiles. The line/block-similarity BS(i) is
2090 /// computed as follows:
2091 /// For an offsets i, given the sample count at i in base profile BB(i),
2092 /// the sample count at i in test profile BT(i), the sum of sample counts
2093 /// in this function in base profile SB, and the sum of sample counts in
2094 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
2095 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
2096 double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
2097 const SampleOverlapStats &FuncOverlap) const;
2098
2099 void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
2100 uint64_t HotBlockCount);
2101
2102 void getHotFunctions(const FuncSampleStatsMap &ProfStats,
2103 FuncSampleStatsMap &HotFunc,
2104 uint64_t HotThreshold) const;
2105
2106 void computeHotFuncOverlap();
2107
2108 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2109 /// Difference for two sample units in a matched function according to the
2110 /// given match status.
2111 void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
2112 uint64_t HotBlockCount,
2113 SampleOverlapStats &FuncOverlap,
2114 double &Difference, MatchStatus Status);
2115
2116 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2117 /// Difference for unmatched callees that only present in one profile in a
2118 /// matched caller function.
2119 void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
2120 SampleOverlapStats &FuncOverlap,
2121 double &Difference, MatchStatus Status);
2122
2123 /// This function updates sample overlap statistics of an overlap function in
2124 /// base and test profile. It also calculates a function-internal similarity
2125 /// FIS as follows:
2126 /// For offsets i that have samples in at least one profile in this
2127 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
2128 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2129 /// 0.0 meaning no overlap.
2130 double computeSampleFunctionInternalOverlap(
2131 const sampleprof::FunctionSamples &BaseFunc,
2132 const sampleprof::FunctionSamples &TestFunc,
2133 SampleOverlapStats &FuncOverlap);
2134
2135 /// Function-level similarity (FS) is a weighted value over function internal
2136 /// similarity (FIS). This function computes a function's FS from its FIS by
2137 /// applying the weight.
2138 double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
2139 uint64_t TestFuncSample) const;
2140
2141 /// The function-level similarity FS(A) for a function A is computed as
2142 /// follows:
2143 /// Compute a function-internal similarity FIS(A) by
2144 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
2145 /// function A in base profile WB(A), and the weight of function A in test
2146 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2147 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2148 double
2149 computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
2150 const sampleprof::FunctionSamples *TestFunc,
2151 SampleOverlapStats *FuncOverlap,
2152 uint64_t BaseFuncSample,
2153 uint64_t TestFuncSample);
2154
2155 /// Profile-level similarity (PS) is a weighted aggregate over function-level
2156 /// similarities (FS). This method weights the FS value by the function
2157 /// weights in the base and test profiles for the aggregation.
2158 double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
2159 uint64_t TestFuncSample) const;
2160};
2161} // end anonymous namespace
2162
2163bool SampleOverlapAggregator::detectZeroSampleProfile(
2164 raw_fd_ostream &OS) const {
2165 bool HaveZeroSample = false;
2166 if (ProfOverlap.BaseSample == 0) {
2167 OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2168 HaveZeroSample = true;
2169 }
2170 if (ProfOverlap.TestSample == 0) {
2171 OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2172 HaveZeroSample = true;
2173 }
2174 return HaveZeroSample;
2175}
2176
2177double SampleOverlapAggregator::computeBlockSimilarity(
2178 uint64_t BaseSample, uint64_t TestSample,
2179 const SampleOverlapStats &FuncOverlap) const {
2180 double BaseFrac = 0.0;
2181 double TestFrac = 0.0;
2182 if (FuncOverlap.BaseSample > 0)
2183 BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2184 if (FuncOverlap.TestSample > 0)
2185 TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2186 return 1.0 - std::fabs(x: BaseFrac - TestFrac);
2187}
2188
2189void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2190 uint64_t TestSample,
2191 uint64_t HotBlockCount) {
2192 bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2193 bool IsTestHot = (TestSample >= TestHotThreshold);
2194 if (!IsBaseHot && !IsTestHot)
2195 return;
2196
2197 HotBlockOverlap.UnionCount += HotBlockCount;
2198 if (IsBaseHot)
2199 HotBlockOverlap.BaseCount += HotBlockCount;
2200 if (IsTestHot)
2201 HotBlockOverlap.TestCount += HotBlockCount;
2202 if (IsBaseHot && IsTestHot)
2203 HotBlockOverlap.OverlapCount += HotBlockCount;
2204}
2205
2206void SampleOverlapAggregator::getHotFunctions(
2207 const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2208 uint64_t HotThreshold) const {
2209 for (const auto &F : ProfStats) {
2210 if (isFunctionHot(FuncStats: F.second, HotThreshold))
2211 HotFunc.try_emplace(Key: F.first, Args: F.second);
2212 }
2213}
2214
2215void SampleOverlapAggregator::computeHotFuncOverlap() {
2216 FuncSampleStatsMap BaseHotFunc;
2217 getHotFunctions(ProfStats: BaseStats, HotFunc&: BaseHotFunc, HotThreshold: BaseHotThreshold);
2218 HotFuncOverlap.BaseCount = BaseHotFunc.size();
2219
2220 FuncSampleStatsMap TestHotFunc;
2221 getHotFunctions(ProfStats: TestStats, HotFunc&: TestHotFunc, HotThreshold: TestHotThreshold);
2222 HotFuncOverlap.TestCount = TestHotFunc.size();
2223 HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2224
2225 for (const auto &F : BaseHotFunc) {
2226 if (TestHotFunc.count(Val: F.first))
2227 ++HotFuncOverlap.OverlapCount;
2228 else
2229 ++HotFuncOverlap.UnionCount;
2230 }
2231}
2232
2233void SampleOverlapAggregator::updateOverlapStatsForFunction(
2234 uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2235 SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2236 assert(Status != MS_None &&
2237 "Match status should be updated before updating overlap statistics");
2238 if (Status == MS_FirstUnique) {
2239 TestSample = 0;
2240 FuncOverlap.BaseUniqueSample += BaseSample;
2241 } else if (Status == MS_SecondUnique) {
2242 BaseSample = 0;
2243 FuncOverlap.TestUniqueSample += TestSample;
2244 } else {
2245 ++FuncOverlap.OverlapCount;
2246 }
2247
2248 FuncOverlap.UnionSample += std::max(a: BaseSample, b: TestSample);
2249 FuncOverlap.OverlapSample += std::min(a: BaseSample, b: TestSample);
2250 Difference +=
2251 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2252 updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2253}
2254
2255void SampleOverlapAggregator::updateForUnmatchedCallee(
2256 const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2257 double &Difference, MatchStatus Status) {
2258 assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
2259 "Status must be either of the two unmatched cases");
2260 FuncSampleStats FuncStats;
2261 if (Status == MS_FirstUnique) {
2262 getFuncSampleStats(Func, FuncStats, HotThreshold: BaseHotThreshold);
2263 updateOverlapStatsForFunction(BaseSample: FuncStats.SampleSum, TestSample: 0,
2264 HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2265 Difference, Status);
2266 } else {
2267 getFuncSampleStats(Func, FuncStats, HotThreshold: TestHotThreshold);
2268 updateOverlapStatsForFunction(BaseSample: 0, TestSample: FuncStats.SampleSum,
2269 HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2270 Difference, Status);
2271 }
2272}
2273
2274double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2275 const sampleprof::FunctionSamples &BaseFunc,
2276 const sampleprof::FunctionSamples &TestFunc,
2277 SampleOverlapStats &FuncOverlap) {
2278
2279 using namespace sampleprof;
2280
2281 double Difference = 0;
2282
2283 // Accumulate Difference for regular line/block samples in the function.
2284 // We match them through sort-merge join algorithm because
2285 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2286 // by their offsets.
2287 MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2288 BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2289 TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2290 BlockIterStep.updateOneStep();
2291 while (!BlockIterStep.areBothFinished()) {
2292 uint64_t BaseSample =
2293 BlockIterStep.isFirstFinished()
2294 ? 0
2295 : BlockIterStep.getFirstIter()->second.getSamples();
2296 uint64_t TestSample =
2297 BlockIterStep.isSecondFinished()
2298 ? 0
2299 : BlockIterStep.getSecondIter()->second.getSamples();
2300 updateOverlapStatsForFunction(BaseSample, TestSample, HotBlockCount: 1, FuncOverlap,
2301 Difference, Status: BlockIterStep.getMatchStatus());
2302
2303 BlockIterStep.updateOneStep();
2304 }
2305
2306 // Accumulate Difference for callsite lines in the function. We match
2307 // them through sort-merge algorithm because
2308 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2309 // ordered by their offsets.
2310 MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2311 BaseFunc.getCallsiteSamples().cbegin(),
2312 BaseFunc.getCallsiteSamples().cend(),
2313 TestFunc.getCallsiteSamples().cbegin(),
2314 TestFunc.getCallsiteSamples().cend());
2315 CallsiteIterStep.updateOneStep();
2316 while (!CallsiteIterStep.areBothFinished()) {
2317 MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2318 assert(CallsiteStepStatus != MS_None &&
2319 "Match status should be updated before entering loop body");
2320
2321 if (CallsiteStepStatus != MS_Match) {
2322 auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2323 ? CallsiteIterStep.getFirstIter()
2324 : CallsiteIterStep.getSecondIter();
2325 for (const auto &F : Callsite->second)
2326 updateForUnmatchedCallee(Func: F.second, FuncOverlap, Difference,
2327 Status: CallsiteStepStatus);
2328 } else {
2329 // There may be multiple inlinees at the same offset, so we need to try
2330 // matching all of them. This match is implemented through sort-merge
2331 // algorithm because callsite records at the same offset are ordered by
2332 // function names.
2333 MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2334 CallsiteIterStep.getFirstIter()->second.cbegin(),
2335 CallsiteIterStep.getFirstIter()->second.cend(),
2336 CallsiteIterStep.getSecondIter()->second.cbegin(),
2337 CallsiteIterStep.getSecondIter()->second.cend());
2338 CalleeIterStep.updateOneStep();
2339 while (!CalleeIterStep.areBothFinished()) {
2340 MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2341 if (CalleeStepStatus != MS_Match) {
2342 auto Callee = (CalleeStepStatus == MS_FirstUnique)
2343 ? CalleeIterStep.getFirstIter()
2344 : CalleeIterStep.getSecondIter();
2345 updateForUnmatchedCallee(Func: Callee->second, FuncOverlap, Difference,
2346 Status: CalleeStepStatus);
2347 } else {
2348 // An inlined function can contain other inlinees inside, so compute
2349 // the Difference recursively.
2350 Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2351 BaseFunc: CalleeIterStep.getFirstIter()->second,
2352 TestFunc: CalleeIterStep.getSecondIter()->second,
2353 FuncOverlap);
2354 }
2355 CalleeIterStep.updateOneStep();
2356 }
2357 }
2358 CallsiteIterStep.updateOneStep();
2359 }
2360
2361 // Difference reflects the total differences of line/block samples in this
2362 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2363 // reflect the similarity between function profiles in [0.0f to 1.0f].
2364 return (2.0 - Difference) / 2;
2365}
2366
2367double SampleOverlapAggregator::weightForFuncSimilarity(
2368 double FuncInternalSimilarity, uint64_t BaseFuncSample,
2369 uint64_t TestFuncSample) const {
2370 // Compute the weight as the distance between the function weights in two
2371 // profiles.
2372 double BaseFrac = 0.0;
2373 double TestFrac = 0.0;
2374 assert(ProfOverlap.BaseSample > 0 &&
2375 "Total samples in base profile should be greater than 0");
2376 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2377 assert(ProfOverlap.TestSample > 0 &&
2378 "Total samples in test profile should be greater than 0");
2379 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2380 double WeightDistance = std::fabs(x: BaseFrac - TestFrac);
2381
2382 // Take WeightDistance into the similarity.
2383 return FuncInternalSimilarity * (1 - WeightDistance);
2384}
2385
2386double
2387SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2388 uint64_t BaseFuncSample,
2389 uint64_t TestFuncSample) const {
2390
2391 double BaseFrac = 0.0;
2392 double TestFrac = 0.0;
2393 assert(ProfOverlap.BaseSample > 0 &&
2394 "Total samples in base profile should be greater than 0");
2395 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
2396 assert(ProfOverlap.TestSample > 0 &&
2397 "Total samples in test profile should be greater than 0");
2398 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
2399 return FuncSimilarity * (BaseFrac + TestFrac);
2400}
2401
2402double SampleOverlapAggregator::computeSampleFunctionOverlap(
2403 const sampleprof::FunctionSamples *BaseFunc,
2404 const sampleprof::FunctionSamples *TestFunc,
2405 SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2406 uint64_t TestFuncSample) {
2407 // Default function internal similarity before weighted, meaning two functions
2408 // has no overlap.
2409 const double DefaultFuncInternalSimilarity = 0;
2410 double FuncSimilarity;
2411 double FuncInternalSimilarity;
2412
2413 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2414 // In this case, we use DefaultFuncInternalSimilarity as the function internal
2415 // similarity.
2416 if (!BaseFunc || !TestFunc) {
2417 FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2418 } else {
2419 assert(FuncOverlap != nullptr &&
2420 "FuncOverlap should be provided in this case");
2421 FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2422 BaseFunc: *BaseFunc, TestFunc: *TestFunc, FuncOverlap&: *FuncOverlap);
2423 // Now, FuncInternalSimilarity may be a little less than 0 due to
2424 // imprecision of floating point accumulations. Make it zero if the
2425 // difference is below Epsilon.
2426 FuncInternalSimilarity = (std::fabs(x: FuncInternalSimilarity - 0) < Epsilon)
2427 ? 0
2428 : FuncInternalSimilarity;
2429 }
2430 FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2431 BaseFuncSample, TestFuncSample);
2432 return FuncSimilarity;
2433}
2434
2435void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2436 using namespace sampleprof;
2437
2438 DenseMap<SampleContext, const FunctionSamples *> BaseFuncProf;
2439 const auto &BaseProfiles = BaseReader->getProfiles();
2440 for (const auto &BaseFunc : BaseProfiles) {
2441 BaseFuncProf.try_emplace(Key: BaseFunc.second.getContext(), Args: &(BaseFunc.second));
2442 }
2443 ProfOverlap.UnionCount = BaseFuncProf.size();
2444
2445 const auto &TestProfiles = TestReader->getProfiles();
2446 for (const auto &TestFunc : TestProfiles) {
2447 SampleOverlapStats FuncOverlap;
2448 FuncOverlap.TestName = TestFunc.second.getContext();
2449 assert(TestStats.count(FuncOverlap.TestName) &&
2450 "TestStats should have records for all functions in test profile "
2451 "except inlinees");
2452 FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2453
2454 bool Matched = false;
2455 const auto Match = BaseFuncProf.find(Val: FuncOverlap.TestName);
2456 if (Match == BaseFuncProf.end()) {
2457 const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2458 ++ProfOverlap.TestUniqueCount;
2459 ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2460 FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2461
2462 updateHotBlockOverlap(BaseSample: 0, TestSample: FuncStats.SampleSum, HotBlockCount: FuncStats.HotBlockCount);
2463
2464 double FuncSimilarity = computeSampleFunctionOverlap(
2465 BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: 0, TestFuncSample: FuncStats.SampleSum);
2466 ProfOverlap.Similarity +=
2467 weightByImportance(FuncSimilarity, BaseFuncSample: 0, TestFuncSample: FuncStats.SampleSum);
2468
2469 ++ProfOverlap.UnionCount;
2470 ProfOverlap.UnionSample += FuncStats.SampleSum;
2471 } else {
2472 ++ProfOverlap.OverlapCount;
2473
2474 // Two functions match with each other. Compute function-level overlap and
2475 // aggregate them into profile-level overlap.
2476 FuncOverlap.BaseName = Match->second->getContext();
2477 assert(BaseStats.count(FuncOverlap.BaseName) &&
2478 "BaseStats should have records for all functions in base profile "
2479 "except inlinees");
2480 FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2481
2482 FuncOverlap.Similarity = computeSampleFunctionOverlap(
2483 BaseFunc: Match->second, TestFunc: &TestFunc.second, FuncOverlap: &FuncOverlap, BaseFuncSample: FuncOverlap.BaseSample,
2484 TestFuncSample: FuncOverlap.TestSample);
2485 ProfOverlap.Similarity +=
2486 weightByImportance(FuncSimilarity: FuncOverlap.Similarity, BaseFuncSample: FuncOverlap.BaseSample,
2487 TestFuncSample: FuncOverlap.TestSample);
2488 ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2489 ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2490
2491 // Accumulate the percentage of base unique and test unique samples into
2492 // ProfOverlap.
2493 ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2494 ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2495
2496 // Remove matched base functions for later reporting functions not found
2497 // in test profile.
2498 BaseFuncProf.erase(I: Match);
2499 Matched = true;
2500 }
2501
2502 // Print function-level similarity information if specified by options.
2503 assert(TestStats.count(FuncOverlap.TestName) &&
2504 "TestStats should have records for all functions in test profile "
2505 "except inlinees");
2506 if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2507 (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2508 (Matched && !FuncFilter.NameFilter.empty() &&
2509 FuncOverlap.BaseName.toString().find(str: FuncFilter.NameFilter) !=
2510 std::string::npos)) {
2511 assert(ProfOverlap.BaseSample > 0 &&
2512 "Total samples in base profile should be greater than 0");
2513 FuncOverlap.BaseWeight =
2514 static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2515 assert(ProfOverlap.TestSample > 0 &&
2516 "Total samples in test profile should be greater than 0");
2517 FuncOverlap.TestWeight =
2518 static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2519 FuncSimilarityDump.emplace(args&: FuncOverlap.BaseWeight, args&: FuncOverlap);
2520 }
2521 }
2522
2523 // Traverse through functions in base profile but not in test profile.
2524 for (const auto &F : BaseFuncProf) {
2525 assert(BaseStats.count(F.second->getContext()) &&
2526 "BaseStats should have records for all functions in base profile "
2527 "except inlinees");
2528 const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2529 ++ProfOverlap.BaseUniqueCount;
2530 ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2531
2532 updateHotBlockOverlap(BaseSample: FuncStats.SampleSum, TestSample: 0, HotBlockCount: FuncStats.HotBlockCount);
2533
2534 double FuncSimilarity = computeSampleFunctionOverlap(
2535 BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: 0);
2536 ProfOverlap.Similarity +=
2537 weightByImportance(FuncSimilarity, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: 0);
2538
2539 ProfOverlap.UnionSample += FuncStats.SampleSum;
2540 }
2541
2542 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2543 // of floating point accumulations. Make it 1.0 if the difference is below
2544 // Epsilon.
2545 ProfOverlap.Similarity = (std::fabs(x: ProfOverlap.Similarity - 1) < Epsilon)
2546 ? 1
2547 : ProfOverlap.Similarity;
2548
2549 computeHotFuncOverlap();
2550}
2551
2552void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2553 const auto &BaseProf = BaseReader->getProfiles();
2554 for (const auto &I : BaseProf) {
2555 ++ProfOverlap.BaseCount;
2556 FuncSampleStats FuncStats;
2557 getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: BaseHotThreshold);
2558 ProfOverlap.BaseSample += FuncStats.SampleSum;
2559 BaseStats.try_emplace(Key: I.second.getContext(), Args&: FuncStats);
2560 }
2561
2562 const auto &TestProf = TestReader->getProfiles();
2563 for (const auto &I : TestProf) {
2564 ++ProfOverlap.TestCount;
2565 FuncSampleStats FuncStats;
2566 getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: TestHotThreshold);
2567 ProfOverlap.TestSample += FuncStats.SampleSum;
2568 TestStats.try_emplace(Key: I.second.getContext(), Args&: FuncStats);
2569 }
2570
2571 ProfOverlap.BaseName = StringRef(BaseFilename);
2572 ProfOverlap.TestName = StringRef(TestFilename);
2573}
2574
2575void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2576 using namespace sampleprof;
2577
2578 if (FuncSimilarityDump.empty())
2579 return;
2580
2581 formatted_raw_ostream FOS(OS);
2582 FOS << "Function-level details:\n";
2583 FOS << "Base weight";
2584 FOS.PadToColumn(NewCol: TestWeightCol);
2585 FOS << "Test weight";
2586 FOS.PadToColumn(NewCol: SimilarityCol);
2587 FOS << "Similarity";
2588 FOS.PadToColumn(NewCol: OverlapCol);
2589 FOS << "Overlap";
2590 FOS.PadToColumn(NewCol: BaseUniqueCol);
2591 FOS << "Base unique";
2592 FOS.PadToColumn(NewCol: TestUniqueCol);
2593 FOS << "Test unique";
2594 FOS.PadToColumn(NewCol: BaseSampleCol);
2595 FOS << "Base samples";
2596 FOS.PadToColumn(NewCol: TestSampleCol);
2597 FOS << "Test samples";
2598 FOS.PadToColumn(NewCol: FuncNameCol);
2599 FOS << "Function name\n";
2600 for (const auto &F : FuncSimilarityDump) {
2601 double OverlapPercent =
2602 F.second.UnionSample > 0
2603 ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2604 : 0;
2605 double BaseUniquePercent =
2606 F.second.BaseSample > 0
2607 ? static_cast<double>(F.second.BaseUniqueSample) /
2608 F.second.BaseSample
2609 : 0;
2610 double TestUniquePercent =
2611 F.second.TestSample > 0
2612 ? static_cast<double>(F.second.TestUniqueSample) /
2613 F.second.TestSample
2614 : 0;
2615
2616 FOS << format(Fmt: "%.2f%%", Vals: F.second.BaseWeight * 100);
2617 FOS.PadToColumn(NewCol: TestWeightCol);
2618 FOS << format(Fmt: "%.2f%%", Vals: F.second.TestWeight * 100);
2619 FOS.PadToColumn(NewCol: SimilarityCol);
2620 FOS << format(Fmt: "%.2f%%", Vals: F.second.Similarity * 100);
2621 FOS.PadToColumn(NewCol: OverlapCol);
2622 FOS << format(Fmt: "%.2f%%", Vals: OverlapPercent * 100);
2623 FOS.PadToColumn(NewCol: BaseUniqueCol);
2624 FOS << format(Fmt: "%.2f%%", Vals: BaseUniquePercent * 100);
2625 FOS.PadToColumn(NewCol: TestUniqueCol);
2626 FOS << format(Fmt: "%.2f%%", Vals: TestUniquePercent * 100);
2627 FOS.PadToColumn(NewCol: BaseSampleCol);
2628 FOS << F.second.BaseSample;
2629 FOS.PadToColumn(NewCol: TestSampleCol);
2630 FOS << F.second.TestSample;
2631 FOS.PadToColumn(NewCol: FuncNameCol);
2632 FOS << F.second.TestName.toString() << "\n";
2633 }
2634}
2635
2636void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2637 OS << "Profile overlap information for base_profile: "
2638 << ProfOverlap.BaseName.toString()
2639 << " and test_profile: " << ProfOverlap.TestName.toString()
2640 << "\nProgram level:\n";
2641
2642 OS << " Whole program profile similarity: "
2643 << format(Fmt: "%.3f%%", Vals: ProfOverlap.Similarity * 100) << "\n";
2644
2645 assert(ProfOverlap.UnionSample > 0 &&
2646 "Total samples in two profile should be greater than 0");
2647 double OverlapPercent =
2648 static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2649 assert(ProfOverlap.BaseSample > 0 &&
2650 "Total samples in base profile should be greater than 0");
2651 double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2652 ProfOverlap.BaseSample;
2653 assert(ProfOverlap.TestSample > 0 &&
2654 "Total samples in test profile should be greater than 0");
2655 double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2656 ProfOverlap.TestSample;
2657
2658 OS << " Whole program sample overlap: "
2659 << format(Fmt: "%.3f%%", Vals: OverlapPercent * 100) << "\n";
2660 OS << " percentage of samples unique in base profile: "
2661 << format(Fmt: "%.3f%%", Vals: BaseUniquePercent * 100) << "\n";
2662 OS << " percentage of samples unique in test profile: "
2663 << format(Fmt: "%.3f%%", Vals: TestUniquePercent * 100) << "\n";
2664 OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2665 << " total samples in test profile: " << ProfOverlap.TestSample << "\n";
2666
2667 assert(ProfOverlap.UnionCount > 0 &&
2668 "There should be at least one function in two input profiles");
2669 double FuncOverlapPercent =
2670 static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2671 OS << " Function overlap: " << format(Fmt: "%.3f%%", Vals: FuncOverlapPercent * 100)
2672 << "\n";
2673 OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
2674 OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2675 << "\n";
2676 OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
2677 << "\n";
2678}
2679
2680void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2681 raw_fd_ostream &OS) const {
2682 assert(HotFuncOverlap.UnionCount > 0 &&
2683 "There should be at least one hot function in two input profiles");
2684 OS << " Hot-function overlap: "
2685 << format(Fmt: "%.3f%%", Vals: static_cast<double>(HotFuncOverlap.OverlapCount) /
2686 HotFuncOverlap.UnionCount * 100)
2687 << "\n";
2688 OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2689 OS << " hot functions unique in base profile: "
2690 << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2691 OS << " hot functions unique in test profile: "
2692 << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2693
2694 assert(HotBlockOverlap.UnionCount > 0 &&
2695 "There should be at least one hot block in two input profiles");
2696 OS << " Hot-block overlap: "
2697 << format(Fmt: "%.3f%%", Vals: static_cast<double>(HotBlockOverlap.OverlapCount) /
2698 HotBlockOverlap.UnionCount * 100)
2699 << "\n";
2700 OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2701 OS << " hot blocks unique in base profile: "
2702 << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2703 OS << " hot blocks unique in test profile: "
2704 << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2705}
2706
2707std::error_code SampleOverlapAggregator::loadProfiles() {
2708 using namespace sampleprof;
2709
2710 LLVMContext Context;
2711 auto FS = vfs::getRealFileSystem();
2712 auto BaseReaderOrErr = SampleProfileReader::create(Filename: BaseFilename, C&: Context, FS&: *FS,
2713 P: FSDiscriminatorPassOption);
2714 if (std::error_code EC = BaseReaderOrErr.getError())
2715 exitWithErrorCode(EC, Whence: BaseFilename);
2716
2717 auto TestReaderOrErr = SampleProfileReader::create(Filename: TestFilename, C&: Context, FS&: *FS,
2718 P: FSDiscriminatorPassOption);
2719 if (std::error_code EC = TestReaderOrErr.getError())
2720 exitWithErrorCode(EC, Whence: TestFilename);
2721
2722 BaseReader = std::move(BaseReaderOrErr.get());
2723 TestReader = std::move(TestReaderOrErr.get());
2724
2725 if (std::error_code EC = BaseReader->read())
2726 exitWithErrorCode(EC, Whence: BaseFilename);
2727 if (std::error_code EC = TestReader->read())
2728 exitWithErrorCode(EC, Whence: TestFilename);
2729 if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2730 exitWithError(
2731 Message: "cannot compare probe-based profile with non-probe-based profile");
2732 if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2733 exitWithError(Message: "cannot compare CS profile with non-CS profile");
2734
2735 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2736 // profile summary.
2737 ProfileSummary &BasePS = BaseReader->getSummary();
2738 ProfileSummary &TestPS = TestReader->getSummary();
2739 BaseHotThreshold =
2740 ProfileSummaryBuilder::getHotCountThreshold(DS: BasePS.getDetailedSummary());
2741 TestHotThreshold =
2742 ProfileSummaryBuilder::getHotCountThreshold(DS: TestPS.getDetailedSummary());
2743
2744 return std::error_code();
2745}
2746
2747void overlapSampleProfile(const std::string &BaseFilename,
2748 const std::string &TestFilename,
2749 const OverlapFuncFilters &FuncFilter,
2750 uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2751 using namespace sampleprof;
2752
2753 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2754 // report 2--3 places after decimal point in percentage numbers.
2755 SampleOverlapAggregator OverlapAggr(
2756 BaseFilename, TestFilename,
2757 static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2758 if (std::error_code EC = OverlapAggr.loadProfiles())
2759 exitWithErrorCode(EC);
2760
2761 OverlapAggr.initializeSampleProfileOverlap();
2762 if (OverlapAggr.detectZeroSampleProfile(OS))
2763 return;
2764
2765 OverlapAggr.computeSampleProfileOverlap(OS);
2766
2767 OverlapAggr.dumpProgramSummary(OS);
2768 OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2769 OverlapAggr.dumpFuncSimilarity(OS);
2770}
2771
2772static int overlap_main() {
2773 std::error_code EC;
2774 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2775 if (EC)
2776 exitWithErrorCode(EC, Whence: OutputFilename);
2777
2778 if (ProfileKind == instr)
2779 overlapInstrProfile(BaseFilename, TestFilename,
2780 FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2781 OS, IsCS);
2782 else
2783 overlapSampleProfile(BaseFilename, TestFilename,
2784 FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2785 SimilarityCutoff, OS);
2786
2787 return 0;
2788}
2789
namespace {
/// Running totals gathered while traversing the value sites of one value
/// kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  // Number of value sites seen.
  uint64_t TotalNumValueSites{0};
  // Number of value sites that carry at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  // Number of profiled values summed over all sites.
  uint64_t TotalNumValues{0};
  // Entry [N - 1] counts the sites that have exactly N profiled values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2799
2800static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2801 ValueSitesStats &Stats, raw_fd_ostream &OS,
2802 InstrProfSymtab *Symtab) {
2803 uint32_t NS = Func.getNumValueSites(ValueKind: VK);
2804 Stats.TotalNumValueSites += NS;
2805 for (size_t I = 0; I < NS; ++I) {
2806 auto VD = Func.getValueArrayForSite(ValueKind: VK, Site: I);
2807 uint32_t NV = VD.size();
2808 if (NV == 0)
2809 continue;
2810 Stats.TotalNumValues += NV;
2811 Stats.TotalNumValueSitesWithValueProfile++;
2812 if (NV > Stats.ValueSitesHistogram.size())
2813 Stats.ValueSitesHistogram.resize(new_size: NV, x: 0);
2814 Stats.ValueSitesHistogram[NV - 1]++;
2815
2816 uint64_t SiteSum = 0;
2817 for (const auto &V : VD)
2818 SiteSum += V.Count;
2819 if (SiteSum == 0)
2820 SiteSum = 1;
2821
2822 for (const auto &V : VD) {
2823 OS << "\t[ " << format(Fmt: "%2u", Vals: I) << ", ";
2824 if (Symtab == nullptr)
2825 OS << format(Fmt: "%4" PRIu64, Vals: V.Value);
2826 else
2827 OS << Symtab->getFuncOrVarName(MD5Hash: V.Value);
2828 OS << ", " << format(Fmt: "%10" PRId64, Vals: V.Count) << " ] ("
2829 << format(Fmt: "%.2f%%", Vals: (V.Count * 100.0 / SiteSum)) << ")\n";
2830 }
2831 }
2832}
2833
2834static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2835 ValueSitesStats &Stats) {
2836 OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n";
2837 OS << " Total number of sites with values: "
2838 << Stats.TotalNumValueSitesWithValueProfile << "\n";
2839 OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n";
2840
2841 OS << " Value sites histogram:\n\tNumTargets, SiteCount\n";
2842 for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2843 if (Stats.ValueSitesHistogram[I] > 0)
2844 OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2845 }
2846}
2847
2848static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
2849 if (SFormat == ShowFormat::Json)
2850 exitWithError(Message: "JSON output is not supported for instr profiles");
2851 if (SFormat == ShowFormat::Yaml)
2852 exitWithError(Message: "YAML output is not supported for instr profiles");
2853 auto FS = vfs::getRealFileSystem();
2854 auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
2855 std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2856 if (Cutoffs.empty() && (ShowDetailedSummary || ShowHotFuncList))
2857 Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2858 InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2859 if (Error E = ReaderOrErr.takeError())
2860 exitWithError(E: std::move(E), Whence: Filename);
2861
2862 auto Reader = std::move(ReaderOrErr.get());
2863 bool IsIRInstr = Reader->isIRLevelProfile();
2864 size_t ShownFunctions = 0;
2865 size_t BelowCutoffFunctions = 0;
2866 int NumVPKind = IPVK_Last - IPVK_First + 1;
2867 std::vector<ValueSitesStats> VPStats(NumVPKind);
2868
2869 std::vector<std::pair<StringRef, uint64_t>> NameAndMaxCount;
2870
2871 if (!TextFormat && OnlyListBelow) {
2872 OS << "The list of functions with the maximum counter less than "
2873 << ShowValueCutoff << ":\n";
2874 }
2875
2876 // Add marker so that IR-level instrumentation round-trips properly.
2877 if (TextFormat && IsIRInstr)
2878 OS << ":ir\n";
2879
2880 for (const auto &Func : *Reader) {
2881 if (Reader->isIRLevelProfile()) {
2882 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(FuncHash: Func.Hash);
2883 if (FuncIsCS != ShowCS)
2884 continue;
2885 }
2886 bool Show = ShowAllFunctions ||
2887 (!FuncNameFilter.empty() && Func.Name.contains(Other: FuncNameFilter));
2888
2889 bool doTextFormatDump = (Show && TextFormat);
2890
2891 if (doTextFormatDump) {
2892 InstrProfSymtab &Symtab = Reader->getSymtab();
2893 InstrProfWriter::writeRecordInText(Name: Func.Name, Hash: Func.Hash, Counters: Func, Symtab,
2894 OS);
2895 continue;
2896 }
2897
2898 assert(Func.Counts.size() > 0 && "function missing entry counter");
2899 Builder.addRecord(Func);
2900
2901 if (ShowCovered) {
2902 if (llvm::any_of(Range: Func.Counts, P: [](uint64_t C) { return C; }))
2903 OS << Func.Name << "\n";
2904 continue;
2905 }
2906
2907 uint64_t FuncMax = 0;
2908 uint64_t FuncSum = 0;
2909
2910 auto PseudoKind = Func.getCountPseudoKind();
2911 if (PseudoKind != InstrProfRecord::NotPseudo) {
2912 if (Show) {
2913 if (!ShownFunctions)
2914 OS << "Counters:\n";
2915 ++ShownFunctions;
2916 OS << " " << Func.Name << ":\n"
2917 << " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
2918 << " Counters: " << Func.Counts.size();
2919 if (PseudoKind == InstrProfRecord::PseudoHot)
2920 OS << " <PseudoHot>\n";
2921 else if (PseudoKind == InstrProfRecord::PseudoWarm)
2922 OS << " <PseudoWarm>\n";
2923 else
2924 llvm_unreachable("Unknown PseudoKind");
2925 }
2926 continue;
2927 }
2928
2929 for (uint64_t Count : Func.Counts) {
2930 FuncMax = std::max(a: FuncMax, b: Count);
2931 FuncSum += Count;
2932 }
2933
2934 if (FuncMax < ShowValueCutoff) {
2935 ++BelowCutoffFunctions;
2936 if (OnlyListBelow) {
2937 OS << " " << Func.Name << ": (Max = " << FuncMax
2938 << " Sum = " << FuncSum << ")\n";
2939 }
2940 continue;
2941 } else if (OnlyListBelow)
2942 continue;
2943
2944 if (TopNFunctions || ShowHotFuncList)
2945 NameAndMaxCount.emplace_back(args: Func.Name, args&: FuncMax);
2946
2947 if (Show) {
2948 if (!ShownFunctions)
2949 OS << "Counters:\n";
2950
2951 ++ShownFunctions;
2952
2953 OS << " " << Func.Name << ":\n"
2954 << " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
2955 << " Counters: " << Func.Counts.size() << "\n";
2956 if (!IsIRInstr)
2957 OS << " Function count: " << Func.Counts[0] << "\n";
2958
2959 if (ShowIndirectCallTargets)
2960 OS << " Indirect Call Site Count: "
2961 << Func.getNumValueSites(ValueKind: IPVK_IndirectCallTarget) << "\n";
2962
2963 if (ShowVTables)
2964 OS << " Number of instrumented vtables: "
2965 << Func.getNumValueSites(ValueKind: IPVK_VTableTarget) << "\n";
2966
2967 uint32_t NumMemOPCalls = Func.getNumValueSites(ValueKind: IPVK_MemOPSize);
2968 if (ShowMemOPSizes && NumMemOPCalls > 0)
2969 OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2970 << "\n";
2971
2972 if (ShowCounts) {
2973 OS << " Block counts: [";
2974 size_t Start = (IsIRInstr ? 0 : 1);
2975 for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2976 OS << (I == Start ? "" : ", ") << Func.Counts[I];
2977 }
2978 OS << "]\n";
2979
2980 // Show uniformity bits if present
2981 if (!Func.UniformityBits.empty()) {
2982 OS << " Block uniformity: [";
2983 for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2984 bool IsUniform = Func.isBlockUniform(BlockIdx: I);
2985 OS << (I == Start ? "" : ", ") << (IsUniform ? "U" : "D");
2986 }
2987 OS << "]\n";
2988 }
2989 }
2990
2991 if (ShowIndirectCallTargets) {
2992 OS << " Indirect Target Results:\n";
2993 traverseAllValueSites(Func, VK: IPVK_IndirectCallTarget,
2994 Stats&: VPStats[IPVK_IndirectCallTarget], OS,
2995 Symtab: &(Reader->getSymtab()));
2996 }
2997
2998 if (ShowVTables) {
2999 OS << " VTable Results:\n";
3000 traverseAllValueSites(Func, VK: IPVK_VTableTarget,
3001 Stats&: VPStats[IPVK_VTableTarget], OS,
3002 Symtab: &(Reader->getSymtab()));
3003 }
3004
3005 if (ShowMemOPSizes && NumMemOPCalls > 0) {
3006 OS << " Memory Intrinsic Size Results:\n";
3007 traverseAllValueSites(Func, VK: IPVK_MemOPSize, Stats&: VPStats[IPVK_MemOPSize], OS,
3008 Symtab: nullptr);
3009 }
3010 }
3011 }
3012 if (Reader->hasError())
3013 exitWithError(E: Reader->getError(), Whence: Filename);
3014
3015 if (TextFormat || ShowCovered)
3016 return 0;
3017 std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
3018 bool IsIR = Reader->isIRLevelProfile();
3019 OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
3020 if (IsIR) {
3021 OS << " entry_first = " << Reader->instrEntryBBEnabled();
3022 OS << " instrument_loop_entries = " << Reader->instrLoopEntriesEnabled();
3023 }
3024 OS << "\n";
3025 if (ShowAllFunctions || !FuncNameFilter.empty())
3026 OS << "Functions shown: " << ShownFunctions << "\n";
3027 PS->printSummary(OS);
3028 if (ShowValueCutoff > 0) {
3029 OS << "Number of functions with maximum count (< " << ShowValueCutoff
3030 << "): " << BelowCutoffFunctions << "\n";
3031 OS << "Number of functions with maximum count (>= " << ShowValueCutoff
3032 << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
3033 }
3034
3035 // Sort by MaxCount in decreasing order
3036 llvm::stable_sort(Range&: NameAndMaxCount, C: [](const auto &L, const auto &R) {
3037 return L.second > R.second;
3038 });
3039 if (TopNFunctions) {
3040 OS << "Top " << TopNFunctions
3041 << " functions with the largest internal block counts: \n";
3042 auto TopFuncs = ArrayRef(NameAndMaxCount).take_front(N: TopNFunctions);
3043 for (auto [Name, MaxCount] : TopFuncs)
3044 OS << " " << Name << ", max count = " << MaxCount << "\n";
3045 }
3046
3047 if (ShowHotFuncList) {
3048 auto HotCountThreshold =
3049 ProfileSummaryBuilder::getHotCountThreshold(DS: PS->getDetailedSummary());
3050 OS << "# Hot count threshold: " << HotCountThreshold << "\n";
3051 for (auto [Name, MaxCount] : NameAndMaxCount) {
3052 if (MaxCount < HotCountThreshold)
3053 break;
3054 OS << Name << "\n";
3055 }
3056 }
3057
3058 if (ShownFunctions && ShowIndirectCallTargets) {
3059 OS << "Statistics for indirect call sites profile:\n";
3060 showValueSitesStats(OS, VK: IPVK_IndirectCallTarget,
3061 Stats&: VPStats[IPVK_IndirectCallTarget]);
3062 }
3063
3064 if (ShownFunctions && ShowVTables) {
3065 OS << "Statistics for vtable profile:\n";
3066 showValueSitesStats(OS, VK: IPVK_VTableTarget, Stats&: VPStats[IPVK_VTableTarget]);
3067 }
3068
3069 if (ShownFunctions && ShowMemOPSizes) {
3070 OS << "Statistics for memory intrinsic calls sizes profile:\n";
3071 showValueSitesStats(OS, VK: IPVK_MemOPSize, Stats&: VPStats[IPVK_MemOPSize]);
3072 }
3073
3074 if (ShowDetailedSummary)
3075 PS->printDetailedSummary(OS);
3076
3077 if (ShowBinaryIds)
3078 if (Error E = Reader->printBinaryIds(OS))
3079 exitWithError(E: std::move(E), Whence: Filename);
3080
3081 if (ShowProfileVersion)
3082 OS << "Profile version: " << Reader->getVersion() << "\n";
3083
3084 if (ShowTemporalProfTraces) {
3085 auto &Traces = Reader->getTemporalProfTraces();
3086 OS << "Temporal Profile Traces (samples=" << Traces.size()
3087 << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
3088 for (unsigned i = 0; i < Traces.size(); i++) {
3089 OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
3090 << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
3091 for (auto &NameRef : Traces[i].FunctionNameRefs)
3092 OS << " " << Reader->getSymtab().getFuncOrVarName(MD5Hash: NameRef) << "\n";
3093 }
3094 }
3095
3096 return 0;
3097}
3098
3099static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
3100 raw_fd_ostream &OS) {
3101 if (!Reader->dumpSectionInfo(OS)) {
3102 WithColor::warning() << "-show-sec-info-only is only supported for "
3103 << "sample profile in extbinary format and is "
3104 << "ignored for other formats.\n";
3105 return;
3106 }
3107}
3108
3109namespace {
3110struct HotFuncInfo {
3111 std::string FuncName;
3112 uint64_t TotalCount = 0;
3113 double TotalCountPercent = 0.0f;
3114 uint64_t MaxCount = 0;
3115 uint64_t EntryCount = 0;
3116
3117 HotFuncInfo() = default;
3118
3119 HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
3120 : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
3121 MaxCount(MS), EntryCount(ES) {}
3122};
3123} // namespace
3124
3125// Print out detailed information about hot functions in PrintValues vector.
3126// Users specify titles and offset of every columns through ColumnTitle and
3127// ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3128// and at least 4. Besides, users can optionally give a HotFuncMetric string to
3129// print out or let it be an empty string.
3130static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
3131 const std::vector<int> &ColumnOffset,
3132 const std::vector<HotFuncInfo> &PrintValues,
3133 uint64_t HotFuncCount, uint64_t TotalFuncCount,
3134 uint64_t HotProfCount, uint64_t TotalProfCount,
3135 const std::string &HotFuncMetric,
3136 uint32_t TopNFunctions, raw_fd_ostream &OS) {
3137 assert(ColumnOffset.size() == ColumnTitle.size() &&
3138 "ColumnOffset and ColumnTitle should have the same size");
3139 assert(ColumnTitle.size() >= 4 &&
3140 "ColumnTitle should have at least 4 elements");
3141 assert(TotalFuncCount > 0 &&
3142 "There should be at least one function in the profile");
3143 double TotalProfPercent = 0;
3144 if (TotalProfCount > 0)
3145 TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
3146
3147 formatted_raw_ostream FOS(OS);
3148 FOS << HotFuncCount << " out of " << TotalFuncCount
3149 << " functions with profile ("
3150 << format(Fmt: "%.2f%%",
3151 Vals: (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
3152 << ") are considered hot functions";
3153 if (!HotFuncMetric.empty())
3154 FOS << " (" << HotFuncMetric << ")";
3155 FOS << ".\n";
3156 FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
3157 << format(Fmt: "%.2f%%", Vals: TotalProfPercent) << ") are from hot functions.\n";
3158
3159 for (size_t I = 0; I < ColumnTitle.size(); ++I) {
3160 FOS.PadToColumn(NewCol: ColumnOffset[I]);
3161 FOS << ColumnTitle[I];
3162 }
3163 FOS << "\n";
3164
3165 uint32_t Count = 0;
3166 for (const auto &R : PrintValues) {
3167 if (TopNFunctions && (Count++ == TopNFunctions))
3168 break;
3169 FOS.PadToColumn(NewCol: ColumnOffset[0]);
3170 FOS << R.TotalCount << " (" << format(Fmt: "%.2f%%", Vals: R.TotalCountPercent) << ")";
3171 FOS.PadToColumn(NewCol: ColumnOffset[1]);
3172 FOS << R.MaxCount;
3173 FOS.PadToColumn(NewCol: ColumnOffset[2]);
3174 FOS << R.EntryCount;
3175 FOS.PadToColumn(NewCol: ColumnOffset[3]);
3176 FOS << R.FuncName << "\n";
3177 }
3178}
3179
3180static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
3181 ProfileSummary &PS, uint32_t TopN,
3182 raw_fd_ostream &OS) {
3183 using namespace sampleprof;
3184
3185 const uint32_t HotFuncCutoff = 990000;
3186 auto &SummaryVector = PS.getDetailedSummary();
3187 uint64_t MinCountThreshold = 0;
3188 for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
3189 if (SummaryEntry.Cutoff == HotFuncCutoff) {
3190 MinCountThreshold = SummaryEntry.MinCount;
3191 break;
3192 }
3193 }
3194
3195 // Traverse all functions in the profile and keep only hot functions.
3196 // The following loop also calculates the sum of total samples of all
3197 // functions.
3198 std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
3199 std::greater<uint64_t>>
3200 HotFunc;
3201 uint64_t ProfileTotalSample = 0;
3202 uint64_t HotFuncSample = 0;
3203 uint64_t HotFuncCount = 0;
3204
3205 for (const auto &I : Profiles) {
3206 FuncSampleStats FuncStats;
3207 const FunctionSamples &FuncProf = I.second;
3208 ProfileTotalSample += FuncProf.getTotalSamples();
3209 getFuncSampleStats(Func: FuncProf, FuncStats, HotThreshold: MinCountThreshold);
3210
3211 if (isFunctionHot(FuncStats, HotThreshold: MinCountThreshold)) {
3212 HotFunc.emplace(args: FuncProf.getTotalSamples(),
3213 args: std::make_pair(x: &(I.second), y&: FuncStats.MaxSample));
3214 HotFuncSample += FuncProf.getTotalSamples();
3215 ++HotFuncCount;
3216 }
3217 }
3218
3219 std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
3220 "Entry sample", "Function name"};
3221 std::vector<int> ColumnOffset{0, 24, 42, 58};
3222 std::string Metric =
3223 std::string("max sample >= ") + std::to_string(val: MinCountThreshold);
3224 std::vector<HotFuncInfo> PrintValues;
3225 for (const auto &FuncPair : HotFunc) {
3226 const FunctionSamples &Func = *FuncPair.second.first;
3227 double TotalSamplePercent =
3228 (ProfileTotalSample > 0)
3229 ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
3230 : 0;
3231 PrintValues.emplace_back(
3232 args: HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
3233 TotalSamplePercent, FuncPair.second.second,
3234 Func.getHeadSamplesEstimate()));
3235 }
3236 dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
3237 TotalFuncCount: Profiles.size(), HotProfCount: HotFuncSample, TotalProfCount: ProfileTotalSample,
3238 HotFuncMetric: Metric, TopNFunctions: TopN, OS);
3239
3240 return 0;
3241}
3242
3243static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3244 if (SFormat == ShowFormat::Yaml)
3245 exitWithError(Message: "YAML output is not supported for sample profiles");
3246 using namespace sampleprof;
3247 LLVMContext Context;
3248 auto FS = vfs::getRealFileSystem();
3249 auto ReaderOrErr = SampleProfileReader::create(Filename, C&: Context, FS&: *FS,
3250 P: FSDiscriminatorPassOption);
3251 if (std::error_code EC = ReaderOrErr.getError())
3252 exitWithErrorCode(EC, Whence: Filename);
3253
3254 auto Reader = std::move(ReaderOrErr.get());
3255 if (ShowSectionInfoOnly) {
3256 showSectionInfo(Reader: Reader.get(), OS);
3257 return 0;
3258 }
3259
3260 if (std::error_code EC = Reader->read())
3261 exitWithErrorCode(EC, Whence: Filename);
3262
3263 if (ShowAllFunctions || FuncNameFilter.empty()) {
3264 if (SFormat == ShowFormat::Json)
3265 Reader->dumpJson(OS);
3266 else
3267 Reader->dump(OS);
3268 } else {
3269 if (SFormat == ShowFormat::Json)
3270 exitWithError(
3271 Message: "the JSON format is supported only when all functions are to "
3272 "be printed");
3273
3274 // TODO: parse context string to support filtering by contexts.
3275 FunctionSamples *FS = Reader->getSamplesFor(Fname: StringRef(FuncNameFilter));
3276 Reader->dumpFunctionProfile(FS: FS ? *FS : FunctionSamples(), OS);
3277 }
3278
3279 if (ShowProfileSymbolList) {
3280 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
3281 Reader->getProfileSymbolList();
3282 ReaderList->dump(OS);
3283 }
3284
3285 if (ShowDetailedSummary) {
3286 auto &PS = Reader->getSummary();
3287 PS.printSummary(OS);
3288 PS.printDetailedSummary(OS);
3289 }
3290
3291 if (ShowHotFuncList || TopNFunctions)
3292 showHotFunctionList(Profiles: Reader->getProfiles(), PS&: Reader->getSummary(),
3293 TopN: TopNFunctions, OS);
3294
3295 return 0;
3296}
3297
3298static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3299 if (SFormat == ShowFormat::Json)
3300 exitWithError(Message: "JSON output is not supported for MemProf");
3301
3302 // Show the raw profile in YAML.
3303 if (memprof::RawMemProfReader::hasFormat(Path: Filename)) {
3304 auto ReaderOr = llvm::memprof::RawMemProfReader::create(
3305 Path: Filename, ProfiledBinary, /*KeepNames=*/KeepName: true);
3306 if (Error E = ReaderOr.takeError()) {
3307 // Since the error can be related to the profile or the binary we do not
3308 // pass whence. Instead additional context is provided where necessary in
3309 // the error message.
3310 exitWithError(E: std::move(E), /*Whence*/ "");
3311 }
3312
3313 std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
3314 ReaderOr.get().release());
3315
3316 Reader->printYAML(OS);
3317 return 0;
3318 }
3319
3320 // Show the indexed MemProf profile in YAML.
3321 auto FS = vfs::getRealFileSystem();
3322 auto ReaderOrErr = IndexedInstrProfReader::create(Path: Filename, FS&: *FS);
3323 if (Error E = ReaderOrErr.takeError())
3324 exitWithError(E: std::move(E), Whence: Filename);
3325
3326 auto Reader = std::move(ReaderOrErr.get());
3327 memprof::AllMemProfData Data = Reader->getAllMemProfData();
3328
3329 // For v4 and above the summary is serialized in the indexed profile, and can
3330 // be accessed from the reader. Earlier versions build the summary below.
3331 // The summary is emitted as YAML comments at the start of the output.
3332 if (auto *MemProfSum = Reader->getMemProfSummary()) {
3333 MemProfSum->printSummaryYaml(OS);
3334 } else {
3335 memprof::MemProfSummaryBuilder MemProfSumBuilder;
3336 for (auto &Pair : Data.HeapProfileRecords)
3337 MemProfSumBuilder.addRecord(Pair.Record);
3338 MemProfSumBuilder.getSummary()->printSummaryYaml(OS);
3339 }
3340 // Construct yaml::Output with the maximum column width of 80 so that each
3341 // Frame fits in one line.
3342 yaml::Output Yout(OS, nullptr, 80);
3343 Yout << Data;
3344
3345 return 0;
3346}
3347
3348static int showDebugInfoCorrelation(const std::string &Filename,
3349 ShowFormat SFormat, raw_fd_ostream &OS) {
3350 if (SFormat == ShowFormat::Json)
3351 exitWithError(Message: "JSON output is not supported for debug info correlation");
3352 std::unique_ptr<InstrProfCorrelator> Correlator;
3353 if (auto Err =
3354 InstrProfCorrelator::get(Filename, FileKind: InstrProfCorrelator::DEBUG_INFO)
3355 .moveInto(Value&: Correlator))
3356 exitWithError(E: std::move(Err), Whence: Filename);
3357 if (SFormat == ShowFormat::Yaml) {
3358 if (auto Err = Correlator->dumpYaml(MaxWarnings: MaxDbgCorrelationWarnings, OS))
3359 exitWithError(E: std::move(Err), Whence: Filename);
3360 return 0;
3361 }
3362
3363 if (auto Err = Correlator->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
3364 exitWithError(E: std::move(Err), Whence: Filename);
3365
3366 InstrProfSymtab Symtab;
3367 if (auto Err = Symtab.create(
3368 NameStrings: StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3369 exitWithError(E: std::move(Err), Whence: Filename);
3370
3371 if (ShowProfileSymbolList)
3372 Symtab.dumpNames(OS);
3373 // TODO: Read "Profile Data Type" from debug info to compute and show how many
3374 // counters the section holds.
3375 if (ShowDetailedSummary)
3376 OS << "Counters section size: 0x"
3377 << Twine::utohexstr(Val: Correlator->getCountersSectionSize()) << " bytes\n";
3378 OS << "Found " << Correlator->getDataSize() << " functions\n";
3379
3380 return 0;
3381}
3382
3383static int show_main(StringRef ProgName) {
3384 if (Filename.empty() && DebugInfoFilename.empty())
3385 exitWithError(
3386 Message: "the positional argument '<profdata-file>' is required unless '--" +
3387 DebugInfoFilename.ArgStr + "' is provided");
3388
3389 if (Filename == OutputFilename) {
3390 errs() << ProgName
3391 << " show: Input file name cannot be the same as the output file "
3392 "name!\n";
3393 return 1;
3394 }
3395 if (JsonFormat)
3396 SFormat = ShowFormat::Json;
3397
3398 std::error_code EC;
3399 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3400 if (EC)
3401 exitWithErrorCode(EC, Whence: OutputFilename);
3402
3403 if (ShowAllFunctions && !FuncNameFilter.empty())
3404 WithColor::warning() << "-function argument ignored: showing all functions\n";
3405
3406 if (!DebugInfoFilename.empty())
3407 return showDebugInfoCorrelation(Filename: DebugInfoFilename, SFormat, OS);
3408
3409 if (ShowProfileKind == instr)
3410 return showInstrProfile(SFormat, OS);
3411 if (ShowProfileKind == sample)
3412 return showSampleProfile(SFormat, OS);
3413 return showMemProfProfile(SFormat, OS);
3414}
3415
3416static int order_main() {
3417 std::error_code EC;
3418 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3419 if (EC)
3420 exitWithErrorCode(EC, Whence: OutputFilename);
3421 auto FS = vfs::getRealFileSystem();
3422 auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
3423 if (Error E = ReaderOrErr.takeError())
3424 exitWithError(E: std::move(E), Whence: Filename);
3425
3426 auto Reader = std::move(ReaderOrErr.get());
3427 for (auto &I : *Reader) {
3428 // Read all entries
3429 (void)I;
3430 }
3431 ArrayRef Traces = Reader->getTemporalProfTraces();
3432 if (NumTestTraces && NumTestTraces >= Traces.size())
3433 exitWithError(
3434 Message: "--" + NumTestTraces.ArgStr +
3435 " must be smaller than the total number of traces: expected: < " +
3436 Twine(Traces.size()) + ", actual: " + Twine(NumTestTraces));
3437 ArrayRef TestTraces = Traces.take_back(N: NumTestTraces);
3438 Traces = Traces.drop_back(N: NumTestTraces);
3439
3440 std::vector<BPFunctionNode> Nodes;
3441 TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes);
3442 BalancedPartitioningConfig Config;
3443 BalancedPartitioning BP(Config);
3444 BP.run(Nodes);
3445
3446 OS << "# Ordered " << Nodes.size() << " functions\n";
3447 if (!TestTraces.empty()) {
3448 // Since we don't know the symbol sizes, we assume 32 functions per page.
3449 DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber;
3450 for (auto &Node : Nodes)
3451 IdToPageNumber[Node.Id] = IdToPageNumber.size() / 32;
3452
3453 SmallSet<unsigned, 0> TouchedPages;
3454 unsigned Area = 0;
3455 for (auto &Trace : TestTraces) {
3456 for (auto Id : Trace.FunctionNameRefs) {
3457 auto It = IdToPageNumber.find(Val: Id);
3458 if (It == IdToPageNumber.end())
3459 continue;
3460 TouchedPages.insert(V: It->getSecond());
3461 Area += TouchedPages.size();
3462 }
3463 TouchedPages.clear();
3464 }
3465 OS << "# Total area under the page fault curve: " << (float)Area << "\n";
3466 }
3467 OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3468 "linkage and this output does not take that into account. Some "
3469 "post-processing may be required before passing to the linker via "
3470 "-order_file.\n";
3471 for (auto &N : Nodes) {
3472 auto [Filename, ParsedFuncName] =
3473 getParsedIRPGOName(IRPGOName: Reader->getSymtab().getFuncOrVarName(MD5Hash: N.Id));
3474 if (!Filename.empty())
3475 OS << "# " << Filename << "\n";
3476 OS << ParsedFuncName << "\n";
3477 }
3478 return 0;
3479}
3480
3481int main(int argc, const char *argv[]) {
3482 InitLLVM X(argc, argv);
3483 StringRef ProgName(sys::path::filename(path: argv[0]));
3484
3485 if (argc < 2) {
3486 errs()
3487 << ProgName
3488 << ": No subcommand specified! Run llvm-profdata --help for usage.\n";
3489 return 1;
3490 }
3491
3492 cl::ParseCommandLineOptions(argc, argv, Overview: "LLVM profile data\n");
3493
3494 if (ShowSubcommand)
3495 return show_main(ProgName);
3496
3497 if (OrderSubcommand)
3498 return order_main();
3499
3500 if (OverlapSubcommand)
3501 return overlap_main();
3502
3503 if (MergeSubcommand)
3504 return merge_main(ProgName);
3505
3506 errs() << ProgName
3507 << ": Unknown command. Run llvm-profdata --help for usage.\n";
3508 return 1;
3509}
3510