1//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// llvm-profdata merges .profdata files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/ScopeExit.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Debuginfod/HTTPClient.h"
18#include "llvm/IR/LLVMContext.h"
19#include "llvm/Object/Binary.h"
20#include "llvm/ProfileData/DataAccessProf.h"
21#include "llvm/ProfileData/InstrProfCorrelator.h"
22#include "llvm/ProfileData/InstrProfReader.h"
23#include "llvm/ProfileData/InstrProfWriter.h"
24#include "llvm/ProfileData/MemProf.h"
25#include "llvm/ProfileData/MemProfReader.h"
26#include "llvm/ProfileData/MemProfSummaryBuilder.h"
27#include "llvm/ProfileData/MemProfYAML.h"
28#include "llvm/ProfileData/ProfileCommon.h"
29#include "llvm/ProfileData/SampleProfReader.h"
30#include "llvm/ProfileData/SampleProfWriter.h"
31#include "llvm/Support/BalancedPartitioning.h"
32#include "llvm/Support/CommandLine.h"
33#include "llvm/Support/Discriminator.h"
34#include "llvm/Support/Errc.h"
35#include "llvm/Support/FileSystem.h"
36#include "llvm/Support/Format.h"
37#include "llvm/Support/FormattedStream.h"
38#include "llvm/Support/InitLLVM.h"
39#include "llvm/Support/MD5.h"
40#include "llvm/Support/MemoryBuffer.h"
41#include "llvm/Support/Path.h"
42#include "llvm/Support/Regex.h"
43#include "llvm/Support/ThreadPool.h"
44#include "llvm/Support/Threading.h"
45#include "llvm/Support/VirtualFileSystem.h"
46#include "llvm/Support/WithColor.h"
47#include "llvm/Support/raw_ostream.h"
48#include <algorithm>
49#include <cmath>
50#include <optional>
51
52using namespace llvm;
53using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
54
55// https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentations
56// on each subcommand.
57cl::SubCommand ShowSubcommand(
58 "show",
59 "Takes a profile data file and displays the profiles. See detailed "
60 "documentation in "
61 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
62cl::SubCommand OrderSubcommand(
63 "order",
64 "Reads temporal profiling traces from a profile and outputs a function "
65 "order that reduces the number of page faults for those traces. See "
66 "detailed documentation in "
67 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
68cl::SubCommand OverlapSubcommand(
69 "overlap",
70 "Computes and displays the overlap between two profiles. See detailed "
71 "documentation in "
72 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
73cl::SubCommand MergeSubcommand(
74 "merge",
75 "Takes several profiles and merge them together. See detailed "
76 "documentation in "
77 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
78
namespace {
// Profile flavours selectable on the command line. The enumerator names are
// used verbatim as flag spellings by clEnumVal, so they must not be renamed.
enum ProfileKinds {
  instr,
  sample,
  memory,
};

// How the merge subcommand reacts to invalid input profiles.
enum FailureMode {
  warnOnly,
  failIfAnyAreInvalid,
  failIfAllAreInvalid,
};

// Encodings available for the merged output profile.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary,
};

// Output syntaxes accepted by `show --show-format`.
enum class ShowFormat {
  Text,
  Json,
  Yaml,
};
} // namespace
94
95// Common options.
96cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
97 cl::init(Val: "-"), cl::desc("Output file"),
98 cl::sub(ShowSubcommand),
99 cl::sub(OrderSubcommand),
100 cl::sub(OverlapSubcommand),
101 cl::sub(MergeSubcommand));
102// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
103// will be used. llvm::cl::alias::done() method asserts this condition.
104static cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
105 cl::aliasopt(OutputFilename));
106
107// Options common to at least two commands.
108static cl::opt<ProfileKinds> ProfileKind(
109 cl::desc("Profile kind:"), cl::sub(MergeSubcommand),
110 cl::sub(OverlapSubcommand), cl::init(Val: instr),
111 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
112 clEnumVal(sample, "Sample profile")));
113static cl::opt<std::string> Filename(cl::Positional,
114 cl::desc("<profdata-file>"),
115 cl::sub(ShowSubcommand),
116 cl::sub(OrderSubcommand));
117static cl::opt<unsigned> MaxDbgCorrelationWarnings(
118 "max-debug-info-correlation-warnings",
119 cl::desc("The maximum number of warnings to emit when correlating "
120 "profile from debug info (0 = no limit)"),
121 cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(Val: 5));
122static cl::opt<std::string> ProfiledBinary(
123 "profiled-binary", cl::init(Val: ""),
124 cl::desc("Path to binary from which the profile was collected."),
125 cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
126static cl::opt<std::string> DebugInfoFilename(
127 "debug-info", cl::init(Val: ""),
128 cl::desc(
129 "For show, read and extract profile metadata from debug info and show "
130 "the functions it found. For merge, use the provided debug info to "
131 "correlate the raw profile."),
132 cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
133static cl::opt<std::string>
134 BinaryFilename("binary-file", cl::init(Val: ""),
135 cl::desc("For merge, use the provided unstripped binary to "
136 "correlate the raw profile."),
137 cl::sub(MergeSubcommand));
138static cl::list<std::string> DebugFileDirectory(
139 "debug-file-directory",
140 cl::desc("Directories to search for object files by build ID"));
141static cl::opt<bool> DebugInfod("debuginfod", cl::init(Val: false), cl::Hidden,
142 cl::sub(MergeSubcommand),
143 cl::desc("Enable debuginfod"));
144static cl::opt<ProfCorrelatorKind> BIDFetcherProfileCorrelate(
145 "correlate",
146 cl::desc("Use debug-info or binary correlation to correlate profiles with "
147 "build id fetcher"),
148 cl::init(Val: InstrProfCorrelator::NONE),
149 cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
150 "No profile correlation"),
151 clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
152 "Use debug info to correlate"),
153 clEnumValN(InstrProfCorrelator::BINARY, "binary",
154 "Use binary to correlate")));
155static cl::opt<std::string> FuncNameFilter(
156 "function",
157 cl::desc("Only functions matching the filter are shown in the output. For "
158 "overlapping CSSPGO, this takes a function name with calling "
159 "context."),
160 cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
161 cl::sub(MergeSubcommand));
162
163// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
164// factor out the common cl::sub in cl::opt constructor for subcommand-specific
165// options.
166
167// Options specific to merge subcommand.
168static cl::list<std::string> InputFilenames(cl::Positional,
169 cl::sub(MergeSubcommand),
170 cl::desc("<filename...>"));
171static cl::list<std::string>
172 WeightedInputFilenames("weighted-input", cl::sub(MergeSubcommand),
173 cl::desc("<weight>,<filename>"));
174static cl::opt<ProfileFormat> OutputFormat(
175 cl::desc("Format of output profile"), cl::sub(MergeSubcommand),
176 cl::init(Val: PF_Ext_Binary),
177 cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
178 clEnumValN(PF_Ext_Binary, "extbinary",
179 "Extensible binary encoding "
180 "(default)"),
181 clEnumValN(PF_Text, "text", "Text encoding"),
182 clEnumValN(PF_GCC, "gcc",
183 "GCC encoding (only meaningful for -sample)")));
184static cl::opt<std::string>
185 InputFilenamesFile("input-files", cl::init(Val: ""), cl::sub(MergeSubcommand),
186 cl::desc("Path to file containing newline-separated "
187 "[<weight>,]<filename> entries"));
188static cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
189 cl::aliasopt(InputFilenamesFile));
190static cl::opt<bool> DumpInputFileList(
191 "dump-input-file-list", cl::init(Val: false), cl::Hidden,
192 cl::sub(MergeSubcommand),
193 cl::desc("Dump the list of input files and their weights, then exit"));
194static cl::opt<std::string> RemappingFile("remapping-file",
195 cl::value_desc("file"),
196 cl::sub(MergeSubcommand),
197 cl::desc("Symbol remapping file"));
198static cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
199 cl::aliasopt(RemappingFile));
200static cl::opt<bool>
201 UseMD5("use-md5", cl::init(Val: false), cl::Hidden,
202 cl::desc("Choose to use MD5 to represent string in name table (only "
203 "meaningful for -extbinary)"),
204 cl::sub(MergeSubcommand));
205static cl::opt<bool> CompressAllSections(
206 "compress-all-sections", cl::init(Val: false), cl::Hidden,
207 cl::sub(MergeSubcommand),
208 cl::desc("Compress all sections when writing the profile (only "
209 "meaningful for -extbinary)"));
210static cl::opt<bool> SampleMergeColdContext(
211 "sample-merge-cold-context", cl::init(Val: false), cl::Hidden,
212 cl::sub(MergeSubcommand),
213 cl::desc(
214 "Merge context sample profiles whose count is below cold threshold"));
215static cl::opt<bool> SampleTrimColdContext(
216 "sample-trim-cold-context", cl::init(Val: false), cl::Hidden,
217 cl::sub(MergeSubcommand),
218 cl::desc(
219 "Trim context sample profiles whose count is below cold threshold"));
220static cl::opt<uint32_t> SampleColdContextFrameDepth(
221 "sample-frame-depth-for-cold-context", cl::init(Val: 1),
222 cl::sub(MergeSubcommand),
223 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
224 "context-less base profile"));
225static cl::opt<size_t> OutputSizeLimit(
226 "output-size-limit", cl::init(Val: 0), cl::Hidden, cl::sub(MergeSubcommand),
227 cl::desc("Trim cold functions until profile size is below specified "
228 "limit in bytes. This uses a heursitic and functions may be "
229 "excessively trimmed"));
230static cl::opt<bool> GenPartialProfile(
231 "gen-partial-profile", cl::init(Val: false), cl::Hidden,
232 cl::sub(MergeSubcommand),
233 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
234static cl::opt<bool> SplitLayout(
235 "split-layout", cl::init(Val: false), cl::Hidden, cl::sub(MergeSubcommand),
236 cl::desc("Split the profile to two sections with one containing sample "
237 "profiles with inlined functions and the other without (only "
238 "meaningful for -extbinary)"));
239static cl::opt<std::string> SupplInstrWithSample(
240 "supplement-instr-with-sample", cl::init(Val: ""), cl::Hidden,
241 cl::sub(MergeSubcommand),
242 cl::desc("Supplement an instr profile with sample profile, to correct "
243 "the profile unrepresentativeness issue. The sample "
244 "profile is the input of the flag. Output will be in instr "
245 "format (The flag only works with -instr)"));
246static cl::opt<float> ZeroCounterThreshold(
247 "zero-counter-threshold", cl::init(Val: 0.7), cl::Hidden,
248 cl::sub(MergeSubcommand),
249 cl::desc("For the function which is cold in instr profile but hot in "
250 "sample profile, if the ratio of the number of zero counters "
251 "divided by the total number of counters is above the "
252 "threshold, the profile of the function will be regarded as "
253 "being harmful for performance and will be dropped."));
254static cl::opt<unsigned> SupplMinSizeThreshold(
255 "suppl-min-size-threshold", cl::init(Val: 10), cl::Hidden,
256 cl::sub(MergeSubcommand),
257 cl::desc("If the size of a function is smaller than the threshold, "
258 "assume it can be inlined by PGO early inliner and it won't "
259 "be adjusted based on sample profile."));
260static cl::opt<unsigned> InstrProfColdThreshold(
261 "instr-prof-cold-threshold", cl::init(Val: 0), cl::Hidden,
262 cl::sub(MergeSubcommand),
263 cl::desc("User specified cold threshold for instr profile which will "
264 "override the cold threshold got from profile summary. "));
265// WARNING: This reservoir size value is propagated to any input indexed
266// profiles for simplicity. Changing this value between invocations could
267// result in sample bias.
268static cl::opt<uint64_t> TemporalProfTraceReservoirSize(
269 "temporal-profile-trace-reservoir-size", cl::init(Val: 100),
270 cl::sub(MergeSubcommand),
271 cl::desc("The maximum number of stored temporal profile traces (default: "
272 "100)"));
273static cl::opt<uint64_t> TemporalProfMaxTraceLength(
274 "temporal-profile-max-trace-length", cl::init(Val: 10000),
275 cl::sub(MergeSubcommand),
276 cl::desc("The maximum length of a single temporal profile trace "
277 "(default: 10000)"));
278static cl::opt<std::string> FuncNameNegativeFilter(
279 "no-function", cl::init(Val: ""), cl::sub(MergeSubcommand),
280 cl::desc("Exclude functions matching the filter from the output."));
281
282static cl::opt<FailureMode>
283 FailMode("failure-mode", cl::init(Val: failIfAnyAreInvalid),
284 cl::desc("Failure mode:"), cl::sub(MergeSubcommand),
285 cl::values(clEnumValN(warnOnly, "warn",
286 "Do not fail and just print warnings."),
287 clEnumValN(failIfAnyAreInvalid, "any",
288 "Fail if any profile is invalid."),
289 clEnumValN(failIfAllAreInvalid, "all",
290 "Fail only if all profiles are invalid.")));
291
292static cl::opt<bool> OutputSparse(
293 "sparse", cl::init(Val: false), cl::sub(MergeSubcommand),
294 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
295static cl::opt<unsigned> NumThreads(
296 "num-threads", cl::init(Val: 0), cl::sub(MergeSubcommand),
297 cl::desc("Number of merge threads to use (default: autodetect)"));
298static cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
299 cl::aliasopt(NumThreads));
300
301static cl::opt<std::string> ProfileSymbolListFile(
302 "prof-sym-list", cl::init(Val: ""), cl::sub(MergeSubcommand),
303 cl::desc("Path to file containing the list of function symbols "
304 "used to populate profile symbol list"));
305
306static cl::opt<SampleProfileLayout> ProfileLayout(
307 "convert-sample-profile-layout",
308 cl::desc("Convert the generated profile to a profile with a new layout"),
309 cl::sub(MergeSubcommand), cl::init(Val: SPL_None),
310 cl::values(
311 clEnumValN(SPL_Nest, "nest",
312 "Nested profile, the input should be CS flat profile"),
313 clEnumValN(SPL_Flat, "flat",
314 "Profile with nested inlinee flatten out")));
315
316static cl::opt<bool> DropProfileSymbolList(
317 "drop-profile-symbol-list", cl::init(Val: false), cl::Hidden,
318 cl::sub(MergeSubcommand),
319 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
320 "(only meaningful for -sample)"));
321
322static cl::opt<bool> KeepVTableSymbols(
323 "keep-vtable-symbols", cl::init(Val: false), cl::Hidden,
324 cl::sub(MergeSubcommand),
325 cl::desc("If true, keep the vtable symbols in indexed profiles"));
326
327// Temporary support for writing the previous version of the format, to enable
328// some forward compatibility.
329// TODO: Consider enabling this with future version changes as well, to ease
330// deployment of newer versions of llvm-profdata.
331static cl::opt<bool> DoWritePrevVersion(
332 "write-prev-version", cl::init(Val: false), cl::Hidden,
333 cl::desc("Write the previous version of indexed format, to enable "
334 "some forward compatibility."));
335
336static cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
337 "memprof-version", cl::Hidden, cl::sub(MergeSubcommand),
338 cl::desc("Specify the version of the memprof format to use"),
339 cl::init(Val: memprof::Version3),
340 cl::values(clEnumValN(memprof::Version2, "2", "version 2"),
341 clEnumValN(memprof::Version3, "3", "version 3"),
342 clEnumValN(memprof::Version4, "4", "version 4")));
343
344static cl::opt<bool> MemProfFullSchema(
345 "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
346 cl::desc("Use the full schema for serialization"), cl::init(Val: false));
347
348static cl::opt<bool>
349 MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(Val: false),
350 cl::Hidden, cl::sub(MergeSubcommand),
351 cl::desc("Generate random hotness values"));
352static cl::opt<unsigned> MemprofGenerateRandomHotnessSeed(
353 "memprof-random-hotness-seed", cl::init(Val: 0), cl::Hidden,
354 cl::sub(MergeSubcommand),
355 cl::desc("Random hotness seed to use (0 to generate new seed)"));
356
357// Options specific to overlap subcommand.
358static cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
359 cl::desc("<base profile file>"),
360 cl::sub(OverlapSubcommand));
361static cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
362 cl::desc("<test profile file>"),
363 cl::sub(OverlapSubcommand));
364
365static cl::opt<unsigned long long> SimilarityCutoff(
366 "similarity-cutoff", cl::init(Val: 0),
367 cl::desc("For sample profiles, list function names (with calling context "
368 "for csspgo) for overlapped functions "
369 "with similarities below the cutoff (percentage times 10000)."),
370 cl::sub(OverlapSubcommand));
371
372static cl::opt<bool> IsCS(
373 "cs", cl::init(Val: false),
374 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
375 cl::sub(OverlapSubcommand));
376
377static cl::opt<unsigned long long> OverlapValueCutoff(
378 "value-cutoff", cl::init(Val: -1),
379 cl::desc(
380 "Function level overlap information for every function (with calling "
381 "context for csspgo) in test "
382 "profile with max count value greater than the parameter value"),
383 cl::sub(OverlapSubcommand));
384
385// Options specific to show subcommand.
386static cl::opt<bool>
387 ShowCounts("counts", cl::init(Val: false),
388 cl::desc("Show counter values for shown functions"),
389 cl::sub(ShowSubcommand));
390static cl::opt<ShowFormat>
391 SFormat("show-format", cl::init(Val: ShowFormat::Text),
392 cl::desc("Emit output in the selected format if supported"),
393 cl::sub(ShowSubcommand),
394 cl::values(clEnumValN(ShowFormat::Text, "text",
395 "emit normal text output (default)"),
396 clEnumValN(ShowFormat::Json, "json", "emit JSON"),
397 clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
398// TODO: Consider replacing this with `--show-format=text-encoding`.
399static cl::opt<bool>
400 TextFormat("text", cl::init(Val: false),
401 cl::desc("Show instr profile data in text dump format"),
402 cl::sub(ShowSubcommand));
403static cl::opt<bool>
404 JsonFormat("json",
405 cl::desc("Show sample profile data in the JSON format "
406 "(deprecated, please use --show-format=json)"),
407 cl::sub(ShowSubcommand));
408static cl::opt<bool> ShowIndirectCallTargets(
409 "ic-targets", cl::init(Val: false),
410 cl::desc("Show indirect call site target values for shown functions"),
411 cl::sub(ShowSubcommand));
412static cl::opt<bool>
413 ShowVTables("show-vtables", cl::init(Val: false),
414 cl::desc("Show vtable names for shown functions"),
415 cl::sub(ShowSubcommand));
416static cl::opt<bool> ShowMemOPSizes(
417 "memop-sizes", cl::init(Val: false),
418 cl::desc("Show the profiled sizes of the memory intrinsic calls "
419 "for shown functions"),
420 cl::sub(ShowSubcommand));
421static cl::opt<bool>
422 ShowDetailedSummary("detailed-summary", cl::init(Val: false),
423 cl::desc("Show detailed profile summary"),
424 cl::sub(ShowSubcommand));
425static cl::list<uint32_t> DetailedSummaryCutoffs(
426 cl::CommaSeparated, "detailed-summary-cutoffs",
427 cl::desc(
428 "Cutoff percentages (times 10000) for generating detailed summary"),
429 cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand));
430static cl::opt<bool>
431 ShowHotFuncList("hot-func-list", cl::init(Val: false),
432 cl::desc("Show profile summary of a list of hot functions"),
433 cl::sub(ShowSubcommand));
434static cl::opt<bool>
435 ShowAllFunctions("all-functions", cl::init(Val: false),
436 cl::desc("Details for each and every function"),
437 cl::sub(ShowSubcommand));
438static cl::opt<bool> ShowCS("showcs", cl::init(Val: false),
439 cl::desc("Show context sensitive counts"),
440 cl::sub(ShowSubcommand));
441static cl::opt<ProfileKinds> ShowProfileKind(
442 cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand),
443 cl::init(Val: instr),
444 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
445 clEnumVal(sample, "Sample profile"),
446 clEnumVal(memory, "MemProf memory access profile")));
447static cl::opt<uint32_t> TopNFunctions(
448 "topn", cl::init(Val: 0),
449 cl::desc("Show the list of functions with the largest internal counts"),
450 cl::sub(ShowSubcommand));
451static cl::opt<uint32_t> ShowValueCutoff(
452 "value-cutoff", cl::init(Val: 0),
453 cl::desc("Set the count value cutoff. Functions with the maximum count "
454 "less than this value will not be printed out. (Default is 0)"),
455 cl::sub(ShowSubcommand));
456static cl::opt<bool> OnlyListBelow(
457 "list-below-cutoff", cl::init(Val: false),
458 cl::desc("Only output names of functions whose max count values are "
459 "below the cutoff value"),
460 cl::sub(ShowSubcommand));
461static cl::opt<bool> ShowProfileSymbolList(
462 "show-prof-sym-list", cl::init(Val: false),
463 cl::desc("Show profile symbol list if it exists in the profile. "),
464 cl::sub(ShowSubcommand));
465static cl::opt<bool> ShowSectionInfoOnly(
466 "show-sec-info-only", cl::init(Val: false),
467 cl::desc("Show the information of each section in the sample profile. "
468 "The flag is only usable when the sample profile is in "
469 "extbinary format"),
470 cl::sub(ShowSubcommand));
471static cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(Val: false),
472 cl::desc("Show binary ids in the profile. "),
473 cl::sub(ShowSubcommand));
474static cl::opt<bool> ShowTemporalProfTraces(
475 "temporal-profile-traces",
476 cl::desc("Show temporal profile traces in the profile."),
477 cl::sub(ShowSubcommand));
478
479static cl::opt<bool>
480 ShowCovered("covered", cl::init(Val: false),
481 cl::desc("Show only the functions that have been executed."),
482 cl::sub(ShowSubcommand));
483
484static cl::opt<bool> ShowProfileVersion("profile-version", cl::init(Val: false),
485 cl::desc("Show profile version. "),
486 cl::sub(ShowSubcommand));
487
488// Options specific to order subcommand.
489static cl::opt<unsigned>
490 NumTestTraces("num-test-traces", cl::init(Val: 0),
491 cl::desc("Keep aside the last <num-test-traces> traces in "
492 "the profile when computing the function order and "
493 "instead use them to evaluate that order"),
494 cl::sub(OrderSubcommand));
495
// Marker string used to indicate that multiple static functions map to the
// same name.
const std::string DuplicateNameStr = "----";
499
500static void warn(Twine Message, StringRef Whence = "", StringRef Hint = "") {
501 WithColor::warning();
502 if (!Whence.empty())
503 errs() << Whence << ": ";
504 errs() << Message << "\n";
505 if (!Hint.empty())
506 WithColor::note() << Hint << "\n";
507}
508
509static void warn(Error E, StringRef Whence = "") {
510 if (E.isA<InstrProfError>()) {
511 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
512 warn(Message: IPE.message(), Whence);
513 });
514 }
515}
516
517static void exitWithError(Twine Message, StringRef Whence = "",
518 StringRef Hint = "") {
519 WithColor::error();
520 if (!Whence.empty())
521 errs() << Whence << ": ";
522 errs() << Message << "\n";
523 if (!Hint.empty())
524 WithColor::note() << Hint << "\n";
525 ::exit(status: 1);
526}
527
528static void exitWithError(Error E, StringRef Whence = "") {
529 if (E.isA<InstrProfError>()) {
530 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
531 instrprof_error instrError = IPE.get();
532 StringRef Hint = "";
533 if (instrError == instrprof_error::unrecognized_format) {
534 // Hint in case user missed specifying the profile type.
535 Hint = "Perhaps you forgot to use the --sample or --memory option?";
536 }
537 exitWithError(Message: IPE.message(), Whence, Hint);
538 });
539 return;
540 }
541
542 exitWithError(Message: toString(E: std::move(E)), Whence);
543}
544
545static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
546 exitWithError(Message: EC.message(), Whence);
547}
548
549static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
550 StringRef Whence = "") {
551 if (FailMode == failIfAnyAreInvalid)
552 exitWithErrorCode(EC, Whence);
553 else
554 warn(Message: EC.message(), Whence);
555}
556
557static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
558 StringRef WhenceFunction = "",
559 bool ShowHint = true) {
560 if (!WhenceFile.empty())
561 errs() << WhenceFile << ": ";
562 if (!WhenceFunction.empty())
563 errs() << WhenceFunction << ": ";
564
565 auto IPE = instrprof_error::success;
566 E = handleErrors(E: std::move(E),
567 Hs: [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
568 IPE = E->get();
569 return Error(std::move(E));
570 });
571 errs() << toString(E: std::move(E)) << "\n";
572
573 if (ShowHint) {
574 StringRef Hint = "";
575 if (IPE != instrprof_error::success) {
576 switch (IPE) {
577 case instrprof_error::hash_mismatch:
578 case instrprof_error::count_mismatch:
579 case instrprof_error::value_site_count_mismatch:
580 Hint = "Make sure that all profile data to be merged is generated "
581 "from the same binary.";
582 break;
583 default:
584 break;
585 }
586 }
587
588 if (!Hint.empty())
589 errs() << Hint << "\n";
590 }
591}
592
593namespace {
594/// A remapper from original symbol names to new symbol names based on a file
595/// containing a list of mappings from old name to new name.
596class SymbolRemapper {
597 std::unique_ptr<MemoryBuffer> File;
598 DenseMap<StringRef, StringRef> RemappingTable;
599
600public:
601 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
602 static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
603 auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
604 if (!BufOrError)
605 exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
606
607 auto Remapper = std::make_unique<SymbolRemapper>();
608 Remapper->File = std::move(BufOrError.get());
609
610 for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
611 !LineIt.is_at_eof(); ++LineIt) {
612 std::pair<StringRef, StringRef> Parts = LineIt->split(Separator: ' ');
613 if (Parts.first.empty() || Parts.second.empty() ||
614 Parts.second.count(C: ' ')) {
615 exitWithError(Message: "unexpected line in remapping file",
616 Whence: (InputFile + ":" + Twine(LineIt.line_number())).str(),
617 Hint: "expected 'old_symbol new_symbol'");
618 }
619 Remapper->RemappingTable.insert(KV: Parts);
620 }
621 return Remapper;
622 }
623
624 /// Attempt to map the given old symbol into a new symbol.
625 ///
626 /// \return The new symbol, or \p Name if no such symbol was found.
627 StringRef operator()(StringRef Name) {
628 StringRef New = RemappingTable.lookup(Val: Name);
629 return New.empty() ? Name : New;
630 }
631
632 FunctionId operator()(FunctionId Name) {
633 // MD5 name cannot be remapped.
634 if (!Name.isStringRef())
635 return Name;
636 StringRef New = RemappingTable.lookup(Val: Name.stringRef());
637 return New.empty() ? Name : FunctionId(New);
638 }
639};
640}
641
642struct WeightedFile {
643 std::string Filename;
644 uint64_t Weight;
645};
646typedef SmallVector<WeightedFile, 5> WeightedFileVector;
647
648/// Keep track of merged data and reported errors.
649struct WriterContext {
650 std::mutex Lock;
651 InstrProfWriter Writer;
652 std::vector<std::pair<Error, std::string>> Errors;
653 std::mutex &ErrLock;
654 SmallSet<instrprof_error, 4> &WriterErrorCodes;
655
656 WriterContext(bool IsSparse, std::mutex &ErrLock,
657 SmallSet<instrprof_error, 4> &WriterErrorCodes,
658 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
659 : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
660 MemProfVersionRequested, MemProfFullSchema,
661 MemprofGenerateRandomHotness, MemprofGenerateRandomHotnessSeed),
662 ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
663};
664
665/// Computer the overlap b/w profile BaseFilename and TestFileName,
666/// and store the program level result to Overlap.
667static void overlapInput(const std::string &BaseFilename,
668 const std::string &TestFilename, WriterContext *WC,
669 OverlapStats &Overlap,
670 const OverlapFuncFilters &FuncFilter,
671 raw_fd_ostream &OS, bool IsCS) {
672 auto FS = vfs::getRealFileSystem();
673 auto ReaderOrErr = InstrProfReader::create(Path: TestFilename, FS&: *FS);
674 if (Error E = ReaderOrErr.takeError()) {
675 // Skip the empty profiles by returning sliently.
676 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
677 if (ErrorCode != instrprof_error::empty_raw_profile)
678 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
679 args: TestFilename);
680 return;
681 }
682
683 auto Reader = std::move(ReaderOrErr.get());
684 for (auto &I : *Reader) {
685 OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
686 FuncOverlap.setFuncInfo(Name: I.Name, Hash: I.Hash);
687
688 WC->Writer.overlapRecord(Other: std::move(I), Overlap, FuncLevelOverlap&: FuncOverlap, FuncFilter);
689 FuncOverlap.dump(OS);
690 }
691}
692
693/// Load an input into a writer context.
694static void
695loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
696 const InstrProfCorrelator *Correlator, const StringRef ProfiledBinary,
697 WriterContext *WC, const object::BuildIDFetcher *BIDFetcher = nullptr,
698 const ProfCorrelatorKind *BIDFetcherCorrelatorKind = nullptr) {
699 std::unique_lock<std::mutex> CtxGuard{WC->Lock};
700
701 // Copy the filename, because llvm::ThreadPool copied the input "const
702 // WeightedFile &" by value, making a reference to the filename within it
703 // invalid outside of this packaged task.
704 std::string Filename = Input.Filename;
705
706 using ::llvm::memprof::RawMemProfReader;
707 if (RawMemProfReader::hasFormat(Path: Input.Filename)) {
708 auto ReaderOrErr = RawMemProfReader::create(Path: Input.Filename, ProfiledBinary);
709 if (!ReaderOrErr) {
710 exitWithError(E: ReaderOrErr.takeError(), Whence: Input.Filename);
711 }
712 std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
713 // Check if the profile types can be merged, e.g. clang frontend profiles
714 // should not be merged with memprof profiles.
715 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
716 consumeError(Err: std::move(E));
717 WC->Errors.emplace_back(
718 args: make_error<StringError>(
719 Args: "Cannot merge MemProf profile with Clang generated profile.",
720 Args: std::error_code()),
721 args&: Filename);
722 return;
723 }
724
725 auto MemProfError = [&](Error E) {
726 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
727 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
728 args&: Filename);
729 };
730
731 WC->Writer.addMemProfData(Incoming: Reader->takeMemProfData(), Warn: MemProfError);
732 return;
733 }
734
735 using ::llvm::memprof::YAMLMemProfReader;
736 if (YAMLMemProfReader::hasFormat(Path: Input.Filename)) {
737 auto ReaderOrErr = YAMLMemProfReader::create(Path: Input.Filename);
738 if (!ReaderOrErr)
739 exitWithError(E: ReaderOrErr.takeError(), Whence: Input.Filename);
740 std::unique_ptr<YAMLMemProfReader> Reader = std::move(ReaderOrErr.get());
741 // Check if the profile types can be merged, e.g. clang frontend profiles
742 // should not be merged with memprof profiles.
743 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
744 consumeError(Err: std::move(E));
745 WC->Errors.emplace_back(
746 args: make_error<StringError>(
747 Args: "Cannot merge MemProf profile with incompatible profile.",
748 Args: std::error_code()),
749 args&: Filename);
750 return;
751 }
752
753 auto MemProfError = [&](Error E) {
754 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
755 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
756 args&: Filename);
757 };
758
759 auto MemProfData = Reader->takeMemProfData();
760
761 auto DataAccessProfData = Reader->takeDataAccessProfData();
762
763 // Check for the empty input in case the YAML file is invalid.
764 if (MemProfData.Records.empty() &&
765 (!DataAccessProfData || DataAccessProfData->empty())) {
766 WC->Errors.emplace_back(
767 args: make_error<StringError>(Args: "The profile is empty.", Args: std::error_code()),
768 args&: Filename);
769 }
770
771 WC->Writer.addMemProfData(Incoming: std::move(MemProfData), Warn: MemProfError);
772 WC->Writer.addDataAccessProfData(DataAccessProfile: std::move(DataAccessProfData));
773 return;
774 }
775
776 auto FS = vfs::getRealFileSystem();
777 // TODO: This only saves the first non-fatal error from InstrProfReader, and
778 // then added to WriterContext::Errors. However, this is not extensible, if
779 // we have more non-fatal errors from InstrProfReader in the future. How
780 // should this interact with different -failure-mode?
781 std::optional<std::pair<Error, std::string>> ReaderWarning;
782 llvm::scope_exit ReaderWarningScope([&] {
783 // If we hit a different error we may still have an error in ReaderWarning.
784 // Consume it now to avoid an assert
785 if (ReaderWarning)
786 consumeError(Err: std::move(ReaderWarning->first));
787 });
788 auto Warn = [&](Error E) {
789 if (ReaderWarning) {
790 consumeError(Err: std::move(E));
791 return;
792 }
793 // Only show the first time an error occurs in this file.
794 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
795 ReaderWarning = {make_error<InstrProfError>(Args&: ErrCode, Args&: Msg), Filename};
796 };
797
798 const ProfCorrelatorKind CorrelatorKind = BIDFetcherCorrelatorKind
799 ? *BIDFetcherCorrelatorKind
800 : ProfCorrelatorKind::NONE;
801 auto ReaderOrErr = InstrProfReader::create(Path: Input.Filename, FS&: *FS, Correlator,
802 BIDFetcher, BIDFetcherCorrelatorKind: CorrelatorKind, Warn);
803 if (Error E = ReaderOrErr.takeError()) {
804 // Skip the empty profiles by returning silently.
805 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
806 if (ErrCode != instrprof_error::empty_raw_profile)
807 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
808 args&: Filename);
809 return;
810 }
811
812 auto Reader = std::move(ReaderOrErr.get());
813 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
814 consumeError(Err: std::move(E));
815 WC->Errors.emplace_back(
816 args: make_error<StringError>(
817 Args: "Merge IR generated profile with Clang generated profile.",
818 Args: std::error_code()),
819 args&: Filename);
820 return;
821 }
822
823 for (auto &I : *Reader) {
824 if (Remapper)
825 I.Name = (*Remapper)(I.Name);
826 const StringRef FuncName = I.Name;
827 bool Reported = false;
828 WC->Writer.addRecord(I: std::move(I), Weight: Input.Weight, Warn: [&](Error E) {
829 if (Reported) {
830 consumeError(Err: std::move(E));
831 return;
832 }
833 Reported = true;
834 // Only show hint the first time an error occurs.
835 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
836 std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
837 bool firstTime = WC->WriterErrorCodes.insert(V: ErrCode).second;
838 handleMergeWriterError(E: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
839 WhenceFile: Input.Filename, WhenceFunction: FuncName, ShowHint: firstTime);
840 });
841 }
842
843 if (KeepVTableSymbols) {
844 const InstrProfSymtab &symtab = Reader->getSymtab();
845 const auto &VTableNames = symtab.getVTableNames();
846
847 for (const auto &kv : VTableNames)
848 WC->Writer.addVTableName(VTableName: kv.getKey());
849 }
850
851 if (Reader->hasTemporalProfile()) {
852 auto &Traces = Reader->getTemporalProfTraces(Weight: Input.Weight);
853 if (!Traces.empty())
854 WC->Writer.addTemporalProfileTraces(
855 SrcTraces&: Traces, SrcStreamSize: Reader->getTemporalProfTraceStreamSize());
856 }
857 if (Reader->hasError()) {
858 if (Error E = Reader->getError()) {
859 WC->Errors.emplace_back(args: std::move(E), args&: Filename);
860 return;
861 }
862 }
863
864 std::vector<llvm::object::BuildID> BinaryIds;
865 if (Error E = Reader->readBinaryIds(BinaryIds)) {
866 WC->Errors.emplace_back(args: std::move(E), args&: Filename);
867 return;
868 }
869 WC->Writer.addBinaryIds(BIs: BinaryIds);
870
871 if (ReaderWarning) {
872 WC->Errors.emplace_back(args: std::move(ReaderWarning->first),
873 args&: ReaderWarning->second);
874 }
875}
876
877/// Merge the \p Src writer context into \p Dst.
878static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
879 for (auto &ErrorPair : Src->Errors)
880 Dst->Errors.push_back(x: std::move(ErrorPair));
881 Src->Errors.clear();
882
883 if (Error E = Dst->Writer.mergeProfileKind(Other: Src->Writer.getProfileKind()))
884 exitWithError(E: std::move(E));
885
886 Dst->Writer.mergeRecordsFromWriter(IPW: std::move(Src->Writer), Warn: [&](Error E) {
887 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
888 std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
889 bool firstTime = Dst->WriterErrorCodes.insert(V: ErrorCode).second;
890 if (firstTime)
891 warn(Message: toString(E: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg)));
892 });
893}
894
895static StringRef
896getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
897 return Val.first();
898}
899
900static std::string
901getFuncName(const SampleProfileMap::value_type &Val) {
902 return Val.second.getContext().toString();
903}
904
905template <typename T>
906static void filterFunctions(T &ProfileMap) {
907 bool hasFilter = !FuncNameFilter.empty();
908 bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
909 if (!hasFilter && !hasNegativeFilter)
910 return;
911
912 // If filter starts with '?' it is MSVC mangled name, not a regex.
913 llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
914 if (hasFilter && FuncNameFilter[0] == '?' &&
915 ProbablyMSVCMangledName.match(String: FuncNameFilter))
916 FuncNameFilter = llvm::Regex::escape(String: FuncNameFilter);
917 if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
918 ProbablyMSVCMangledName.match(String: FuncNameNegativeFilter))
919 FuncNameNegativeFilter = llvm::Regex::escape(String: FuncNameNegativeFilter);
920
921 size_t Count = ProfileMap.size();
922 llvm::Regex Pattern(FuncNameFilter);
923 llvm::Regex NegativePattern(FuncNameNegativeFilter);
924 std::string Error;
925 if (hasFilter && !Pattern.isValid(Error))
926 exitWithError(Message: Error);
927 if (hasNegativeFilter && !NegativePattern.isValid(Error))
928 exitWithError(Message: Error);
929
930 // Handle MD5 profile, so it is still able to match using the original name.
931 std::string MD5Name = std::to_string(val: llvm::MD5Hash(Str: FuncNameFilter));
932 std::string NegativeMD5Name =
933 std::to_string(val: llvm::MD5Hash(Str: FuncNameNegativeFilter));
934
935 for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
936 auto Tmp = I++;
937 const auto &FuncName = getFuncName(*Tmp);
938 // Negative filter has higher precedence than positive filter.
939 if ((hasNegativeFilter &&
940 (NegativePattern.match(String: FuncName) ||
941 (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
942 (hasFilter && !(Pattern.match(String: FuncName) ||
943 (FunctionSamples::UseMD5 && MD5Name == FuncName))))
944 ProfileMap.erase(Tmp);
945 }
946
947 llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
948 << "in the original profile are filtered.\n";
949}
950
951static void writeInstrProfile(StringRef OutputFilename,
952 ProfileFormat OutputFormat,
953 InstrProfWriter &Writer) {
954 std::error_code EC;
955 raw_fd_ostream Output(OutputFilename.data(), EC,
956 OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
957 : sys::fs::OF_None);
958 if (EC)
959 exitWithErrorCode(EC, Whence: OutputFilename);
960
961 if (OutputFormat == PF_Text) {
962 if (Error E = Writer.writeText(OS&: Output))
963 warn(E: std::move(E));
964 } else {
965 if (Output.is_displayed())
966 exitWithError(Message: "cannot write a non-text format profile to the terminal");
967 if (Error E = Writer.write(OS&: Output))
968 warn(E: std::move(E));
969 }
970}
971
972static void mergeInstrProfile(const WeightedFileVector &Inputs,
973 SymbolRemapper *Remapper,
974 int MaxDbgCorrelationWarnings,
975 const StringRef ProfiledBinary) {
976 const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
977 const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
978 if (OutputFormat == PF_Compact_Binary)
979 exitWithError(Message: "Compact Binary is deprecated");
980 if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
981 OutputFormat != PF_Text)
982 exitWithError(Message: "unknown format is specified");
983
984 // TODO: Maybe we should support correlation with mixture of different
985 // correlation modes(w/wo debug-info/object correlation).
986 if (DebugInfoFilename.empty()) {
987 if (!BinaryFilename.empty() && (DebugInfod || !DebugFileDirectory.empty()))
988 exitWithError(Message: "Expected only one of -binary-file, -debuginfod or "
989 "-debug-file-directory");
990 } else if (!BinaryFilename.empty() || DebugInfod ||
991 !DebugFileDirectory.empty()) {
992 exitWithError(Message: "Expected only one of -debug-info, -binary-file, -debuginfod "
993 "or -debug-file-directory");
994 }
995 std::string CorrelateFilename;
996 ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
997 if (!DebugInfoFilename.empty()) {
998 CorrelateFilename = DebugInfoFilename;
999 CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
1000 } else if (!BinaryFilename.empty()) {
1001 CorrelateFilename = BinaryFilename;
1002 CorrelateKind = ProfCorrelatorKind::BINARY;
1003 }
1004
1005 std::unique_ptr<InstrProfCorrelator> Correlator;
1006 if (CorrelateKind != InstrProfCorrelator::NONE) {
1007 if (auto Err = InstrProfCorrelator::get(Filename: CorrelateFilename, FileKind: CorrelateKind)
1008 .moveInto(Value&: Correlator))
1009 exitWithError(E: std::move(Err), Whence: CorrelateFilename);
1010 if (auto Err = Correlator->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
1011 exitWithError(E: std::move(Err), Whence: CorrelateFilename);
1012 }
1013
1014 ProfCorrelatorKind BIDFetcherCorrelateKind = ProfCorrelatorKind::NONE;
1015 std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
1016 if (DebugInfod) {
1017 llvm::HTTPClient::initialize();
1018 BIDFetcher = std::make_unique<DebuginfodFetcher>(args&: DebugFileDirectory);
1019 if (!BIDFetcherProfileCorrelate)
1020 exitWithError(Message: "Expected --correlate when --debuginfod is provided");
1021 BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
1022 } else if (!DebugFileDirectory.empty()) {
1023 BIDFetcher = std::make_unique<object::BuildIDFetcher>(args&: DebugFileDirectory);
1024 if (!BIDFetcherProfileCorrelate)
1025 exitWithError(Message: "Expected --correlate when --debug-file-directory "
1026 "is provided");
1027 BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
1028 } else if (BIDFetcherProfileCorrelate) {
1029 exitWithError(Message: "Expected --debuginfod or --debug-file-directory when "
1030 "--correlate is provided");
1031 }
1032
1033 std::mutex ErrorLock;
1034 SmallSet<instrprof_error, 4> WriterErrorCodes;
1035
1036 // If NumThreads is not specified, auto-detect a good default.
1037 if (NumThreads == 0)
1038 NumThreads = std::min(a: hardware_concurrency().compute_thread_count(),
1039 b: unsigned((Inputs.size() + 1) / 2));
1040
1041 // Initialize the writer contexts.
1042 SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
1043 for (unsigned I = 0; I < NumThreads; ++I)
1044 Contexts.emplace_back(Args: std::make_unique<WriterContext>(
1045 args&: OutputSparse, args&: ErrorLock, args&: WriterErrorCodes, args: TraceReservoirSize,
1046 args: MaxTraceLength));
1047
1048 if (NumThreads == 1) {
1049 for (const auto &Input : Inputs)
1050 loadInput(Input, Remapper, Correlator: Correlator.get(), ProfiledBinary,
1051 WC: Contexts[0].get(), BIDFetcher: BIDFetcher.get(), BIDFetcherCorrelatorKind: &BIDFetcherCorrelateKind);
1052 } else {
1053 DefaultThreadPool Pool(hardware_concurrency(ThreadCount: NumThreads));
1054
1055 // Load the inputs in parallel (N/NumThreads serial steps).
1056 unsigned Ctx = 0;
1057 for (const auto &Input : Inputs) {
1058 Pool.async(F&: loadInput, ArgList: Input, ArgList&: Remapper, ArgList: Correlator.get(), ArgList: ProfiledBinary,
1059 ArgList: Contexts[Ctx].get(), ArgList: BIDFetcher.get(),
1060 ArgList: &BIDFetcherCorrelateKind);
1061 Ctx = (Ctx + 1) % NumThreads;
1062 }
1063 Pool.wait();
1064
1065 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
1066 unsigned Mid = Contexts.size() / 2;
1067 unsigned End = Contexts.size();
1068 assert(Mid > 0 && "Expected more than one context");
1069 do {
1070 for (unsigned I = 0; I < Mid; ++I)
1071 Pool.async(F&: mergeWriterContexts, ArgList: Contexts[I].get(),
1072 ArgList: Contexts[I + Mid].get());
1073 Pool.wait();
1074 if (End & 1) {
1075 Pool.async(F&: mergeWriterContexts, ArgList: Contexts[0].get(),
1076 ArgList: Contexts[End - 1].get());
1077 Pool.wait();
1078 }
1079 End = Mid;
1080 Mid /= 2;
1081 } while (Mid > 0);
1082 }
1083
1084 // Handle deferred errors encountered during merging. If the number of errors
1085 // is equal to the number of inputs the merge failed.
1086 unsigned NumErrors = 0;
1087 for (std::unique_ptr<WriterContext> &WC : Contexts) {
1088 for (auto &ErrorPair : WC->Errors) {
1089 ++NumErrors;
1090 warn(Message: toString(E: std::move(ErrorPair.first)), Whence: ErrorPair.second);
1091 }
1092 }
1093 if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
1094 (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
1095 exitWithError(Message: "no profile can be merged");
1096
1097 filterFunctions(ProfileMap&: Contexts[0]->Writer.getProfileData());
1098
1099 writeInstrProfile(OutputFilename, OutputFormat, Writer&: Contexts[0]->Writer);
1100}
1101
1102/// The profile entry for a function in instrumentation profile.
1103struct InstrProfileEntry {
1104 uint64_t MaxCount = 0;
1105 uint64_t NumEdgeCounters = 0;
1106 float ZeroCounterRatio = 0.0;
1107 InstrProfRecord *ProfRecord;
1108 InstrProfileEntry(InstrProfRecord *Record);
1109 InstrProfileEntry() = default;
1110};
1111
1112InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
1113 ProfRecord = Record;
1114 uint64_t CntNum = Record->Counts.size();
1115 uint64_t ZeroCntNum = 0;
1116 for (size_t I = 0; I < CntNum; ++I) {
1117 MaxCount = std::max(a: MaxCount, b: Record->Counts[I]);
1118 ZeroCntNum += !Record->Counts[I];
1119 }
1120 ZeroCounterRatio = (float)ZeroCntNum / CntNum;
1121 NumEdgeCounters = CntNum;
1122}
1123
1124/// Either set all the counters in the instr profile entry \p IFE to
1125/// -1 / -2 /in order to drop the profile or scale up the
1126/// counters in \p IFP to be above hot / cold threshold. We use
1127/// the ratio of zero counters in the profile of a function to
1128/// decide the profile is helpful or harmful for performance,
1129/// and to choose whether to scale up or drop it.
1130static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
1131 uint64_t HotInstrThreshold,
1132 uint64_t ColdInstrThreshold,
1133 float ZeroCounterThreshold) {
1134 InstrProfRecord *ProfRecord = IFE.ProfRecord;
1135 if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
1136 // If all or most of the counters of the function are zero, the
1137 // profile is unaccountable and should be dropped. Reset all the
1138 // counters to be -1 / -2 and PGO profile-use will drop the profile.
1139 // All counters being -1 also implies that the function is hot so
1140 // PGO profile-use will also set the entry count metadata to be
1141 // above hot threshold.
1142 // All counters being -2 implies that the function is warm so
1143 // PGO profile-use will also set the entry count metadata to be
1144 // above cold threshold.
1145 auto Kind =
1146 (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
1147 ProfRecord->setPseudoCount(Kind);
1148 return;
1149 }
1150
1151 // Scale up the MaxCount to be multiple times above hot / cold threshold.
1152 const unsigned MultiplyFactor = 3;
1153 uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1154 uint64_t Numerator = Threshold * MultiplyFactor;
1155
1156 // Make sure Threshold for warm counters is below the HotInstrThreshold.
1157 if (!SetToHot && Threshold >= HotInstrThreshold) {
1158 Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
1159 }
1160
1161 uint64_t Denominator = IFE.MaxCount;
1162 if (Numerator <= Denominator)
1163 return;
1164 ProfRecord->scale(N: Numerator, D: Denominator, Warn: [&](instrprof_error E) {
1165 warn(Message: toString(E: make_error<InstrProfError>(Args&: E)));
1166 });
1167}
1168
// Indices into ProfileSummaryBuilder::DefaultCutoffs that select the
// percentile cutoffs used to derive the cold and hot count thresholds.
constexpr uint64_t ColdPercentileIdx{15};
constexpr uint64_t HotPercentileIdx{11};
1171
1172using sampleprof::FSDiscriminatorPass;
1173
1174// Internal options to set FSDiscriminatorPass. Used in merge and show
1175// commands.
1176static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1177 "fs-discriminator-pass", cl::init(Val: PassLast), cl::Hidden,
1178 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1179 "pass beyond this value. The enum values are defined in "
1180 "Support/Discriminator.h"),
1181 cl::values(clEnumVal(Base, "Use base discriminators only"),
1182 clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1183 clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1184 clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1185 clEnumVal(PassLast, "Use all discriminator bits (default)")));
1186
1187static unsigned getDiscriminatorMask() {
1188 return getN1Bits(N: getFSPassBitEnd(P: FSDiscriminatorPassOption.getValue()));
1189}
1190
1191/// Adjust the instr profile in \p WC based on the sample profile in
1192/// \p Reader.
1193static void
1194adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1195 std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1196 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1197 unsigned InstrProfColdThreshold) {
1198 // Function to its entry in instr profile.
1199 StringMap<InstrProfileEntry> InstrProfileMap;
1200 StringMap<StringRef> StaticFuncMap;
1201 InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1202
1203 auto checkSampleProfileHasFUnique = [&Reader]() {
1204 for (const auto &PD : Reader->getProfiles()) {
1205 auto &FContext = PD.second.getContext();
1206 if (FContext.toString().find(s: FunctionSamples::UniqSuffix) !=
1207 std::string::npos) {
1208 return true;
1209 }
1210 }
1211 return false;
1212 };
1213
1214 bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
1215
1216 auto buildStaticFuncMap = [&StaticFuncMap,
1217 SampleProfileHasFUnique](const StringRef Name) {
1218 std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1219 size_t PrefixPos = StringRef::npos;
1220 for (auto &FilePrefix : FilePrefixes) {
1221 std::string NamePrefix = FilePrefix + GlobalIdentifierDelimiter;
1222 PrefixPos = Name.find_insensitive(Str: NamePrefix);
1223 if (PrefixPos == StringRef::npos)
1224 continue;
1225 PrefixPos += NamePrefix.size();
1226 break;
1227 }
1228
1229 if (PrefixPos == StringRef::npos) {
1230 return;
1231 }
1232
1233 StringRef NewName = Name.drop_front(N: PrefixPos);
1234 StringRef FName = Name.substr(Start: 0, N: PrefixPos - 1);
1235 if (NewName.size() == 0) {
1236 return;
1237 }
1238
1239 // This name should have a static linkage.
1240 size_t PostfixPos = NewName.find(Str: FunctionSamples::UniqSuffix);
1241 bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1242
1243 // If sample profile and instrumented profile do not agree on symbol
1244 // uniqification.
1245 if (SampleProfileHasFUnique != ProfileHasFUnique) {
1246 // If instrumented profile uses -funique-internal-linkage-symbols,
1247 // we need to trim the name.
1248 if (ProfileHasFUnique) {
1249 NewName = NewName.substr(Start: 0, N: PostfixPos);
1250 } else {
1251 // If sample profile uses -funique-internal-linkage-symbols,
1252 // we build the map.
1253 std::string NStr =
1254 NewName.str() + getUniqueInternalLinkagePostfix(FName);
1255 NewName = StringRef(NStr);
1256 StaticFuncMap[NewName] = Name;
1257 return;
1258 }
1259 }
1260
1261 auto [It, Inserted] = StaticFuncMap.try_emplace(Key: NewName, Args: Name);
1262 if (!Inserted)
1263 It->second = DuplicateNameStr;
1264 };
1265
1266 // We need to flatten the SampleFDO profile as the InstrFDO
1267 // profile does not have inlined callsite profiles.
1268 // One caveat is the pre-inlined function -- their samples
1269 // should be collapsed into the caller function.
1270 // Here we do a DFS traversal to get the flatten profile
1271 // info: the sum of entrycount and the max of maxcount.
1272 // Here is the algorithm:
1273 // recursive (FS, root_name) {
1274 // name = FS->getName();
1275 // get samples for FS;
1276 // if (InstrProf.find(name) {
1277 // root_name = name;
1278 // } else {
1279 // if (name is in static_func map) {
1280 // root_name = static_name;
1281 // }
1282 // }
1283 // update the Map entry for root_name;
1284 // for (subfs: FS) {
1285 // recursive(subfs, root_name);
1286 // }
1287 // }
1288 //
1289 // Here is an example.
1290 //
1291 // SampleProfile:
1292 // foo:12345:1000
1293 // 1: 1000
1294 // 2.1: 1000
1295 // 15: 5000
1296 // 4: bar:1000
1297 // 1: 1000
1298 // 2: goo:3000
1299 // 1: 3000
1300 // 8: bar:40000
1301 // 1: 10000
1302 // 2: goo:30000
1303 // 1: 30000
1304 //
1305 // InstrProfile has two entries:
1306 // foo
1307 // bar.cc;bar
1308 //
1309 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1310 // {"foo", {1000, 5000}}
1311 // {"bar.cc;bar", {11000, 30000}}
1312 //
1313 // foo's has an entry count of 1000, and max body count of 5000.
1314 // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1315 // 10000), and max count of 30000 (from the callsite in line 8).
1316 //
1317 // Note that goo's count will remain in bar.cc;bar() as it does not have an
1318 // entry in InstrProfile.
1319 llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1320 auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1321 &InstrProfileMap](const FunctionSamples &FS,
1322 const StringRef &RootName) {
1323 auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1324 const StringRef &RootName,
1325 auto &BuildImpl) -> void {
1326 std::string NameStr = FS.getFunction().str();
1327 const StringRef Name = NameStr;
1328 const StringRef *NewRootName = &RootName;
1329 uint64_t EntrySample = FS.getHeadSamplesEstimate();
1330 uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
1331
1332 auto It = InstrProfileMap.find(Key: Name);
1333 if (It != InstrProfileMap.end()) {
1334 NewRootName = &Name;
1335 } else {
1336 auto NewName = StaticFuncMap.find(Key: Name);
1337 if (NewName != StaticFuncMap.end()) {
1338 It = InstrProfileMap.find(Key: NewName->second);
1339 if (NewName->second != DuplicateNameStr) {
1340 NewRootName = &NewName->second;
1341 }
1342 } else {
1343 // Here the EntrySample is of an inlined function, so we should not
1344 // update the EntrySample in the map.
1345 EntrySample = 0;
1346 }
1347 }
1348 EntrySample += FlattenSampleMap[*NewRootName].first;
1349 MaxBodySample =
1350 std::max(a: FlattenSampleMap[*NewRootName].second, b: MaxBodySample);
1351 FlattenSampleMap[*NewRootName] =
1352 std::make_pair(x&: EntrySample, y&: MaxBodySample);
1353
1354 for (const auto &C : FS.getCallsiteSamples())
1355 for (const auto &F : C.second)
1356 BuildImpl(F.second, *NewRootName, BuildImpl);
1357 };
1358 BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1359 };
1360
1361 for (auto &PD : WC->Writer.getProfileData()) {
1362 // Populate IPBuilder.
1363 for (const auto &PDV : PD.getValue()) {
1364 InstrProfRecord Record = PDV.second;
1365 IPBuilder.addRecord(Record);
1366 }
1367
1368 // If a function has multiple entries in instr profile, skip it.
1369 if (PD.getValue().size() != 1)
1370 continue;
1371
1372 // Initialize InstrProfileMap.
1373 InstrProfRecord *R = &PD.getValue().begin()->second;
1374 StringRef FullName = PD.getKey();
1375 InstrProfileMap[FullName] = InstrProfileEntry(R);
1376 buildStaticFuncMap(FullName);
1377 }
1378
1379 for (auto &PD : Reader->getProfiles()) {
1380 sampleprof::FunctionSamples &FS = PD.second;
1381 std::string Name = FS.getFunction().str();
1382 BuildMaxSampleMap(FS, Name);
1383 }
1384
1385 ProfileSummary InstrPS = *IPBuilder.getSummary();
1386 ProfileSummary SamplePS = Reader->getSummary();
1387
1388 // Compute cold thresholds for instr profile and sample profile.
1389 uint64_t HotSampleThreshold =
1390 ProfileSummaryBuilder::getEntryForPercentile(
1391 DS: SamplePS.getDetailedSummary(),
1392 Percentile: ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1393 .MinCount;
1394 uint64_t ColdSampleThreshold =
1395 ProfileSummaryBuilder::getEntryForPercentile(
1396 DS: SamplePS.getDetailedSummary(),
1397 Percentile: ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1398 .MinCount;
1399 uint64_t HotInstrThreshold =
1400 ProfileSummaryBuilder::getEntryForPercentile(
1401 DS: InstrPS.getDetailedSummary(),
1402 Percentile: ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1403 .MinCount;
1404 uint64_t ColdInstrThreshold =
1405 InstrProfColdThreshold
1406 ? InstrProfColdThreshold
1407 : ProfileSummaryBuilder::getEntryForPercentile(
1408 DS: InstrPS.getDetailedSummary(),
1409 Percentile: ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1410 .MinCount;
1411
1412 // Find hot/warm functions in sample profile which is cold in instr profile
1413 // and adjust the profiles of those functions in the instr profile.
1414 for (const auto &E : FlattenSampleMap) {
1415 uint64_t SampleMaxCount = std::max(a: E.second.first, b: E.second.second);
1416 if (SampleMaxCount < ColdSampleThreshold)
1417 continue;
1418 StringRef Name = E.first();
1419 auto It = InstrProfileMap.find(Key: Name);
1420 if (It == InstrProfileMap.end()) {
1421 auto NewName = StaticFuncMap.find(Key: Name);
1422 if (NewName != StaticFuncMap.end()) {
1423 It = InstrProfileMap.find(Key: NewName->second);
1424 if (NewName->second == DuplicateNameStr) {
1425 WithColor::warning()
1426 << "Static function " << Name
1427 << " has multiple promoted names, cannot adjust profile.\n";
1428 }
1429 }
1430 }
1431 if (It == InstrProfileMap.end() ||
1432 It->second.MaxCount > ColdInstrThreshold ||
1433 It->second.NumEdgeCounters < SupplMinSizeThreshold)
1434 continue;
1435 bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1436 updateInstrProfileEntry(IFE&: It->second, SetToHot, HotInstrThreshold,
1437 ColdInstrThreshold, ZeroCounterThreshold);
1438 }
1439}
1440
1441/// The main function to supplement instr profile with sample profile.
1442/// \Inputs contains the instr profile. \p SampleFilename specifies the
1443/// sample profile. \p OutputFilename specifies the output profile name.
1444/// \p OutputFormat specifies the output profile format. \p OutputSparse
1445/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1446/// specifies the minimal size for the functions whose profile will be
1447/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1448/// a function contains too many zero counters and whether its profile
1449/// should be dropped. \p InstrProfColdThreshold is the user specified
1450/// cold threshold which will override the cold threshold got from the
1451/// instr profile summary.
1452static void supplementInstrProfile(const WeightedFileVector &Inputs,
1453 StringRef SampleFilename, bool OutputSparse,
1454 unsigned SupplMinSizeThreshold,
1455 float ZeroCounterThreshold,
1456 unsigned InstrProfColdThreshold) {
1457 if (OutputFilename == "-")
1458 exitWithError(Message: "cannot write indexed profdata format to stdout");
1459 if (Inputs.size() != 1)
1460 exitWithError(Message: "expect one input to be an instr profile");
1461 if (Inputs[0].Weight != 1)
1462 exitWithError(Message: "expect instr profile doesn't have weight");
1463
1464 StringRef InstrFilename = Inputs[0].Filename;
1465
1466 // Read sample profile.
1467 LLVMContext Context;
1468 auto FS = vfs::getRealFileSystem();
1469 auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1470 Filename: SampleFilename.str(), C&: Context, FS&: *FS, P: FSDiscriminatorPassOption);
1471 if (std::error_code EC = ReaderOrErr.getError())
1472 exitWithErrorCode(EC, Whence: SampleFilename);
1473 auto Reader = std::move(ReaderOrErr.get());
1474 if (std::error_code EC = Reader->read())
1475 exitWithErrorCode(EC, Whence: SampleFilename);
1476
1477 // Read instr profile.
1478 std::mutex ErrorLock;
1479 SmallSet<instrprof_error, 4> WriterErrorCodes;
1480 auto WC = std::make_unique<WriterContext>(args&: OutputSparse, args&: ErrorLock,
1481 args&: WriterErrorCodes);
1482 loadInput(Input: Inputs[0], Remapper: nullptr, Correlator: nullptr, /*ProfiledBinary=*/"", WC: WC.get());
1483 if (WC->Errors.size() > 0)
1484 exitWithError(E: std::move(WC->Errors[0].first), Whence: InstrFilename);
1485
1486 adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1487 InstrProfColdThreshold);
1488 writeInstrProfile(OutputFilename, OutputFormat, Writer&: WC->Writer);
1489}
1490
1491/// Make a copy of the given function samples with all symbol names remapped
1492/// by the provided symbol remapper.
1493static sampleprof::FunctionSamples
1494remapSamples(const sampleprof::FunctionSamples &Samples,
1495 SymbolRemapper &Remapper, sampleprof_error &Error) {
1496 sampleprof::FunctionSamples Result;
1497 Result.setFunction(Remapper(Samples.getFunction()));
1498 Result.addTotalSamples(Num: Samples.getTotalSamples());
1499 Result.addHeadSamples(Num: Samples.getHeadSamples());
1500 for (const auto &BodySample : Samples.getBodySamples()) {
1501 uint32_t MaskedDiscriminator =
1502 BodySample.first.Discriminator & getDiscriminatorMask();
1503 Result.addBodySamples(LineOffset: BodySample.first.LineOffset, Discriminator: MaskedDiscriminator,
1504 Num: BodySample.second.getSamples());
1505 for (const auto &Target : BodySample.second.getCallTargets()) {
1506 Result.addCalledTargetSamples(LineOffset: BodySample.first.LineOffset,
1507 Discriminator: MaskedDiscriminator,
1508 Func: Remapper(Target.first), Num: Target.second);
1509 }
1510 }
1511 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1512 sampleprof::FunctionSamplesMap &Target =
1513 Result.functionSamplesAt(Loc: CallsiteSamples.first);
1514 for (const auto &Callsite : CallsiteSamples.second) {
1515 sampleprof::FunctionSamples Remapped =
1516 remapSamples(Samples: Callsite.second, Remapper, Error);
1517 mergeSampleProfErrors(Accumulator&: Error,
1518 Result: Target[Remapped.getFunction()].merge(Other: Remapped));
1519 }
1520 }
1521 return Result;
1522}
1523
1524static sampleprof::SampleProfileFormat FormatMap[] = {
1525 sampleprof::SPF_None,
1526 sampleprof::SPF_Text,
1527 sampleprof::SPF_None,
1528 sampleprof::SPF_Ext_Binary,
1529 sampleprof::SPF_GCC,
1530 sampleprof::SPF_Binary};
1531
1532static std::unique_ptr<MemoryBuffer>
1533getInputFileBuf(const StringRef &InputFile) {
1534 if (InputFile == "")
1535 return {};
1536
1537 auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
1538 if (!BufOrError)
1539 exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
1540
1541 return std::move(*BufOrError);
1542}
1543
1544static void populateProfileSymbolList(MemoryBuffer *Buffer,
1545 sampleprof::ProfileSymbolList &PSL) {
1546 if (!Buffer)
1547 return;
1548
1549 SmallVector<StringRef, 32> SymbolVec;
1550 StringRef Data = Buffer->getBuffer();
1551 Data.split(A&: SymbolVec, Separator: '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1552
1553 for (StringRef SymbolStr : SymbolVec)
1554 PSL.add(Name: SymbolStr.trim());
1555}
1556
1557static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1558 ProfileFormat OutputFormat,
1559 MemoryBuffer *Buffer,
1560 sampleprof::ProfileSymbolList &WriterList,
1561 bool CompressAllSections, bool UseMD5,
1562 bool GenPartialProfile) {
1563 if (SplitLayout) {
1564 if (OutputFormat == PF_Binary)
1565 warn(Message: "-split-layout is ignored. Specify -extbinary to enable it");
1566 else
1567 Writer.setUseCtxSplitLayout();
1568 }
1569
1570 populateProfileSymbolList(Buffer, PSL&: WriterList);
1571 if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
1572 warn(Message: "Profile Symbol list is not empty but the output format is not "
1573 "ExtBinary format. The list will be lost in the output. ");
1574
1575 Writer.setProfileSymbolList(&WriterList);
1576
1577 if (CompressAllSections) {
1578 if (OutputFormat != PF_Ext_Binary)
1579 warn(Message: "-compress-all-section is ignored. Specify -extbinary to enable it");
1580 else
1581 Writer.setToCompressAllSections();
1582 }
1583 if (UseMD5) {
1584 if (OutputFormat != PF_Ext_Binary)
1585 warn(Message: "-use-md5 is ignored. Specify -extbinary to enable it");
1586 else
1587 Writer.setUseMD5();
1588 }
1589 if (GenPartialProfile) {
1590 if (OutputFormat != PF_Ext_Binary)
1591 warn(Message: "-gen-partial-profile is ignored. Specify -extbinary to enable it");
1592 else
1593 Writer.setPartialProfile();
1594 }
1595}
1596
1597static void mergeSampleProfile(const WeightedFileVector &Inputs,
1598 SymbolRemapper *Remapper,
1599 StringRef ProfileSymbolListFile,
1600 size_t OutputSizeLimit) {
1601 using namespace sampleprof;
1602 SampleProfileMap ProfileMap;
1603 SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1604 LLVMContext Context;
1605 sampleprof::ProfileSymbolList WriterList;
1606 std::optional<bool> ProfileIsProbeBased;
1607 std::optional<bool> ProfileIsCS;
1608 for (const auto &Input : Inputs) {
1609 auto FS = vfs::getRealFileSystem();
1610 auto ReaderOrErr = SampleProfileReader::create(Filename: Input.Filename, C&: Context, FS&: *FS,
1611 P: FSDiscriminatorPassOption);
1612 if (std::error_code EC = ReaderOrErr.getError()) {
1613 warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1614 continue;
1615 }
1616
1617 // We need to keep the readers around until after all the files are
1618 // read so that we do not lose the function names stored in each
1619 // reader's memory. The function names are needed to write out the
1620 // merged profile map.
1621 Readers.push_back(Elt: std::move(ReaderOrErr.get()));
1622 const auto Reader = Readers.back().get();
1623 if (std::error_code EC = Reader->read()) {
1624 warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1625 Readers.pop_back();
1626 continue;
1627 }
1628
1629 SampleProfileMap &Profiles = Reader->getProfiles();
1630 if (ProfileIsProbeBased &&
1631 ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1632 exitWithError(
1633 Message: "cannot merge probe-based profile with non-probe-based profile");
1634 ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1635 if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1636 exitWithError(Message: "cannot merge CS profile with non-CS profile");
1637 ProfileIsCS = FunctionSamples::ProfileIsCS;
1638 for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1639 I != E; ++I) {
1640 sampleprof_error Result = sampleprof_error::success;
1641 FunctionSamples Remapped =
1642 Remapper ? remapSamples(Samples: I->second, Remapper&: *Remapper, Error&: Result)
1643 : FunctionSamples();
1644 FunctionSamples &Samples = Remapper ? Remapped : I->second;
1645 SampleContext FContext = Samples.getContext();
1646 mergeSampleProfErrors(Accumulator&: Result,
1647 Result: ProfileMap[FContext].merge(Other: Samples, Weight: Input.Weight));
1648 if (Result != sampleprof_error::success) {
1649 std::error_code EC = make_error_code(E: Result);
1650 handleMergeWriterError(E: errorCodeToError(EC), WhenceFile: Input.Filename,
1651 WhenceFunction: FContext.toString());
1652 }
1653 }
1654
1655 if (!DropProfileSymbolList) {
1656 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1657 Reader->getProfileSymbolList();
1658 if (ReaderList)
1659 WriterList.merge(List: *ReaderList);
1660 }
1661 }
1662
1663 if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1664 // Use threshold calculated from profile summary unless specified.
1665 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1666 auto Summary = Builder.computeSummaryForProfiles(Profiles: ProfileMap);
1667 uint64_t SampleProfColdThreshold =
1668 ProfileSummaryBuilder::getColdCountThreshold(
1669 DS: (Summary->getDetailedSummary()));
1670
1671 // Trim and merge cold context profile using cold threshold above;
1672 SampleContextTrimmer(ProfileMap)
1673 .trimAndMergeColdContextProfiles(
1674 ColdCountThreshold: SampleProfColdThreshold, TrimColdContext: SampleTrimColdContext,
1675 MergeColdContext: SampleMergeColdContext, ColdContextFrameLength: SampleColdContextFrameDepth, TrimBaseProfileOnly: false);
1676 }
1677
1678 if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1679 ProfileConverter::flattenProfile(ProfileMap, ProfileIsCS: FunctionSamples::ProfileIsCS);
1680 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1681 } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1682 ProfileConverter CSConverter(ProfileMap);
1683 CSConverter.convertCSProfiles();
1684 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1685 }
1686
1687 filterFunctions(ProfileMap);
1688
1689 auto WriterOrErr =
1690 SampleProfileWriter::create(Filename: OutputFilename, Format: FormatMap[OutputFormat]);
1691 if (std::error_code EC = WriterOrErr.getError())
1692 exitWithErrorCode(EC, Whence: OutputFilename);
1693
1694 auto Writer = std::move(WriterOrErr.get());
1695 // WriterList will have StringRef refering to string in Buffer.
1696 // Make sure Buffer lives as long as WriterList.
1697 auto Buffer = getInputFileBuf(InputFile: ProfileSymbolListFile);
1698 handleExtBinaryWriter(Writer&: *Writer, OutputFormat, Buffer: Buffer.get(), WriterList,
1699 CompressAllSections, UseMD5, GenPartialProfile);
1700
1701 // If OutputSizeLimit is 0 (default), it is the same as write().
1702 if (std::error_code EC =
1703 Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1704 exitWithErrorCode(EC);
1705}
1706
1707static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1708 StringRef WeightStr, FileName;
1709 std::tie(args&: WeightStr, args&: FileName) = WeightedFilename.split(Separator: ',');
1710
1711 uint64_t Weight;
1712 if (WeightStr.getAsInteger(Radix: 10, Result&: Weight) || Weight < 1)
1713 exitWithError(Message: "input weight must be a positive integer");
1714
1715 llvm::SmallString<128> ResolvedFileName;
1716 llvm::sys::fs::expand_tilde(path: FileName, output&: ResolvedFileName);
1717
1718 return {.Filename: std::string(ResolvedFileName), .Weight: Weight};
1719}
1720
1721static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1722 StringRef Filename = WF.Filename;
1723 uint64_t Weight = WF.Weight;
1724
1725 // If it's STDIN just pass it on.
1726 if (Filename == "-") {
1727 WNI.push_back(Elt: {.Filename: std::string(Filename), .Weight: Weight});
1728 return;
1729 }
1730
1731 llvm::sys::fs::file_status Status;
1732 llvm::sys::fs::status(path: Filename, result&: Status);
1733 if (!llvm::sys::fs::exists(status: Status))
1734 exitWithErrorCode(EC: make_error_code(E: errc::no_such_file_or_directory),
1735 Whence: Filename);
1736 // If it's a source file, collect it.
1737 if (llvm::sys::fs::is_regular_file(status: Status)) {
1738 WNI.push_back(Elt: {.Filename: std::string(Filename), .Weight: Weight});
1739 return;
1740 }
1741
1742 if (llvm::sys::fs::is_directory(status: Status)) {
1743 std::error_code EC;
1744 for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1745 F != E && !EC; F.increment(ec&: EC)) {
1746 if (llvm::sys::fs::is_regular_file(Path: F->path())) {
1747 addWeightedInput(WNI, WF: {.Filename: F->path(), .Weight: Weight});
1748 }
1749 }
1750 if (EC)
1751 exitWithErrorCode(EC, Whence: Filename);
1752 }
1753}
1754
1755static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1756 WeightedFileVector &WFV) {
1757 if (!Buffer)
1758 return;
1759
1760 SmallVector<StringRef, 8> Entries;
1761 StringRef Data = Buffer->getBuffer();
1762 Data.split(A&: Entries, Separator: '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1763 for (const StringRef &FileWeightEntry : Entries) {
1764 StringRef SanitizedEntry = FileWeightEntry.trim(Chars: " \t\v\f\r");
1765 // Skip comments.
1766 if (SanitizedEntry.starts_with(Prefix: "#"))
1767 continue;
1768 // If there's no comma, it's an unweighted profile.
1769 else if (!SanitizedEntry.contains(C: ','))
1770 addWeightedInput(WNI&: WFV, WF: {.Filename: std::string(SanitizedEntry), .Weight: 1});
1771 else
1772 addWeightedInput(WNI&: WFV, WF: parseWeightedFile(WeightedFilename: SanitizedEntry));
1773 }
1774}
1775
1776static int merge_main(StringRef ProgName) {
1777 WeightedFileVector WeightedInputs;
1778 for (StringRef Filename : InputFilenames)
1779 addWeightedInput(WNI&: WeightedInputs, WF: {.Filename: std::string(Filename), .Weight: 1});
1780 for (StringRef WeightedFilename : WeightedInputFilenames)
1781 addWeightedInput(WNI&: WeightedInputs, WF: parseWeightedFile(WeightedFilename));
1782
1783 // Make sure that the file buffer stays alive for the duration of the
1784 // weighted input vector's lifetime.
1785 auto Buffer = getInputFileBuf(InputFile: InputFilenamesFile);
1786 parseInputFilenamesFile(Buffer: Buffer.get(), WFV&: WeightedInputs);
1787
1788 if (WeightedInputs.empty())
1789 exitWithError(Message: "no input files specified. See " + ProgName + " merge -help");
1790
1791 if (DumpInputFileList) {
1792 for (auto &WF : WeightedInputs)
1793 outs() << WF.Weight << "," << WF.Filename << "\n";
1794 return 0;
1795 }
1796
1797 std::unique_ptr<SymbolRemapper> Remapper;
1798 if (!RemappingFile.empty())
1799 Remapper = SymbolRemapper::create(InputFile: RemappingFile);
1800
1801 if (!SupplInstrWithSample.empty()) {
1802 if (ProfileKind != instr)
1803 exitWithError(
1804 Message: "-supplement-instr-with-sample can only work with -instr. ");
1805
1806 supplementInstrProfile(Inputs: WeightedInputs, SampleFilename: SupplInstrWithSample, OutputSparse,
1807 SupplMinSizeThreshold, ZeroCounterThreshold,
1808 InstrProfColdThreshold);
1809 return 0;
1810 }
1811
1812 if (ProfileKind == instr)
1813 mergeInstrProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), MaxDbgCorrelationWarnings,
1814 ProfiledBinary);
1815 else
1816 mergeSampleProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), ProfileSymbolListFile,
1817 OutputSizeLimit);
1818 return 0;
1819}
1820
1821/// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1822static void overlapInstrProfile(const std::string &BaseFilename,
1823 const std::string &TestFilename,
1824 const OverlapFuncFilters &FuncFilter,
1825 raw_fd_ostream &OS, bool IsCS) {
1826 std::mutex ErrorLock;
1827 SmallSet<instrprof_error, 4> WriterErrorCodes;
1828 WriterContext Context(false, ErrorLock, WriterErrorCodes);
1829 WeightedFile WeightedInput{.Filename: BaseFilename, .Weight: 1};
1830 OverlapStats Overlap;
1831 Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1832 if (E)
1833 exitWithError(E: std::move(E), Whence: "error in getting profile count sums");
1834 if (Overlap.Base.CountSum < 1.0f) {
1835 OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1836 exit(status: 0);
1837 }
1838 if (Overlap.Test.CountSum < 1.0f) {
1839 OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1840 exit(status: 0);
1841 }
1842 loadInput(Input: WeightedInput, Remapper: nullptr, Correlator: nullptr, /*ProfiledBinary=*/"", WC: &Context);
1843 overlapInput(BaseFilename, TestFilename, WC: &Context, Overlap, FuncFilter, OS,
1844 IsCS);
1845 Overlap.dump(OS);
1846}
1847
1848namespace {
1849struct SampleOverlapStats {
1850 SampleContext BaseName;
1851 SampleContext TestName;
1852 // Number of overlap units
1853 uint64_t OverlapCount = 0;
1854 // Total samples of overlap units
1855 uint64_t OverlapSample = 0;
1856 // Number of and total samples of units that only present in base or test
1857 // profile
1858 uint64_t BaseUniqueCount = 0;
1859 uint64_t BaseUniqueSample = 0;
1860 uint64_t TestUniqueCount = 0;
1861 uint64_t TestUniqueSample = 0;
1862 // Number of units and total samples in base or test profile
1863 uint64_t BaseCount = 0;
1864 uint64_t BaseSample = 0;
1865 uint64_t TestCount = 0;
1866 uint64_t TestSample = 0;
1867 // Number of and total samples of units that present in at least one profile
1868 uint64_t UnionCount = 0;
1869 uint64_t UnionSample = 0;
1870 // Weighted similarity
1871 double Similarity = 0.0;
1872 // For SampleOverlapStats instances representing functions, weights of the
1873 // function in base and test profiles
1874 double BaseWeight = 0.0;
1875 double TestWeight = 0.0;
1876
1877 SampleOverlapStats() = default;
1878};
1879} // end anonymous namespace
1880
namespace {
/// Sample statistics collected for one function: the sum of its samples, its
/// largest single sample count, and the number of hot blocks.
struct FuncSampleStats {
  uint64_t SampleSum{0};
  uint64_t MaxSample{0};
  uint64_t HotBlockCount{0};

  /// All statistics start at zero.
  FuncSampleStats() = default;

  /// Build the statistics from already-known values.
  FuncSampleStats(uint64_t Sum, uint64_t Max, uint64_t HotBlocks)
      : SampleSum(Sum), MaxSample(Max), HotBlockCount(HotBlocks) {}
};
} // end anonymous namespace
1893
namespace {
/// Result of comparing the current elements of two key-sorted sequences.
///  - MS_Match:        both sequences have an element with the same key.
///  - MS_FirstUnique:  the current key only exists in the first sequence.
///  - MS_SecondUnique: the current key only exists in the second sequence.
///  - MS_None:         no comparison has been made yet.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Drives a sort-merge walk over two maps that are ordered by the same key.
/// The class should be instantiated with a map iterator type; keys are read
/// through `->first` and compared with `operator<`.
///
/// Typical use:
///   Step.updateOneStep();
///   while (!Step.areBothFinished()) { ...; Step.updateOneStep(); }
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd), Status(MS_None) {}

  /// True once both sequences have been consumed completely.
  bool areBothFinished() const {
    return (FirstIter == FirstEnd && SecondIter == SecondEnd);
  }

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// Advance one step based on the previous match status unless the previous
  /// status is MS_None. Then update Status based on the comparison between two
  /// container iterators at the current step. If the previous status is
  /// MS_None, it means two iterators are at the beginning and no comparison has
  /// been made, so we simply update Status without advancing the iterators.
  ///
  /// Calling this after both sequences are finished is a no-op: the iterators
  /// and the status keep their last values.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  /// Status computed by the most recent updateOneStep() that still had
  /// elements to compare.
  MatchStatus getMatchStatus() const { return Status; }

private:
  // Current iterator and end iterator of the first container.
  T FirstIter;
  T FirstEnd;
  // Current iterator and end iterator of the second container.
  T SecondIter;
  T SecondEnd;
  // Match status of the current step.
  MatchStatus Status;
};
} // end anonymous namespace

template <class T> void MatchStep<T>::updateOneStep() {
  // Nothing is left to advance once both ranges are exhausted. Returning here
  // keeps a redundant call from incrementing a past-the-end iterator.
  if (areBothFinished())
    return;

  // Consume the element(s) reported by the previous step.
  switch (Status) {
  case MS_Match:
    ++FirstIter;
    ++SecondIter;
    break;
  case MS_FirstUnique:
    ++FirstIter;
    break;
  case MS_SecondUnique:
    ++SecondIter;
    break;
  case MS_None:
    break;
  }

  // Update Status according to iterators at the current step. When the walk
  // has just finished, the last status is left in place.
  if (areBothFinished())
    return;
  if (FirstIter != FirstEnd &&
      (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
    Status = MS_FirstUnique;
  else if (SecondIter != SecondEnd &&
           (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
    Status = MS_SecondUnique;
  else
    Status = MS_Match;
}
1968
1969// Return the sum of line/block samples, the max line/block sample, and the
1970// number of line/block samples above the given threshold in a function
1971// including its inlinees.
1972static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1973 FuncSampleStats &FuncStats,
1974 uint64_t HotThreshold) {
1975 for (const auto &L : Func.getBodySamples()) {
1976 uint64_t Sample = L.second.getSamples();
1977 FuncStats.SampleSum += Sample;
1978 FuncStats.MaxSample = std::max(a: FuncStats.MaxSample, b: Sample);
1979 if (Sample >= HotThreshold)
1980 ++FuncStats.HotBlockCount;
1981 }
1982
1983 for (const auto &C : Func.getCallsiteSamples()) {
1984 for (const auto &F : C.second)
1985 getFuncSampleStats(Func: F.second, FuncStats, HotThreshold);
1986 }
1987}
1988
1989/// Predicate that determines if a function is hot with a given threshold. We
1990/// keep it separate from its callsites for possible extension in the future.
1991static bool isFunctionHot(const FuncSampleStats &FuncStats,
1992 uint64_t HotThreshold) {
1993 // We intentionally compare the maximum sample count in a function with the
1994 // HotThreshold to get an approximate determination on hot functions.
1995 return (FuncStats.MaxSample >= HotThreshold);
1996}
1997
1998namespace {
1999class SampleOverlapAggregator {
2000public:
2001 SampleOverlapAggregator(const std::string &BaseFilename,
2002 const std::string &TestFilename,
2003 double LowSimilarityThreshold, double Epsilon,
2004 const OverlapFuncFilters &FuncFilter)
2005 : BaseFilename(BaseFilename), TestFilename(TestFilename),
2006 LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
2007 FuncFilter(FuncFilter) {}
2008
2009 /// Detect 0-sample input profile and report to output stream. This interface
2010 /// should be called after loadProfiles().
2011 bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
2012
2013 /// Write out function-level similarity statistics for functions specified by
2014 /// options --function, --value-cutoff, and --similarity-cutoff.
2015 void dumpFuncSimilarity(raw_fd_ostream &OS) const;
2016
2017 /// Write out program-level similarity and overlap statistics.
2018 void dumpProgramSummary(raw_fd_ostream &OS) const;
2019
2020 /// Write out hot-function and hot-block statistics for base_profile,
2021 /// test_profile, and their overlap. For both cases, the overlap HO is
2022 /// calculated as follows:
2023 /// Given the number of functions (or blocks) that are hot in both profiles
2024 /// HCommon and the number of functions (or blocks) that are hot in at
2025 /// least one profile HUnion, HO = HCommon / HUnion.
2026 void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
2027
2028 /// This function tries matching functions in base and test profiles. For each
2029 /// pair of matched functions, it aggregates the function-level
2030 /// similarity into a profile-level similarity. It also dump function-level
2031 /// similarity information of functions specified by --function,
2032 /// --value-cutoff, and --similarity-cutoff options. The program-level
2033 /// similarity PS is computed as follows:
2034 /// Given function-level similarity FS(A) for all function A, the
2035 /// weight of function A in base profile WB(A), and the weight of function
2036 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
2037 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
2038 /// meaning no-overlap.
2039 void computeSampleProfileOverlap(raw_fd_ostream &OS);
2040
2041 /// Initialize ProfOverlap with the sum of samples in base and test
2042 /// profiles. This function also computes and keeps the sum of samples and
2043 /// max sample counts of each function in BaseStats and TestStats for later
2044 /// use to avoid re-computations.
2045 void initializeSampleProfileOverlap();
2046
2047 /// Load profiles specified by BaseFilename and TestFilename.
2048 std::error_code loadProfiles();
2049
2050 using FuncSampleStatsMap =
2051 std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
2052
2053private:
2054 SampleOverlapStats ProfOverlap;
2055 SampleOverlapStats HotFuncOverlap;
2056 SampleOverlapStats HotBlockOverlap;
2057 std::string BaseFilename;
2058 std::string TestFilename;
2059 std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
2060 std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
2061 // BaseStats and TestStats hold FuncSampleStats for each function, with
2062 // function name as the key.
2063 FuncSampleStatsMap BaseStats;
2064 FuncSampleStatsMap TestStats;
2065 // Low similarity threshold in floating point number
2066 double LowSimilarityThreshold;
2067 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
2068 // for tracking hot blocks.
2069 uint64_t BaseHotThreshold;
2070 uint64_t TestHotThreshold;
2071 // A small threshold used to round the results of floating point accumulations
2072 // to resolve imprecision.
2073 const double Epsilon;
2074 std::multimap<double, SampleOverlapStats, std::greater<double>>
2075 FuncSimilarityDump;
2076 // FuncFilter carries specifications in options --value-cutoff and
2077 // --function.
2078 OverlapFuncFilters FuncFilter;
2079 // Column offsets for printing the function-level details table.
2080 static const unsigned int TestWeightCol = 15;
2081 static const unsigned int SimilarityCol = 30;
2082 static const unsigned int OverlapCol = 43;
2083 static const unsigned int BaseUniqueCol = 53;
2084 static const unsigned int TestUniqueCol = 67;
2085 static const unsigned int BaseSampleCol = 81;
2086 static const unsigned int TestSampleCol = 96;
2087 static const unsigned int FuncNameCol = 111;
2088
2089 /// Return a similarity of two line/block sample counters in the same
2090 /// function in base and test profiles. The line/block-similarity BS(i) is
2091 /// computed as follows:
2092 /// For an offsets i, given the sample count at i in base profile BB(i),
2093 /// the sample count at i in test profile BT(i), the sum of sample counts
2094 /// in this function in base profile SB, and the sum of sample counts in
2095 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
2096 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
2097 double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
2098 const SampleOverlapStats &FuncOverlap) const;
2099
2100 void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
2101 uint64_t HotBlockCount);
2102
2103 void getHotFunctions(const FuncSampleStatsMap &ProfStats,
2104 FuncSampleStatsMap &HotFunc,
2105 uint64_t HotThreshold) const;
2106
2107 void computeHotFuncOverlap();
2108
2109 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2110 /// Difference for two sample units in a matched function according to the
2111 /// given match status.
2112 void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
2113 uint64_t HotBlockCount,
2114 SampleOverlapStats &FuncOverlap,
2115 double &Difference, MatchStatus Status);
2116
2117 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2118 /// Difference for unmatched callees that only present in one profile in a
2119 /// matched caller function.
2120 void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
2121 SampleOverlapStats &FuncOverlap,
2122 double &Difference, MatchStatus Status);
2123
2124 /// This function updates sample overlap statistics of an overlap function in
2125 /// base and test profile. It also calculates a function-internal similarity
2126 /// FIS as follows:
2127 /// For offsets i that have samples in at least one profile in this
2128 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
2129 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2130 /// 0.0 meaning no overlap.
2131 double computeSampleFunctionInternalOverlap(
2132 const sampleprof::FunctionSamples &BaseFunc,
2133 const sampleprof::FunctionSamples &TestFunc,
2134 SampleOverlapStats &FuncOverlap);
2135
2136 /// Function-level similarity (FS) is a weighted value over function internal
2137 /// similarity (FIS). This function computes a function's FS from its FIS by
2138 /// applying the weight.
2139 double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
2140 uint64_t TestFuncSample) const;
2141
2142 /// The function-level similarity FS(A) for a function A is computed as
2143 /// follows:
2144 /// Compute a function-internal similarity FIS(A) by
2145 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
2146 /// function A in base profile WB(A), and the weight of function A in test
2147 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2148 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2149 double
2150 computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
2151 const sampleprof::FunctionSamples *TestFunc,
2152 SampleOverlapStats *FuncOverlap,
2153 uint64_t BaseFuncSample,
2154 uint64_t TestFuncSample);
2155
2156 /// Profile-level similarity (PS) is a weighted aggregate over function-level
2157 /// similarities (FS). This method weights the FS value by the function
2158 /// weights in the base and test profiles for the aggregation.
2159 double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
2160 uint64_t TestFuncSample) const;
2161};
2162} // end anonymous namespace
2163
2164bool SampleOverlapAggregator::detectZeroSampleProfile(
2165 raw_fd_ostream &OS) const {
2166 bool HaveZeroSample = false;
2167 if (ProfOverlap.BaseSample == 0) {
2168 OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2169 HaveZeroSample = true;
2170 }
2171 if (ProfOverlap.TestSample == 0) {
2172 OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2173 HaveZeroSample = true;
2174 }
2175 return HaveZeroSample;
2176}
2177
2178double SampleOverlapAggregator::computeBlockSimilarity(
2179 uint64_t BaseSample, uint64_t TestSample,
2180 const SampleOverlapStats &FuncOverlap) const {
2181 double BaseFrac = 0.0;
2182 double TestFrac = 0.0;
2183 if (FuncOverlap.BaseSample > 0)
2184 BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2185 if (FuncOverlap.TestSample > 0)
2186 TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2187 return 1.0 - std::fabs(x: BaseFrac - TestFrac);
2188}
2189
2190void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2191 uint64_t TestSample,
2192 uint64_t HotBlockCount) {
2193 bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2194 bool IsTestHot = (TestSample >= TestHotThreshold);
2195 if (!IsBaseHot && !IsTestHot)
2196 return;
2197
2198 HotBlockOverlap.UnionCount += HotBlockCount;
2199 if (IsBaseHot)
2200 HotBlockOverlap.BaseCount += HotBlockCount;
2201 if (IsTestHot)
2202 HotBlockOverlap.TestCount += HotBlockCount;
2203 if (IsBaseHot && IsTestHot)
2204 HotBlockOverlap.OverlapCount += HotBlockCount;
2205}
2206
2207void SampleOverlapAggregator::getHotFunctions(
2208 const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2209 uint64_t HotThreshold) const {
2210 for (const auto &F : ProfStats) {
2211 if (isFunctionHot(FuncStats: F.second, HotThreshold))
2212 HotFunc.emplace(args: F.first, args: F.second);
2213 }
2214}
2215
2216void SampleOverlapAggregator::computeHotFuncOverlap() {
2217 FuncSampleStatsMap BaseHotFunc;
2218 getHotFunctions(ProfStats: BaseStats, HotFunc&: BaseHotFunc, HotThreshold: BaseHotThreshold);
2219 HotFuncOverlap.BaseCount = BaseHotFunc.size();
2220
2221 FuncSampleStatsMap TestHotFunc;
2222 getHotFunctions(ProfStats: TestStats, HotFunc&: TestHotFunc, HotThreshold: TestHotThreshold);
2223 HotFuncOverlap.TestCount = TestHotFunc.size();
2224 HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2225
2226 for (const auto &F : BaseHotFunc) {
2227 if (TestHotFunc.count(x: F.first))
2228 ++HotFuncOverlap.OverlapCount;
2229 else
2230 ++HotFuncOverlap.UnionCount;
2231 }
2232}
2233
2234void SampleOverlapAggregator::updateOverlapStatsForFunction(
2235 uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2236 SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2237 assert(Status != MS_None &&
2238 "Match status should be updated before updating overlap statistics");
2239 if (Status == MS_FirstUnique) {
2240 TestSample = 0;
2241 FuncOverlap.BaseUniqueSample += BaseSample;
2242 } else if (Status == MS_SecondUnique) {
2243 BaseSample = 0;
2244 FuncOverlap.TestUniqueSample += TestSample;
2245 } else {
2246 ++FuncOverlap.OverlapCount;
2247 }
2248
2249 FuncOverlap.UnionSample += std::max(a: BaseSample, b: TestSample);
2250 FuncOverlap.OverlapSample += std::min(a: BaseSample, b: TestSample);
2251 Difference +=
2252 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2253 updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2254}
2255
2256void SampleOverlapAggregator::updateForUnmatchedCallee(
2257 const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2258 double &Difference, MatchStatus Status) {
2259 assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
2260 "Status must be either of the two unmatched cases");
2261 FuncSampleStats FuncStats;
2262 if (Status == MS_FirstUnique) {
2263 getFuncSampleStats(Func, FuncStats, HotThreshold: BaseHotThreshold);
2264 updateOverlapStatsForFunction(BaseSample: FuncStats.SampleSum, TestSample: 0,
2265 HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2266 Difference, Status);
2267 } else {
2268 getFuncSampleStats(Func, FuncStats, HotThreshold: TestHotThreshold);
2269 updateOverlapStatsForFunction(BaseSample: 0, TestSample: FuncStats.SampleSum,
2270 HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2271 Difference, Status);
2272 }
2273}
2274
2275double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2276 const sampleprof::FunctionSamples &BaseFunc,
2277 const sampleprof::FunctionSamples &TestFunc,
2278 SampleOverlapStats &FuncOverlap) {
2279
2280 using namespace sampleprof;
2281
2282 double Difference = 0;
2283
2284 // Accumulate Difference for regular line/block samples in the function.
2285 // We match them through sort-merge join algorithm because
2286 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2287 // by their offsets.
2288 MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2289 BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2290 TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2291 BlockIterStep.updateOneStep();
2292 while (!BlockIterStep.areBothFinished()) {
2293 uint64_t BaseSample =
2294 BlockIterStep.isFirstFinished()
2295 ? 0
2296 : BlockIterStep.getFirstIter()->second.getSamples();
2297 uint64_t TestSample =
2298 BlockIterStep.isSecondFinished()
2299 ? 0
2300 : BlockIterStep.getSecondIter()->second.getSamples();
2301 updateOverlapStatsForFunction(BaseSample, TestSample, HotBlockCount: 1, FuncOverlap,
2302 Difference, Status: BlockIterStep.getMatchStatus());
2303
2304 BlockIterStep.updateOneStep();
2305 }
2306
2307 // Accumulate Difference for callsite lines in the function. We match
2308 // them through sort-merge algorithm because
2309 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2310 // ordered by their offsets.
2311 MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2312 BaseFunc.getCallsiteSamples().cbegin(),
2313 BaseFunc.getCallsiteSamples().cend(),
2314 TestFunc.getCallsiteSamples().cbegin(),
2315 TestFunc.getCallsiteSamples().cend());
2316 CallsiteIterStep.updateOneStep();
2317 while (!CallsiteIterStep.areBothFinished()) {
2318 MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2319 assert(CallsiteStepStatus != MS_None &&
2320 "Match status should be updated before entering loop body");
2321
2322 if (CallsiteStepStatus != MS_Match) {
2323 auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2324 ? CallsiteIterStep.getFirstIter()
2325 : CallsiteIterStep.getSecondIter();
2326 for (const auto &F : Callsite->second)
2327 updateForUnmatchedCallee(Func: F.second, FuncOverlap, Difference,
2328 Status: CallsiteStepStatus);
2329 } else {
2330 // There may be multiple inlinees at the same offset, so we need to try
2331 // matching all of them. This match is implemented through sort-merge
2332 // algorithm because callsite records at the same offset are ordered by
2333 // function names.
2334 MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2335 CallsiteIterStep.getFirstIter()->second.cbegin(),
2336 CallsiteIterStep.getFirstIter()->second.cend(),
2337 CallsiteIterStep.getSecondIter()->second.cbegin(),
2338 CallsiteIterStep.getSecondIter()->second.cend());
2339 CalleeIterStep.updateOneStep();
2340 while (!CalleeIterStep.areBothFinished()) {
2341 MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2342 if (CalleeStepStatus != MS_Match) {
2343 auto Callee = (CalleeStepStatus == MS_FirstUnique)
2344 ? CalleeIterStep.getFirstIter()
2345 : CalleeIterStep.getSecondIter();
2346 updateForUnmatchedCallee(Func: Callee->second, FuncOverlap, Difference,
2347 Status: CalleeStepStatus);
2348 } else {
2349 // An inlined function can contain other inlinees inside, so compute
2350 // the Difference recursively.
2351 Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2352 BaseFunc: CalleeIterStep.getFirstIter()->second,
2353 TestFunc: CalleeIterStep.getSecondIter()->second,
2354 FuncOverlap);
2355 }
2356 CalleeIterStep.updateOneStep();
2357 }
2358 }
2359 CallsiteIterStep.updateOneStep();
2360 }
2361
2362 // Difference reflects the total differences of line/block samples in this
2363 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2364 // reflect the similarity between function profiles in [0.0f to 1.0f].
2365 return (2.0 - Difference) / 2;
2366}
2367
2368double SampleOverlapAggregator::weightForFuncSimilarity(
2369 double FuncInternalSimilarity, uint64_t BaseFuncSample,
2370 uint64_t TestFuncSample) const {
2371 // Compute the weight as the distance between the function weights in two
2372 // profiles.
2373 double BaseFrac = 0.0;
2374 double TestFrac = 0.0;
2375 assert(ProfOverlap.BaseSample > 0 &&
2376 "Total samples in base profile should be greater than 0");
2377 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2378 assert(ProfOverlap.TestSample > 0 &&
2379 "Total samples in test profile should be greater than 0");
2380 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2381 double WeightDistance = std::fabs(x: BaseFrac - TestFrac);
2382
2383 // Take WeightDistance into the similarity.
2384 return FuncInternalSimilarity * (1 - WeightDistance);
2385}
2386
2387double
2388SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2389 uint64_t BaseFuncSample,
2390 uint64_t TestFuncSample) const {
2391
2392 double BaseFrac = 0.0;
2393 double TestFrac = 0.0;
2394 assert(ProfOverlap.BaseSample > 0 &&
2395 "Total samples in base profile should be greater than 0");
2396 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
2397 assert(ProfOverlap.TestSample > 0 &&
2398 "Total samples in test profile should be greater than 0");
2399 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
2400 return FuncSimilarity * (BaseFrac + TestFrac);
2401}
2402
2403double SampleOverlapAggregator::computeSampleFunctionOverlap(
2404 const sampleprof::FunctionSamples *BaseFunc,
2405 const sampleprof::FunctionSamples *TestFunc,
2406 SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2407 uint64_t TestFuncSample) {
2408 // Default function internal similarity before weighted, meaning two functions
2409 // has no overlap.
2410 const double DefaultFuncInternalSimilarity = 0;
2411 double FuncSimilarity;
2412 double FuncInternalSimilarity;
2413
2414 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2415 // In this case, we use DefaultFuncInternalSimilarity as the function internal
2416 // similarity.
2417 if (!BaseFunc || !TestFunc) {
2418 FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2419 } else {
2420 assert(FuncOverlap != nullptr &&
2421 "FuncOverlap should be provided in this case");
2422 FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2423 BaseFunc: *BaseFunc, TestFunc: *TestFunc, FuncOverlap&: *FuncOverlap);
2424 // Now, FuncInternalSimilarity may be a little less than 0 due to
2425 // imprecision of floating point accumulations. Make it zero if the
2426 // difference is below Epsilon.
2427 FuncInternalSimilarity = (std::fabs(x: FuncInternalSimilarity - 0) < Epsilon)
2428 ? 0
2429 : FuncInternalSimilarity;
2430 }
2431 FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2432 BaseFuncSample, TestFuncSample);
2433 return FuncSimilarity;
2434}
2435
2436void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2437 using namespace sampleprof;
2438
2439 std::unordered_map<SampleContext, const FunctionSamples *,
2440 SampleContext::Hash>
2441 BaseFuncProf;
2442 const auto &BaseProfiles = BaseReader->getProfiles();
2443 for (const auto &BaseFunc : BaseProfiles) {
2444 BaseFuncProf.emplace(args&: BaseFunc.second.getContext(), args: &(BaseFunc.second));
2445 }
2446 ProfOverlap.UnionCount = BaseFuncProf.size();
2447
2448 const auto &TestProfiles = TestReader->getProfiles();
2449 for (const auto &TestFunc : TestProfiles) {
2450 SampleOverlapStats FuncOverlap;
2451 FuncOverlap.TestName = TestFunc.second.getContext();
2452 assert(TestStats.count(FuncOverlap.TestName) &&
2453 "TestStats should have records for all functions in test profile "
2454 "except inlinees");
2455 FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2456
2457 bool Matched = false;
2458 const auto Match = BaseFuncProf.find(x: FuncOverlap.TestName);
2459 if (Match == BaseFuncProf.end()) {
2460 const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2461 ++ProfOverlap.TestUniqueCount;
2462 ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2463 FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2464
2465 updateHotBlockOverlap(BaseSample: 0, TestSample: FuncStats.SampleSum, HotBlockCount: FuncStats.HotBlockCount);
2466
2467 double FuncSimilarity = computeSampleFunctionOverlap(
2468 BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: 0, TestFuncSample: FuncStats.SampleSum);
2469 ProfOverlap.Similarity +=
2470 weightByImportance(FuncSimilarity, BaseFuncSample: 0, TestFuncSample: FuncStats.SampleSum);
2471
2472 ++ProfOverlap.UnionCount;
2473 ProfOverlap.UnionSample += FuncStats.SampleSum;
2474 } else {
2475 ++ProfOverlap.OverlapCount;
2476
2477 // Two functions match with each other. Compute function-level overlap and
2478 // aggregate them into profile-level overlap.
2479 FuncOverlap.BaseName = Match->second->getContext();
2480 assert(BaseStats.count(FuncOverlap.BaseName) &&
2481 "BaseStats should have records for all functions in base profile "
2482 "except inlinees");
2483 FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2484
2485 FuncOverlap.Similarity = computeSampleFunctionOverlap(
2486 BaseFunc: Match->second, TestFunc: &TestFunc.second, FuncOverlap: &FuncOverlap, BaseFuncSample: FuncOverlap.BaseSample,
2487 TestFuncSample: FuncOverlap.TestSample);
2488 ProfOverlap.Similarity +=
2489 weightByImportance(FuncSimilarity: FuncOverlap.Similarity, BaseFuncSample: FuncOverlap.BaseSample,
2490 TestFuncSample: FuncOverlap.TestSample);
2491 ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2492 ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2493
2494 // Accumulate the percentage of base unique and test unique samples into
2495 // ProfOverlap.
2496 ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2497 ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2498
2499 // Remove matched base functions for later reporting functions not found
2500 // in test profile.
2501 BaseFuncProf.erase(position: Match);
2502 Matched = true;
2503 }
2504
2505 // Print function-level similarity information if specified by options.
2506 assert(TestStats.count(FuncOverlap.TestName) &&
2507 "TestStats should have records for all functions in test profile "
2508 "except inlinees");
2509 if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2510 (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2511 (Matched && !FuncFilter.NameFilter.empty() &&
2512 FuncOverlap.BaseName.toString().find(str: FuncFilter.NameFilter) !=
2513 std::string::npos)) {
2514 assert(ProfOverlap.BaseSample > 0 &&
2515 "Total samples in base profile should be greater than 0");
2516 FuncOverlap.BaseWeight =
2517 static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2518 assert(ProfOverlap.TestSample > 0 &&
2519 "Total samples in test profile should be greater than 0");
2520 FuncOverlap.TestWeight =
2521 static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2522 FuncSimilarityDump.emplace(args&: FuncOverlap.BaseWeight, args&: FuncOverlap);
2523 }
2524 }
2525
2526 // Traverse through functions in base profile but not in test profile.
2527 for (const auto &F : BaseFuncProf) {
2528 assert(BaseStats.count(F.second->getContext()) &&
2529 "BaseStats should have records for all functions in base profile "
2530 "except inlinees");
2531 const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2532 ++ProfOverlap.BaseUniqueCount;
2533 ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2534
2535 updateHotBlockOverlap(BaseSample: FuncStats.SampleSum, TestSample: 0, HotBlockCount: FuncStats.HotBlockCount);
2536
2537 double FuncSimilarity = computeSampleFunctionOverlap(
2538 BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: 0);
2539 ProfOverlap.Similarity +=
2540 weightByImportance(FuncSimilarity, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: 0);
2541
2542 ProfOverlap.UnionSample += FuncStats.SampleSum;
2543 }
2544
2545 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2546 // of floating point accumulations. Make it 1.0 if the difference is below
2547 // Epsilon.
2548 ProfOverlap.Similarity = (std::fabs(x: ProfOverlap.Similarity - 1) < Epsilon)
2549 ? 1
2550 : ProfOverlap.Similarity;
2551
2552 computeHotFuncOverlap();
2553}
2554
2555void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2556 const auto &BaseProf = BaseReader->getProfiles();
2557 for (const auto &I : BaseProf) {
2558 ++ProfOverlap.BaseCount;
2559 FuncSampleStats FuncStats;
2560 getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: BaseHotThreshold);
2561 ProfOverlap.BaseSample += FuncStats.SampleSum;
2562 BaseStats.emplace(args&: I.second.getContext(), args&: FuncStats);
2563 }
2564
2565 const auto &TestProf = TestReader->getProfiles();
2566 for (const auto &I : TestProf) {
2567 ++ProfOverlap.TestCount;
2568 FuncSampleStats FuncStats;
2569 getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: TestHotThreshold);
2570 ProfOverlap.TestSample += FuncStats.SampleSum;
2571 TestStats.emplace(args&: I.second.getContext(), args&: FuncStats);
2572 }
2573
2574 ProfOverlap.BaseName = StringRef(BaseFilename);
2575 ProfOverlap.TestName = StringRef(TestFilename);
2576}
2577
2578void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2579 using namespace sampleprof;
2580
2581 if (FuncSimilarityDump.empty())
2582 return;
2583
2584 formatted_raw_ostream FOS(OS);
2585 FOS << "Function-level details:\n";
2586 FOS << "Base weight";
2587 FOS.PadToColumn(NewCol: TestWeightCol);
2588 FOS << "Test weight";
2589 FOS.PadToColumn(NewCol: SimilarityCol);
2590 FOS << "Similarity";
2591 FOS.PadToColumn(NewCol: OverlapCol);
2592 FOS << "Overlap";
2593 FOS.PadToColumn(NewCol: BaseUniqueCol);
2594 FOS << "Base unique";
2595 FOS.PadToColumn(NewCol: TestUniqueCol);
2596 FOS << "Test unique";
2597 FOS.PadToColumn(NewCol: BaseSampleCol);
2598 FOS << "Base samples";
2599 FOS.PadToColumn(NewCol: TestSampleCol);
2600 FOS << "Test samples";
2601 FOS.PadToColumn(NewCol: FuncNameCol);
2602 FOS << "Function name\n";
2603 for (const auto &F : FuncSimilarityDump) {
2604 double OverlapPercent =
2605 F.second.UnionSample > 0
2606 ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2607 : 0;
2608 double BaseUniquePercent =
2609 F.second.BaseSample > 0
2610 ? static_cast<double>(F.second.BaseUniqueSample) /
2611 F.second.BaseSample
2612 : 0;
2613 double TestUniquePercent =
2614 F.second.TestSample > 0
2615 ? static_cast<double>(F.second.TestUniqueSample) /
2616 F.second.TestSample
2617 : 0;
2618
2619 FOS << format(Fmt: "%.2f%%", Vals: F.second.BaseWeight * 100);
2620 FOS.PadToColumn(NewCol: TestWeightCol);
2621 FOS << format(Fmt: "%.2f%%", Vals: F.second.TestWeight * 100);
2622 FOS.PadToColumn(NewCol: SimilarityCol);
2623 FOS << format(Fmt: "%.2f%%", Vals: F.second.Similarity * 100);
2624 FOS.PadToColumn(NewCol: OverlapCol);
2625 FOS << format(Fmt: "%.2f%%", Vals: OverlapPercent * 100);
2626 FOS.PadToColumn(NewCol: BaseUniqueCol);
2627 FOS << format(Fmt: "%.2f%%", Vals: BaseUniquePercent * 100);
2628 FOS.PadToColumn(NewCol: TestUniqueCol);
2629 FOS << format(Fmt: "%.2f%%", Vals: TestUniquePercent * 100);
2630 FOS.PadToColumn(NewCol: BaseSampleCol);
2631 FOS << F.second.BaseSample;
2632 FOS.PadToColumn(NewCol: TestSampleCol);
2633 FOS << F.second.TestSample;
2634 FOS.PadToColumn(NewCol: FuncNameCol);
2635 FOS << F.second.TestName.toString() << "\n";
2636 }
2637}
2638
2639void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2640 OS << "Profile overlap information for base_profile: "
2641 << ProfOverlap.BaseName.toString()
2642 << " and test_profile: " << ProfOverlap.TestName.toString()
2643 << "\nProgram level:\n";
2644
2645 OS << " Whole program profile similarity: "
2646 << format(Fmt: "%.3f%%", Vals: ProfOverlap.Similarity * 100) << "\n";
2647
2648 assert(ProfOverlap.UnionSample > 0 &&
2649 "Total samples in two profile should be greater than 0");
2650 double OverlapPercent =
2651 static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2652 assert(ProfOverlap.BaseSample > 0 &&
2653 "Total samples in base profile should be greater than 0");
2654 double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2655 ProfOverlap.BaseSample;
2656 assert(ProfOverlap.TestSample > 0 &&
2657 "Total samples in test profile should be greater than 0");
2658 double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2659 ProfOverlap.TestSample;
2660
2661 OS << " Whole program sample overlap: "
2662 << format(Fmt: "%.3f%%", Vals: OverlapPercent * 100) << "\n";
2663 OS << " percentage of samples unique in base profile: "
2664 << format(Fmt: "%.3f%%", Vals: BaseUniquePercent * 100) << "\n";
2665 OS << " percentage of samples unique in test profile: "
2666 << format(Fmt: "%.3f%%", Vals: TestUniquePercent * 100) << "\n";
2667 OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2668 << " total samples in test profile: " << ProfOverlap.TestSample << "\n";
2669
2670 assert(ProfOverlap.UnionCount > 0 &&
2671 "There should be at least one function in two input profiles");
2672 double FuncOverlapPercent =
2673 static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2674 OS << " Function overlap: " << format(Fmt: "%.3f%%", Vals: FuncOverlapPercent * 100)
2675 << "\n";
2676 OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
2677 OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2678 << "\n";
2679 OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
2680 << "\n";
2681}
2682
2683void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2684 raw_fd_ostream &OS) const {
2685 assert(HotFuncOverlap.UnionCount > 0 &&
2686 "There should be at least one hot function in two input profiles");
2687 OS << " Hot-function overlap: "
2688 << format(Fmt: "%.3f%%", Vals: static_cast<double>(HotFuncOverlap.OverlapCount) /
2689 HotFuncOverlap.UnionCount * 100)
2690 << "\n";
2691 OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2692 OS << " hot functions unique in base profile: "
2693 << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2694 OS << " hot functions unique in test profile: "
2695 << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2696
2697 assert(HotBlockOverlap.UnionCount > 0 &&
2698 "There should be at least one hot block in two input profiles");
2699 OS << " Hot-block overlap: "
2700 << format(Fmt: "%.3f%%", Vals: static_cast<double>(HotBlockOverlap.OverlapCount) /
2701 HotBlockOverlap.UnionCount * 100)
2702 << "\n";
2703 OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2704 OS << " hot blocks unique in base profile: "
2705 << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2706 OS << " hot blocks unique in test profile: "
2707 << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2708}
2709
2710std::error_code SampleOverlapAggregator::loadProfiles() {
2711 using namespace sampleprof;
2712
2713 LLVMContext Context;
2714 auto FS = vfs::getRealFileSystem();
2715 auto BaseReaderOrErr = SampleProfileReader::create(Filename: BaseFilename, C&: Context, FS&: *FS,
2716 P: FSDiscriminatorPassOption);
2717 if (std::error_code EC = BaseReaderOrErr.getError())
2718 exitWithErrorCode(EC, Whence: BaseFilename);
2719
2720 auto TestReaderOrErr = SampleProfileReader::create(Filename: TestFilename, C&: Context, FS&: *FS,
2721 P: FSDiscriminatorPassOption);
2722 if (std::error_code EC = TestReaderOrErr.getError())
2723 exitWithErrorCode(EC, Whence: TestFilename);
2724
2725 BaseReader = std::move(BaseReaderOrErr.get());
2726 TestReader = std::move(TestReaderOrErr.get());
2727
2728 if (std::error_code EC = BaseReader->read())
2729 exitWithErrorCode(EC, Whence: BaseFilename);
2730 if (std::error_code EC = TestReader->read())
2731 exitWithErrorCode(EC, Whence: TestFilename);
2732 if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2733 exitWithError(
2734 Message: "cannot compare probe-based profile with non-probe-based profile");
2735 if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2736 exitWithError(Message: "cannot compare CS profile with non-CS profile");
2737
2738 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2739 // profile summary.
2740 ProfileSummary &BasePS = BaseReader->getSummary();
2741 ProfileSummary &TestPS = TestReader->getSummary();
2742 BaseHotThreshold =
2743 ProfileSummaryBuilder::getHotCountThreshold(DS: BasePS.getDetailedSummary());
2744 TestHotThreshold =
2745 ProfileSummaryBuilder::getHotCountThreshold(DS: TestPS.getDetailedSummary());
2746
2747 return std::error_code();
2748}
2749
2750void overlapSampleProfile(const std::string &BaseFilename,
2751 const std::string &TestFilename,
2752 const OverlapFuncFilters &FuncFilter,
2753 uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2754 using namespace sampleprof;
2755
2756 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2757 // report 2--3 places after decimal point in percentage numbers.
2758 SampleOverlapAggregator OverlapAggr(
2759 BaseFilename, TestFilename,
2760 static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2761 if (std::error_code EC = OverlapAggr.loadProfiles())
2762 exitWithErrorCode(EC);
2763
2764 OverlapAggr.initializeSampleProfileOverlap();
2765 if (OverlapAggr.detectZeroSampleProfile(OS))
2766 return;
2767
2768 OverlapAggr.computeSampleProfileOverlap(OS);
2769
2770 OverlapAggr.dumpProgramSummary(OS);
2771 OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2772 OverlapAggr.dumpFuncSimilarity(OS);
2773}
2774
2775static int overlap_main() {
2776 std::error_code EC;
2777 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2778 if (EC)
2779 exitWithErrorCode(EC, Whence: OutputFilename);
2780
2781 if (ProfileKind == instr)
2782 overlapInstrProfile(BaseFilename, TestFilename,
2783 FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2784 OS, IsCS);
2785 else
2786 overlapSampleProfile(BaseFilename, TestFilename,
2787 FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2788 SimilarityCutoff, OS);
2789
2790 return 0;
2791}
2792
namespace {
/// Running totals for the value sites of one value-profile kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  /// Number of value sites seen.
  uint64_t TotalNumValueSites{0};
  /// Number of value sites that carry at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  /// Number of profiled values summed over all sites.
  uint64_t TotalNumValues{0};
  /// Entry [N - 1] counts the sites that have exactly N profiled values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2802
2803static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2804 ValueSitesStats &Stats, raw_fd_ostream &OS,
2805 InstrProfSymtab *Symtab) {
2806 uint32_t NS = Func.getNumValueSites(ValueKind: VK);
2807 Stats.TotalNumValueSites += NS;
2808 for (size_t I = 0; I < NS; ++I) {
2809 auto VD = Func.getValueArrayForSite(ValueKind: VK, Site: I);
2810 uint32_t NV = VD.size();
2811 if (NV == 0)
2812 continue;
2813 Stats.TotalNumValues += NV;
2814 Stats.TotalNumValueSitesWithValueProfile++;
2815 if (NV > Stats.ValueSitesHistogram.size())
2816 Stats.ValueSitesHistogram.resize(new_size: NV, x: 0);
2817 Stats.ValueSitesHistogram[NV - 1]++;
2818
2819 uint64_t SiteSum = 0;
2820 for (const auto &V : VD)
2821 SiteSum += V.Count;
2822 if (SiteSum == 0)
2823 SiteSum = 1;
2824
2825 for (const auto &V : VD) {
2826 OS << "\t[ " << format(Fmt: "%2u", Vals: I) << ", ";
2827 if (Symtab == nullptr)
2828 OS << format(Fmt: "%4" PRIu64, Vals: V.Value);
2829 else
2830 OS << Symtab->getFuncOrVarName(MD5Hash: V.Value);
2831 OS << ", " << format(Fmt: "%10" PRId64, Vals: V.Count) << " ] ("
2832 << format(Fmt: "%.2f%%", Vals: (V.Count * 100.0 / SiteSum)) << ")\n";
2833 }
2834 }
2835}
2836
2837static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2838 ValueSitesStats &Stats) {
2839 OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n";
2840 OS << " Total number of sites with values: "
2841 << Stats.TotalNumValueSitesWithValueProfile << "\n";
2842 OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n";
2843
2844 OS << " Value sites histogram:\n\tNumTargets, SiteCount\n";
2845 for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2846 if (Stats.ValueSitesHistogram[I] > 0)
2847 OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2848 }
2849}
2850
/// Show the instrumentation profile named by Filename on OS.
///
/// Depending on the global options read below, this either re-emits selected
/// records in text format, lists covered functions, or prints per-function
/// counters followed by a summary and optional extra sections (top-N
/// functions, hot function list, value-profile statistics, detailed summary,
/// binary ids, profile version, temporal traces).
///
/// \param SFormat requested output format; Json and Yaml are rejected.
/// \param OS      stream that receives all output.
/// \returns 0 on every path that returns.
static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
  if (SFormat == ShowFormat::Json)
    exitWithError(Message: "JSON output is not supported for instr profiles");
  if (SFormat == ShowFormat::Yaml)
    exitWithError(Message: "YAML output is not supported for instr profiles");
  auto FS = vfs::getRealFileSystem();
  auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
  // Use the requested summary cutoffs, falling back to the defaults when none
  // were given but an option that needs a detailed summary is on.
  std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
  if (Cutoffs.empty() && (ShowDetailedSummary || ShowHotFuncList))
    Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
  InstrProfSummaryBuilder Builder(std::move(Cutoffs));
  if (Error E = ReaderOrErr.takeError())
    exitWithError(E: std::move(E), Whence: Filename);

  auto Reader = std::move(ReaderOrErr.get());
  bool IsIRInstr = Reader->isIRLevelProfile();
  size_t ShownFunctions = 0;
  size_t BelowCutoffFunctions = 0;
  // One statistics slot per value-profile kind, indexed by the kind itself.
  int NumVPKind = IPVK_Last - IPVK_First + 1;
  std::vector<ValueSitesStats> VPStats(NumVPKind);

  // (function name, maximum counter) pairs for the top-N and hot lists.
  std::vector<std::pair<StringRef, uint64_t>> NameAndMaxCount;

  if (!TextFormat && OnlyListBelow) {
    OS << "The list of functions with the maximum counter less than "
       << ShowValueCutoff << ":\n";
  }

  // Add marker so that IR-level instrumentation round-trips properly.
  if (TextFormat && IsIRInstr)
    OS << ":ir\n";

  for (const auto &Func : *Reader) {
    // For IR-level profiles, only look at records whose context-sensitive
    // flag agrees with ShowCS.
    if (Reader->isIRLevelProfile()) {
      bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(FuncHash: Func.Hash);
      if (FuncIsCS != ShowCS)
        continue;
    }
    bool Show = ShowAllFunctions ||
                (!FuncNameFilter.empty() && Func.Name.contains(Other: FuncNameFilter));

    bool doTextFormatDump = (Show && TextFormat);

    // In text mode a selected record is written out and nothing else is
    // computed for it; in particular it is not added to the summary builder.
    if (doTextFormatDump) {
      InstrProfSymtab &Symtab = Reader->getSymtab();
      InstrProfWriter::writeRecordInText(Name: Func.Name, Hash: Func.Hash, Counters: Func, Symtab,
                                         OS);
      continue;
    }

    assert(Func.Counts.size() > 0 && "function missing entry counter");
    Builder.addRecord(Func);

    // Coverage mode: print the name of every function with a non-zero counter.
    if (ShowCovered) {
      if (llvm::any_of(Range: Func.Counts, P: [](uint64_t C) { return C; }))
        OS << Func.Name << "\n";
      continue;
    }

    uint64_t FuncMax = 0;
    uint64_t FuncSum = 0;

    // Records with a pseudo count kind are shown with a tag instead of
    // counter values and are excluded from the cutoff and top-N bookkeeping.
    auto PseudoKind = Func.getCountPseudoKind();
    if (PseudoKind != InstrProfRecord::NotPseudo) {
      if (Show) {
        if (!ShownFunctions)
          OS << "Counters:\n";
        ++ShownFunctions;
        OS << " " << Func.Name << ":\n"
           << " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
           << " Counters: " << Func.Counts.size();
        if (PseudoKind == InstrProfRecord::PseudoHot)
          OS << " <PseudoHot>\n";
        else if (PseudoKind == InstrProfRecord::PseudoWarm)
          OS << " <PseudoWarm>\n";
        else
          llvm_unreachable("Unknown PseudoKind");
      }
      continue;
    }

    for (uint64_t Count : Func.Counts) {
      FuncMax = std::max(a: FuncMax, b: Count);
      FuncSum += Count;
    }

    // Apply the value cutoff: functions below it are only counted (and
    // listed when OnlyListBelow is set); when OnlyListBelow is set, functions
    // at or above the cutoff are skipped as well.
    if (FuncMax < ShowValueCutoff) {
      ++BelowCutoffFunctions;
      if (OnlyListBelow) {
        OS << " " << Func.Name << ": (Max = " << FuncMax
           << " Sum = " << FuncSum << ")\n";
      }
      continue;
    } else if (OnlyListBelow)
      continue;

    if (TopNFunctions || ShowHotFuncList)
      NameAndMaxCount.emplace_back(args: Func.Name, args&: FuncMax);

    if (Show) {
      if (!ShownFunctions)
        OS << "Counters:\n";

      ++ShownFunctions;

      OS << " " << Func.Name << ":\n"
         << " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
         << " Counters: " << Func.Counts.size() << "\n";
      // The first counter is printed as the function count only for profiles
      // that are not IR-level.
      if (!IsIRInstr)
        OS << " Function count: " << Func.Counts[0] << "\n";

      if (ShowIndirectCallTargets)
        OS << " Indirect Call Site Count: "
           << Func.getNumValueSites(ValueKind: IPVK_IndirectCallTarget) << "\n";

      if (ShowVTables)
        OS << " Number of instrumented vtables: "
           << Func.getNumValueSites(ValueKind: IPVK_VTableTarget) << "\n";

      uint32_t NumMemOPCalls = Func.getNumValueSites(ValueKind: IPVK_MemOPSize);
      if (ShowMemOPSizes && NumMemOPCalls > 0)
        OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls
           << "\n";

      if (ShowCounts) {
        OS << " Block counts: [";
        // Skip counter 0 when it was already printed as the function count.
        size_t Start = (IsIRInstr ? 0 : 1);
        for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
          OS << (I == Start ? "" : ", ") << Func.Counts[I];
        }
        OS << "]\n";
      }

      if (ShowIndirectCallTargets) {
        OS << " Indirect Target Results:\n";
        traverseAllValueSites(Func, VK: IPVK_IndirectCallTarget,
                              Stats&: VPStats[IPVK_IndirectCallTarget], OS,
                              Symtab: &(Reader->getSymtab()));
      }

      if (ShowVTables) {
        OS << " VTable Results:\n";
        traverseAllValueSites(Func, VK: IPVK_VTableTarget,
                              Stats&: VPStats[IPVK_VTableTarget], OS,
                              Symtab: &(Reader->getSymtab()));
      }

      // No symbol table is passed here, so values are printed numerically.
      if (ShowMemOPSizes && NumMemOPCalls > 0) {
        OS << " Memory Intrinsic Size Results:\n";
        traverseAllValueSites(Func, VK: IPVK_MemOPSize, Stats&: VPStats[IPVK_MemOPSize], OS,
                              Symtab: nullptr);
      }
    }
  }
  if (Reader->hasError())
    exitWithError(E: Reader->getError(), Whence: Filename);

  // Text and coverage modes print no summary.
  if (TextFormat || ShowCovered)
    return 0;
  std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
  bool IsIR = Reader->isIRLevelProfile();
  OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
  if (IsIR) {
    OS << " entry_first = " << Reader->instrEntryBBEnabled();
    OS << " instrument_loop_entries = " << Reader->instrLoopEntriesEnabled();
  }
  OS << "\n";
  if (ShowAllFunctions || !FuncNameFilter.empty())
    OS << "Functions shown: " << ShownFunctions << "\n";
  PS->printSummary(OS);
  if (ShowValueCutoff > 0) {
    OS << "Number of functions with maximum count (< " << ShowValueCutoff
       << "): " << BelowCutoffFunctions << "\n";
    OS << "Number of functions with maximum count (>= " << ShowValueCutoff
       << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
  }

  // Sort by MaxCount in decreasing order
  llvm::stable_sort(Range&: NameAndMaxCount, C: [](const auto &L, const auto &R) {
    return L.second > R.second;
  });
  if (TopNFunctions) {
    OS << "Top " << TopNFunctions
       << " functions with the largest internal block counts: \n";
    auto TopFuncs = ArrayRef(NameAndMaxCount).take_front(N: TopNFunctions);
    for (auto [Name, MaxCount] : TopFuncs)
      OS << " " << Name << ", max count = " << MaxCount << "\n";
  }

  if (ShowHotFuncList) {
    auto HotCountThreshold =
        ProfileSummaryBuilder::getHotCountThreshold(DS: PS->getDetailedSummary());
    OS << "# Hot count threshold: " << HotCountThreshold << "\n";
    // The list is sorted by decreasing max count, so stop at the first
    // function below the threshold.
    for (auto [Name, MaxCount] : NameAndMaxCount) {
      if (MaxCount < HotCountThreshold)
        break;
      OS << Name << "\n";
    }
  }

  // Value-profile statistics are only gathered while showing functions, so
  // they are printed only when at least one function was shown.
  if (ShownFunctions && ShowIndirectCallTargets) {
    OS << "Statistics for indirect call sites profile:\n";
    showValueSitesStats(OS, VK: IPVK_IndirectCallTarget,
                        Stats&: VPStats[IPVK_IndirectCallTarget]);
  }

  if (ShownFunctions && ShowVTables) {
    OS << "Statistics for vtable profile:\n";
    showValueSitesStats(OS, VK: IPVK_VTableTarget, Stats&: VPStats[IPVK_VTableTarget]);
  }

  if (ShownFunctions && ShowMemOPSizes) {
    OS << "Statistics for memory intrinsic calls sizes profile:\n";
    showValueSitesStats(OS, VK: IPVK_MemOPSize, Stats&: VPStats[IPVK_MemOPSize]);
  }

  if (ShowDetailedSummary)
    PS->printDetailedSummary(OS);

  if (ShowBinaryIds)
    if (Error E = Reader->printBinaryIds(OS))
      exitWithError(E: std::move(E), Whence: Filename);

  if (ShowProfileVersion)
    OS << "Profile version: " << Reader->getVersion() << "\n";

  if (ShowTemporalProfTraces) {
    auto &Traces = Reader->getTemporalProfTraces();
    OS << "Temporal Profile Traces (samples=" << Traces.size()
       << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
    for (unsigned i = 0; i < Traces.size(); i++) {
      OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
         << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
      // Each trace stores name references; resolve them through the symtab.
      for (auto &NameRef : Traces[i].FunctionNameRefs)
        OS << " " << Reader->getSymtab().getFuncOrVarName(MD5Hash: NameRef) << "\n";
    }
  }

  return 0;
}
3091
3092static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
3093 raw_fd_ostream &OS) {
3094 if (!Reader->dumpSectionInfo(OS)) {
3095 WithColor::warning() << "-show-sec-info-only is only supported for "
3096 << "sample profile in extbinary format and is "
3097 << "ignored for other formats.\n";
3098 return;
3099 }
3100}
3101
3102namespace {
3103struct HotFuncInfo {
3104 std::string FuncName;
3105 uint64_t TotalCount = 0;
3106 double TotalCountPercent = 0.0f;
3107 uint64_t MaxCount = 0;
3108 uint64_t EntryCount = 0;
3109
3110 HotFuncInfo() = default;
3111
3112 HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
3113 : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
3114 MaxCount(MS), EntryCount(ES) {}
3115};
3116} // namespace
3117
3118// Print out detailed information about hot functions in PrintValues vector.
3119// Users specify titles and offset of every columns through ColumnTitle and
3120// ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3121// and at least 4. Besides, users can optionally give a HotFuncMetric string to
3122// print out or let it be an empty string.
3123static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
3124 const std::vector<int> &ColumnOffset,
3125 const std::vector<HotFuncInfo> &PrintValues,
3126 uint64_t HotFuncCount, uint64_t TotalFuncCount,
3127 uint64_t HotProfCount, uint64_t TotalProfCount,
3128 const std::string &HotFuncMetric,
3129 uint32_t TopNFunctions, raw_fd_ostream &OS) {
3130 assert(ColumnOffset.size() == ColumnTitle.size() &&
3131 "ColumnOffset and ColumnTitle should have the same size");
3132 assert(ColumnTitle.size() >= 4 &&
3133 "ColumnTitle should have at least 4 elements");
3134 assert(TotalFuncCount > 0 &&
3135 "There should be at least one function in the profile");
3136 double TotalProfPercent = 0;
3137 if (TotalProfCount > 0)
3138 TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
3139
3140 formatted_raw_ostream FOS(OS);
3141 FOS << HotFuncCount << " out of " << TotalFuncCount
3142 << " functions with profile ("
3143 << format(Fmt: "%.2f%%",
3144 Vals: (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
3145 << ") are considered hot functions";
3146 if (!HotFuncMetric.empty())
3147 FOS << " (" << HotFuncMetric << ")";
3148 FOS << ".\n";
3149 FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
3150 << format(Fmt: "%.2f%%", Vals: TotalProfPercent) << ") are from hot functions.\n";
3151
3152 for (size_t I = 0; I < ColumnTitle.size(); ++I) {
3153 FOS.PadToColumn(NewCol: ColumnOffset[I]);
3154 FOS << ColumnTitle[I];
3155 }
3156 FOS << "\n";
3157
3158 uint32_t Count = 0;
3159 for (const auto &R : PrintValues) {
3160 if (TopNFunctions && (Count++ == TopNFunctions))
3161 break;
3162 FOS.PadToColumn(NewCol: ColumnOffset[0]);
3163 FOS << R.TotalCount << " (" << format(Fmt: "%.2f%%", Vals: R.TotalCountPercent) << ")";
3164 FOS.PadToColumn(NewCol: ColumnOffset[1]);
3165 FOS << R.MaxCount;
3166 FOS.PadToColumn(NewCol: ColumnOffset[2]);
3167 FOS << R.EntryCount;
3168 FOS.PadToColumn(NewCol: ColumnOffset[3]);
3169 FOS << R.FuncName << "\n";
3170 }
3171}
3172
3173static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
3174 ProfileSummary &PS, uint32_t TopN,
3175 raw_fd_ostream &OS) {
3176 using namespace sampleprof;
3177
3178 const uint32_t HotFuncCutoff = 990000;
3179 auto &SummaryVector = PS.getDetailedSummary();
3180 uint64_t MinCountThreshold = 0;
3181 for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
3182 if (SummaryEntry.Cutoff == HotFuncCutoff) {
3183 MinCountThreshold = SummaryEntry.MinCount;
3184 break;
3185 }
3186 }
3187
3188 // Traverse all functions in the profile and keep only hot functions.
3189 // The following loop also calculates the sum of total samples of all
3190 // functions.
3191 std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
3192 std::greater<uint64_t>>
3193 HotFunc;
3194 uint64_t ProfileTotalSample = 0;
3195 uint64_t HotFuncSample = 0;
3196 uint64_t HotFuncCount = 0;
3197
3198 for (const auto &I : Profiles) {
3199 FuncSampleStats FuncStats;
3200 const FunctionSamples &FuncProf = I.second;
3201 ProfileTotalSample += FuncProf.getTotalSamples();
3202 getFuncSampleStats(Func: FuncProf, FuncStats, HotThreshold: MinCountThreshold);
3203
3204 if (isFunctionHot(FuncStats, HotThreshold: MinCountThreshold)) {
3205 HotFunc.emplace(args: FuncProf.getTotalSamples(),
3206 args: std::make_pair(x: &(I.second), y&: FuncStats.MaxSample));
3207 HotFuncSample += FuncProf.getTotalSamples();
3208 ++HotFuncCount;
3209 }
3210 }
3211
3212 std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
3213 "Entry sample", "Function name"};
3214 std::vector<int> ColumnOffset{0, 24, 42, 58};
3215 std::string Metric =
3216 std::string("max sample >= ") + std::to_string(val: MinCountThreshold);
3217 std::vector<HotFuncInfo> PrintValues;
3218 for (const auto &FuncPair : HotFunc) {
3219 const FunctionSamples &Func = *FuncPair.second.first;
3220 double TotalSamplePercent =
3221 (ProfileTotalSample > 0)
3222 ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
3223 : 0;
3224 PrintValues.emplace_back(
3225 args: HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
3226 TotalSamplePercent, FuncPair.second.second,
3227 Func.getHeadSamplesEstimate()));
3228 }
3229 dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
3230 TotalFuncCount: Profiles.size(), HotProfCount: HotFuncSample, TotalProfCount: ProfileTotalSample,
3231 HotFuncMetric: Metric, TopNFunctions: TopN, OS);
3232
3233 return 0;
3234}
3235
3236static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3237 if (SFormat == ShowFormat::Yaml)
3238 exitWithError(Message: "YAML output is not supported for sample profiles");
3239 using namespace sampleprof;
3240 LLVMContext Context;
3241 auto FS = vfs::getRealFileSystem();
3242 auto ReaderOrErr = SampleProfileReader::create(Filename, C&: Context, FS&: *FS,
3243 P: FSDiscriminatorPassOption);
3244 if (std::error_code EC = ReaderOrErr.getError())
3245 exitWithErrorCode(EC, Whence: Filename);
3246
3247 auto Reader = std::move(ReaderOrErr.get());
3248 if (ShowSectionInfoOnly) {
3249 showSectionInfo(Reader: Reader.get(), OS);
3250 return 0;
3251 }
3252
3253 if (std::error_code EC = Reader->read())
3254 exitWithErrorCode(EC, Whence: Filename);
3255
3256 if (ShowAllFunctions || FuncNameFilter.empty()) {
3257 if (SFormat == ShowFormat::Json)
3258 Reader->dumpJson(OS);
3259 else
3260 Reader->dump(OS);
3261 } else {
3262 if (SFormat == ShowFormat::Json)
3263 exitWithError(
3264 Message: "the JSON format is supported only when all functions are to "
3265 "be printed");
3266
3267 // TODO: parse context string to support filtering by contexts.
3268 FunctionSamples *FS = Reader->getSamplesFor(Fname: StringRef(FuncNameFilter));
3269 Reader->dumpFunctionProfile(FS: FS ? *FS : FunctionSamples(), OS);
3270 }
3271
3272 if (ShowProfileSymbolList) {
3273 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
3274 Reader->getProfileSymbolList();
3275 ReaderList->dump(OS);
3276 }
3277
3278 if (ShowDetailedSummary) {
3279 auto &PS = Reader->getSummary();
3280 PS.printSummary(OS);
3281 PS.printDetailedSummary(OS);
3282 }
3283
3284 if (ShowHotFuncList || TopNFunctions)
3285 showHotFunctionList(Profiles: Reader->getProfiles(), PS&: Reader->getSummary(),
3286 TopN: TopNFunctions, OS);
3287
3288 return 0;
3289}
3290
3291static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3292 if (SFormat == ShowFormat::Json)
3293 exitWithError(Message: "JSON output is not supported for MemProf");
3294
3295 // Show the raw profile in YAML.
3296 if (memprof::RawMemProfReader::hasFormat(Path: Filename)) {
3297 auto ReaderOr = llvm::memprof::RawMemProfReader::create(
3298 Path: Filename, ProfiledBinary, /*KeepNames=*/KeepName: true);
3299 if (Error E = ReaderOr.takeError()) {
3300 // Since the error can be related to the profile or the binary we do not
3301 // pass whence. Instead additional context is provided where necessary in
3302 // the error message.
3303 exitWithError(E: std::move(E), /*Whence*/ "");
3304 }
3305
3306 std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
3307 ReaderOr.get().release());
3308
3309 Reader->printYAML(OS);
3310 return 0;
3311 }
3312
3313 // Show the indexed MemProf profile in YAML.
3314 auto FS = vfs::getRealFileSystem();
3315 auto ReaderOrErr = IndexedInstrProfReader::create(Path: Filename, FS&: *FS);
3316 if (Error E = ReaderOrErr.takeError())
3317 exitWithError(E: std::move(E), Whence: Filename);
3318
3319 auto Reader = std::move(ReaderOrErr.get());
3320 memprof::AllMemProfData Data = Reader->getAllMemProfData();
3321
3322 // For v4 and above the summary is serialized in the indexed profile, and can
3323 // be accessed from the reader. Earlier versions build the summary below.
3324 // The summary is emitted as YAML comments at the start of the output.
3325 if (auto *MemProfSum = Reader->getMemProfSummary()) {
3326 MemProfSum->printSummaryYaml(OS);
3327 } else {
3328 memprof::MemProfSummaryBuilder MemProfSumBuilder;
3329 for (auto &Pair : Data.HeapProfileRecords)
3330 MemProfSumBuilder.addRecord(Pair.Record);
3331 MemProfSumBuilder.getSummary()->printSummaryYaml(OS);
3332 }
3333 // Construct yaml::Output with the maximum column width of 80 so that each
3334 // Frame fits in one line.
3335 yaml::Output Yout(OS, nullptr, 80);
3336 Yout << Data;
3337
3338 return 0;
3339}
3340
3341static int showDebugInfoCorrelation(const std::string &Filename,
3342 ShowFormat SFormat, raw_fd_ostream &OS) {
3343 if (SFormat == ShowFormat::Json)
3344 exitWithError(Message: "JSON output is not supported for debug info correlation");
3345 std::unique_ptr<InstrProfCorrelator> Correlator;
3346 if (auto Err =
3347 InstrProfCorrelator::get(Filename, FileKind: InstrProfCorrelator::DEBUG_INFO)
3348 .moveInto(Value&: Correlator))
3349 exitWithError(E: std::move(Err), Whence: Filename);
3350 if (SFormat == ShowFormat::Yaml) {
3351 if (auto Err = Correlator->dumpYaml(MaxWarnings: MaxDbgCorrelationWarnings, OS))
3352 exitWithError(E: std::move(Err), Whence: Filename);
3353 return 0;
3354 }
3355
3356 if (auto Err = Correlator->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
3357 exitWithError(E: std::move(Err), Whence: Filename);
3358
3359 InstrProfSymtab Symtab;
3360 if (auto Err = Symtab.create(
3361 NameStrings: StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3362 exitWithError(E: std::move(Err), Whence: Filename);
3363
3364 if (ShowProfileSymbolList)
3365 Symtab.dumpNames(OS);
3366 // TODO: Read "Profile Data Type" from debug info to compute and show how many
3367 // counters the section holds.
3368 if (ShowDetailedSummary)
3369 OS << "Counters section size: 0x"
3370 << Twine::utohexstr(Val: Correlator->getCountersSectionSize()) << " bytes\n";
3371 OS << "Found " << Correlator->getDataSize() << " functions\n";
3372
3373 return 0;
3374}
3375
3376static int show_main(StringRef ProgName) {
3377 if (Filename.empty() && DebugInfoFilename.empty())
3378 exitWithError(
3379 Message: "the positional argument '<profdata-file>' is required unless '--" +
3380 DebugInfoFilename.ArgStr + "' is provided");
3381
3382 if (Filename == OutputFilename) {
3383 errs() << ProgName
3384 << " show: Input file name cannot be the same as the output file "
3385 "name!\n";
3386 return 1;
3387 }
3388 if (JsonFormat)
3389 SFormat = ShowFormat::Json;
3390
3391 std::error_code EC;
3392 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3393 if (EC)
3394 exitWithErrorCode(EC, Whence: OutputFilename);
3395
3396 if (ShowAllFunctions && !FuncNameFilter.empty())
3397 WithColor::warning() << "-function argument ignored: showing all functions\n";
3398
3399 if (!DebugInfoFilename.empty())
3400 return showDebugInfoCorrelation(Filename: DebugInfoFilename, SFormat, OS);
3401
3402 if (ShowProfileKind == instr)
3403 return showInstrProfile(SFormat, OS);
3404 if (ShowProfileKind == sample)
3405 return showSampleProfile(SFormat, OS);
3406 return showMemProfProfile(SFormat, OS);
3407}
3408
3409static int order_main() {
3410 std::error_code EC;
3411 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3412 if (EC)
3413 exitWithErrorCode(EC, Whence: OutputFilename);
3414 auto FS = vfs::getRealFileSystem();
3415 auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
3416 if (Error E = ReaderOrErr.takeError())
3417 exitWithError(E: std::move(E), Whence: Filename);
3418
3419 auto Reader = std::move(ReaderOrErr.get());
3420 for (auto &I : *Reader) {
3421 // Read all entries
3422 (void)I;
3423 }
3424 ArrayRef Traces = Reader->getTemporalProfTraces();
3425 if (NumTestTraces && NumTestTraces >= Traces.size())
3426 exitWithError(
3427 Message: "--" + NumTestTraces.ArgStr +
3428 " must be smaller than the total number of traces: expected: < " +
3429 Twine(Traces.size()) + ", actual: " + Twine(NumTestTraces));
3430 ArrayRef TestTraces = Traces.take_back(N: NumTestTraces);
3431 Traces = Traces.drop_back(N: NumTestTraces);
3432
3433 std::vector<BPFunctionNode> Nodes;
3434 TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes);
3435 BalancedPartitioningConfig Config;
3436 BalancedPartitioning BP(Config);
3437 BP.run(Nodes);
3438
3439 OS << "# Ordered " << Nodes.size() << " functions\n";
3440 if (!TestTraces.empty()) {
3441 // Since we don't know the symbol sizes, we assume 32 functions per page.
3442 DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber;
3443 for (auto &Node : Nodes)
3444 IdToPageNumber[Node.Id] = IdToPageNumber.size() / 32;
3445
3446 SmallSet<unsigned, 0> TouchedPages;
3447 unsigned Area = 0;
3448 for (auto &Trace : TestTraces) {
3449 for (auto Id : Trace.FunctionNameRefs) {
3450 auto It = IdToPageNumber.find(Val: Id);
3451 if (It == IdToPageNumber.end())
3452 continue;
3453 TouchedPages.insert(V: It->getSecond());
3454 Area += TouchedPages.size();
3455 }
3456 TouchedPages.clear();
3457 }
3458 OS << "# Total area under the page fault curve: " << (float)Area << "\n";
3459 }
3460 OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3461 "linkage and this output does not take that into account. Some "
3462 "post-processing may be required before passing to the linker via "
3463 "-order_file.\n";
3464 for (auto &N : Nodes) {
3465 auto [Filename, ParsedFuncName] =
3466 getParsedIRPGOName(IRPGOName: Reader->getSymtab().getFuncOrVarName(MD5Hash: N.Id));
3467 if (!Filename.empty())
3468 OS << "# " << Filename << "\n";
3469 OS << ParsedFuncName << "\n";
3470 }
3471 return 0;
3472}
3473
3474int main(int argc, const char *argv[]) {
3475 InitLLVM X(argc, argv);
3476 StringRef ProgName(sys::path::filename(path: argv[0]));
3477
3478 if (argc < 2) {
3479 errs()
3480 << ProgName
3481 << ": No subcommand specified! Run llvm-profdata --help for usage.\n";
3482 return 1;
3483 }
3484
3485 cl::ParseCommandLineOptions(argc, argv, Overview: "LLVM profile data\n");
3486
3487 if (ShowSubcommand)
3488 return show_main(ProgName);
3489
3490 if (OrderSubcommand)
3491 return order_main();
3492
3493 if (OverlapSubcommand)
3494 return overlap_main();
3495
3496 if (MergeSubcommand)
3497 return merge_main(ProgName);
3498
3499 errs() << ProgName
3500 << ": Unknown command. Run llvm-profdata --help for usage.\n";
3501 return 1;
3502}
3503