1//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// llvm-profdata merges .profdata files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/SmallSet.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/Debuginfod/HTTPClient.h"
17#include "llvm/IR/LLVMContext.h"
18#include "llvm/Object/Binary.h"
19#include "llvm/ProfileData/DataAccessProf.h"
20#include "llvm/ProfileData/InstrProfCorrelator.h"
21#include "llvm/ProfileData/InstrProfReader.h"
22#include "llvm/ProfileData/InstrProfWriter.h"
23#include "llvm/ProfileData/MemProf.h"
24#include "llvm/ProfileData/MemProfReader.h"
25#include "llvm/ProfileData/MemProfSummaryBuilder.h"
26#include "llvm/ProfileData/MemProfYAML.h"
27#include "llvm/ProfileData/ProfileCommon.h"
28#include "llvm/ProfileData/SampleProfReader.h"
29#include "llvm/ProfileData/SampleProfWriter.h"
30#include "llvm/Support/BalancedPartitioning.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Discriminator.h"
33#include "llvm/Support/Errc.h"
34#include "llvm/Support/FileSystem.h"
35#include "llvm/Support/Format.h"
36#include "llvm/Support/FormattedStream.h"
37#include "llvm/Support/LLVMDriver.h"
38#include "llvm/Support/MD5.h"
39#include "llvm/Support/MemoryBuffer.h"
40#include "llvm/Support/Path.h"
41#include "llvm/Support/Regex.h"
42#include "llvm/Support/ThreadPool.h"
43#include "llvm/Support/Threading.h"
44#include "llvm/Support/VirtualFileSystem.h"
45#include "llvm/Support/WithColor.h"
46#include "llvm/Support/raw_ostream.h"
47#include <algorithm>
48#include <cmath>
49#include <optional>
50#include <queue>
51
52using namespace llvm;
53using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
54
55// https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentations
56// on each subcommand.
57cl::SubCommand ShowSubcommand(
58 "show",
59 "Takes a profile data file and displays the profiles. See detailed "
60 "documentation in "
61 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
62cl::SubCommand OrderSubcommand(
63 "order",
64 "Reads temporal profiling traces from a profile and outputs a function "
65 "order that reduces the number of page faults for those traces. See "
66 "detailed documentation in "
67 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
68cl::SubCommand OverlapSubcommand(
69 "overlap",
70 "Computes and displays the overlap between two profiles. See detailed "
71 "documentation in "
72 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
73cl::SubCommand MergeSubcommand(
74 "merge",
75 "Takes several profiles and merge them together. See detailed "
76 "documentation in "
77 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
78
namespace {
// Profile flavours that can be selected on the command line. The enumerators
// are passed to clEnumVal by the option declarations in this file, so their
// spelling must not change.
enum ProfileKinds {
  instr,
  sample,
  memory,
};

// How invalid inputs are treated; selected through the FailMode option.
enum FailureMode {
  warnOnly,
  failIfAnyAreInvalid,
  failIfAllAreInvalid,
};

// Encodings offered for a written profile.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary,
};

// Output syntaxes selectable through the show-format option.
enum class ShowFormat {
  Text,
  Json,
  Yaml,
};
} // namespace
94
95// Common options.
96cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
97 cl::init(Val: "-"), cl::desc("Output file"),
98 cl::sub(ShowSubcommand),
99 cl::sub(OrderSubcommand),
100 cl::sub(OverlapSubcommand),
101 cl::sub(MergeSubcommand));
102// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
103// will be used. llvm::cl::alias::done() method asserts this condition.
104static cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
105 cl::aliasopt(OutputFilename));
106
107// Options common to at least two commands.
108static cl::opt<ProfileKinds> ProfileKind(
109 cl::desc("Profile kind:"), cl::sub(MergeSubcommand),
110 cl::sub(OverlapSubcommand), cl::init(Val: instr),
111 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
112 clEnumVal(sample, "Sample profile")));
113static cl::opt<std::string> Filename(cl::Positional,
114 cl::desc("<profdata-file>"),
115 cl::sub(ShowSubcommand),
116 cl::sub(OrderSubcommand));
117static cl::opt<unsigned> MaxDbgCorrelationWarnings(
118 "max-debug-info-correlation-warnings",
119 cl::desc("The maximum number of warnings to emit when correlating "
120 "profile from debug info (0 = no limit)"),
121 cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(Val: 5));
122static cl::opt<std::string> ProfiledBinary(
123 "profiled-binary", cl::init(Val: ""),
124 cl::desc("Path to binary from which the profile was collected."),
125 cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
126static cl::opt<std::string> DebugInfoFilename(
127 "debug-info", cl::init(Val: ""),
128 cl::desc(
129 "For show, read and extract profile metadata from debug info and show "
130 "the functions it found. For merge, use the provided debug info to "
131 "correlate the raw profile."),
132 cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
133static cl::opt<std::string>
134 BinaryFilename("binary-file", cl::init(Val: ""),
135 cl::desc("For merge, use the provided unstripped binary to "
136 "correlate the raw profile."),
137 cl::sub(MergeSubcommand));
138static cl::list<std::string> DebugFileDirectory(
139 "debug-file-directory",
140 cl::desc("Directories to search for object files by build ID"));
141static cl::opt<bool> DebugInfod("debuginfod", cl::init(Val: false), cl::Hidden,
142 cl::sub(MergeSubcommand),
143 cl::desc("Enable debuginfod"));
144static cl::opt<ProfCorrelatorKind> BIDFetcherProfileCorrelate(
145 "correlate",
146 cl::desc("Use debug-info or binary correlation to correlate profiles with "
147 "build id fetcher"),
148 cl::init(Val: InstrProfCorrelator::NONE),
149 cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
150 "No profile correlation"),
151 clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
152 "Use debug info to correlate"),
153 clEnumValN(InstrProfCorrelator::BINARY, "binary",
154 "Use binary to correlate")));
155static cl::opt<std::string> FuncNameFilter(
156 "function",
157 cl::desc("Only functions matching the filter are shown in the output. For "
158 "overlapping CSSPGO, this takes a function name with calling "
159 "context."),
160 cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
161 cl::sub(MergeSubcommand));
162
163// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
164// factor out the common cl::sub in cl::opt constructor for subcommand-specific
165// options.
166
167// Options specific to merge subcommand.
168static cl::list<std::string> InputFilenames(cl::Positional,
169 cl::sub(MergeSubcommand),
170 cl::desc("<filename...>"));
171static cl::list<std::string>
172 WeightedInputFilenames("weighted-input", cl::sub(MergeSubcommand),
173 cl::desc("<weight>,<filename>"));
174static cl::opt<ProfileFormat> OutputFormat(
175 cl::desc("Format of output profile"), cl::sub(MergeSubcommand),
176 cl::init(Val: PF_Ext_Binary),
177 cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
178 clEnumValN(PF_Ext_Binary, "extbinary",
179 "Extensible binary encoding "
180 "(default)"),
181 clEnumValN(PF_Text, "text", "Text encoding"),
182 clEnumValN(PF_GCC, "gcc",
183 "GCC encoding (only meaningful for -sample)")));
184static cl::opt<std::string>
185 InputFilenamesFile("input-files", cl::init(Val: ""), cl::sub(MergeSubcommand),
186 cl::desc("Path to file containing newline-separated "
187 "[<weight>,]<filename> entries"));
188static cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
189 cl::aliasopt(InputFilenamesFile));
190static cl::opt<bool> DumpInputFileList(
191 "dump-input-file-list", cl::init(Val: false), cl::Hidden,
192 cl::sub(MergeSubcommand),
193 cl::desc("Dump the list of input files and their weights, then exit"));
194static cl::opt<std::string> RemappingFile("remapping-file",
195 cl::value_desc("file"),
196 cl::sub(MergeSubcommand),
197 cl::desc("Symbol remapping file"));
198static cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
199 cl::aliasopt(RemappingFile));
200static cl::opt<bool>
201 UseMD5("use-md5", cl::init(Val: false), cl::Hidden,
202 cl::desc("Choose to use MD5 to represent string in name table (only "
203 "meaningful for -extbinary)"),
204 cl::sub(MergeSubcommand));
205static cl::opt<bool> CompressAllSections(
206 "compress-all-sections", cl::init(Val: false), cl::Hidden,
207 cl::sub(MergeSubcommand),
208 cl::desc("Compress all sections when writing the profile (only "
209 "meaningful for -extbinary)"));
210static cl::opt<bool> SampleMergeColdContext(
211 "sample-merge-cold-context", cl::init(Val: false), cl::Hidden,
212 cl::sub(MergeSubcommand),
213 cl::desc(
214 "Merge context sample profiles whose count is below cold threshold"));
215static cl::opt<bool> SampleTrimColdContext(
216 "sample-trim-cold-context", cl::init(Val: false), cl::Hidden,
217 cl::sub(MergeSubcommand),
218 cl::desc(
219 "Trim context sample profiles whose count is below cold threshold"));
220static cl::opt<uint32_t> SampleColdContextFrameDepth(
221 "sample-frame-depth-for-cold-context", cl::init(Val: 1),
222 cl::sub(MergeSubcommand),
223 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
224 "context-less base profile"));
225static cl::opt<size_t> OutputSizeLimit(
226 "output-size-limit", cl::init(Val: 0), cl::Hidden, cl::sub(MergeSubcommand),
227 cl::desc("Trim cold functions until profile size is below specified "
228 "limit in bytes. This uses a heursitic and functions may be "
229 "excessively trimmed"));
230static cl::opt<bool> GenPartialProfile(
231 "gen-partial-profile", cl::init(Val: false), cl::Hidden,
232 cl::sub(MergeSubcommand),
233 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
234static cl::opt<bool> SplitLayout(
235 "split-layout", cl::init(Val: false), cl::Hidden, cl::sub(MergeSubcommand),
236 cl::desc("Split the profile to two sections with one containing sample "
237 "profiles with inlined functions and the other without (only "
238 "meaningful for -extbinary)"));
239static cl::opt<std::string> SupplInstrWithSample(
240 "supplement-instr-with-sample", cl::init(Val: ""), cl::Hidden,
241 cl::sub(MergeSubcommand),
242 cl::desc("Supplement an instr profile with sample profile, to correct "
243 "the profile unrepresentativeness issue. The sample "
244 "profile is the input of the flag. Output will be in instr "
245 "format (The flag only works with -instr)"));
246static cl::opt<float> ZeroCounterThreshold(
247 "zero-counter-threshold", cl::init(Val: 0.7), cl::Hidden,
248 cl::sub(MergeSubcommand),
249 cl::desc("For the function which is cold in instr profile but hot in "
250 "sample profile, if the ratio of the number of zero counters "
251 "divided by the total number of counters is above the "
252 "threshold, the profile of the function will be regarded as "
253 "being harmful for performance and will be dropped."));
254static cl::opt<unsigned> SupplMinSizeThreshold(
255 "suppl-min-size-threshold", cl::init(Val: 10), cl::Hidden,
256 cl::sub(MergeSubcommand),
257 cl::desc("If the size of a function is smaller than the threshold, "
258 "assume it can be inlined by PGO early inliner and it won't "
259 "be adjusted based on sample profile."));
260static cl::opt<unsigned> InstrProfColdThreshold(
261 "instr-prof-cold-threshold", cl::init(Val: 0), cl::Hidden,
262 cl::sub(MergeSubcommand),
263 cl::desc("User specified cold threshold for instr profile which will "
264 "override the cold threshold got from profile summary. "));
265// WARNING: This reservoir size value is propagated to any input indexed
266// profiles for simplicity. Changing this value between invocations could
267// result in sample bias.
268static cl::opt<uint64_t> TemporalProfTraceReservoirSize(
269 "temporal-profile-trace-reservoir-size", cl::init(Val: 100),
270 cl::sub(MergeSubcommand),
271 cl::desc("The maximum number of stored temporal profile traces (default: "
272 "100)"));
273static cl::opt<uint64_t> TemporalProfMaxTraceLength(
274 "temporal-profile-max-trace-length", cl::init(Val: 10000),
275 cl::sub(MergeSubcommand),
276 cl::desc("The maximum length of a single temporal profile trace "
277 "(default: 10000)"));
278static cl::opt<std::string> FuncNameNegativeFilter(
279 "no-function", cl::init(Val: ""), cl::sub(MergeSubcommand),
280 cl::desc("Exclude functions matching the filter from the output."));
281
282static cl::opt<FailureMode>
283 FailMode("failure-mode", cl::init(Val: failIfAnyAreInvalid),
284 cl::desc("Failure mode:"), cl::sub(MergeSubcommand),
285 cl::values(clEnumValN(warnOnly, "warn",
286 "Do not fail and just print warnings."),
287 clEnumValN(failIfAnyAreInvalid, "any",
288 "Fail if any profile is invalid."),
289 clEnumValN(failIfAllAreInvalid, "all",
290 "Fail only if all profiles are invalid.")));
291
292static cl::opt<bool> OutputSparse(
293 "sparse", cl::init(Val: false), cl::sub(MergeSubcommand),
294 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
295static cl::opt<unsigned> NumThreads(
296 "num-threads", cl::init(Val: 0), cl::sub(MergeSubcommand),
297 cl::desc("Number of merge threads to use (default: autodetect)"));
298static cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
299 cl::aliasopt(NumThreads));
300
301static cl::opt<std::string> ProfileSymbolListFile(
302 "prof-sym-list", cl::init(Val: ""), cl::sub(MergeSubcommand),
303 cl::desc("Path to file containing the list of function symbols "
304 "used to populate profile symbol list"));
305
306static cl::opt<SampleProfileLayout> ProfileLayout(
307 "convert-sample-profile-layout",
308 cl::desc("Convert the generated profile to a profile with a new layout"),
309 cl::sub(MergeSubcommand), cl::init(Val: SPL_None),
310 cl::values(
311 clEnumValN(SPL_Nest, "nest",
312 "Nested profile, the input should be CS flat profile"),
313 clEnumValN(SPL_Flat, "flat",
314 "Profile with nested inlinee flatten out")));
315
316static cl::opt<bool> DropProfileSymbolList(
317 "drop-profile-symbol-list", cl::init(Val: false), cl::Hidden,
318 cl::sub(MergeSubcommand),
319 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
320 "(only meaningful for -sample)"));
321
322static cl::opt<bool> KeepVTableSymbols(
323 "keep-vtable-symbols", cl::init(Val: false), cl::Hidden,
324 cl::sub(MergeSubcommand),
325 cl::desc("If true, keep the vtable symbols in indexed profiles"));
326
327// Temporary support for writing the previous version of the format, to enable
328// some forward compatibility.
329// TODO: Consider enabling this with future version changes as well, to ease
330// deployment of newer versions of llvm-profdata.
331static cl::opt<bool> DoWritePrevVersion(
332 "write-prev-version", cl::init(Val: false), cl::Hidden,
333 cl::desc("Write the previous version of indexed format, to enable "
334 "some forward compatibility."));
335
336static cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
337 "memprof-version", cl::Hidden, cl::sub(MergeSubcommand),
338 cl::desc("Specify the version of the memprof format to use"),
339 cl::init(Val: memprof::Version3),
340 cl::values(clEnumValN(memprof::Version2, "2", "version 2"),
341 clEnumValN(memprof::Version3, "3", "version 3"),
342 clEnumValN(memprof::Version4, "4", "version 4")));
343
344static cl::opt<bool> MemProfFullSchema(
345 "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
346 cl::desc("Use the full schema for serialization"), cl::init(Val: false));
347
348static cl::opt<bool>
349 MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(Val: false),
350 cl::Hidden, cl::sub(MergeSubcommand),
351 cl::desc("Generate random hotness values"));
352static cl::opt<unsigned> MemprofGenerateRandomHotnessSeed(
353 "memprof-random-hotness-seed", cl::init(Val: 0), cl::Hidden,
354 cl::sub(MergeSubcommand),
355 cl::desc("Random hotness seed to use (0 to generate new seed)"));
356
357// Options specific to overlap subcommand.
358static cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
359 cl::desc("<base profile file>"),
360 cl::sub(OverlapSubcommand));
361static cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
362 cl::desc("<test profile file>"),
363 cl::sub(OverlapSubcommand));
364
365static cl::opt<unsigned long long> SimilarityCutoff(
366 "similarity-cutoff", cl::init(Val: 0),
367 cl::desc("For sample profiles, list function names (with calling context "
368 "for csspgo) for overlapped functions "
369 "with similarities below the cutoff (percentage times 10000)."),
370 cl::sub(OverlapSubcommand));
371
372static cl::opt<bool> IsCS(
373 "cs", cl::init(Val: false),
374 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
375 cl::sub(OverlapSubcommand));
376
377static cl::opt<unsigned long long> OverlapValueCutoff(
378 "value-cutoff", cl::init(Val: -1),
379 cl::desc(
380 "Function level overlap information for every function (with calling "
381 "context for csspgo) in test "
382 "profile with max count value greater than the parameter value"),
383 cl::sub(OverlapSubcommand));
384
385// Options specific to show subcommand.
386static cl::opt<bool>
387 ShowCounts("counts", cl::init(Val: false),
388 cl::desc("Show counter values for shown functions"),
389 cl::sub(ShowSubcommand));
390static cl::opt<ShowFormat>
391 SFormat("show-format", cl::init(Val: ShowFormat::Text),
392 cl::desc("Emit output in the selected format if supported"),
393 cl::sub(ShowSubcommand),
394 cl::values(clEnumValN(ShowFormat::Text, "text",
395 "emit normal text output (default)"),
396 clEnumValN(ShowFormat::Json, "json", "emit JSON"),
397 clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
398// TODO: Consider replacing this with `--show-format=text-encoding`.
399static cl::opt<bool>
400 TextFormat("text", cl::init(Val: false),
401 cl::desc("Show instr profile data in text dump format"),
402 cl::sub(ShowSubcommand));
403static cl::opt<bool>
404 JsonFormat("json",
405 cl::desc("Show sample profile data in the JSON format "
406 "(deprecated, please use --show-format=json)"),
407 cl::sub(ShowSubcommand));
408static cl::opt<bool> ShowIndirectCallTargets(
409 "ic-targets", cl::init(Val: false),
410 cl::desc("Show indirect call site target values for shown functions"),
411 cl::sub(ShowSubcommand));
412static cl::opt<bool>
413 ShowVTables("show-vtables", cl::init(Val: false),
414 cl::desc("Show vtable names for shown functions"),
415 cl::sub(ShowSubcommand));
416static cl::opt<bool> ShowMemOPSizes(
417 "memop-sizes", cl::init(Val: false),
418 cl::desc("Show the profiled sizes of the memory intrinsic calls "
419 "for shown functions"),
420 cl::sub(ShowSubcommand));
421static cl::opt<bool>
422 ShowDetailedSummary("detailed-summary", cl::init(Val: false),
423 cl::desc("Show detailed profile summary"),
424 cl::sub(ShowSubcommand));
425static cl::list<uint32_t> DetailedSummaryCutoffs(
426 cl::CommaSeparated, "detailed-summary-cutoffs",
427 cl::desc(
428 "Cutoff percentages (times 10000) for generating detailed summary"),
429 cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand));
430static cl::opt<bool>
431 ShowHotFuncList("hot-func-list", cl::init(Val: false),
432 cl::desc("Show profile summary of a list of hot functions"),
433 cl::sub(ShowSubcommand));
434static cl::opt<bool>
435 ShowAllFunctions("all-functions", cl::init(Val: false),
436 cl::desc("Details for each and every function"),
437 cl::sub(ShowSubcommand));
438static cl::opt<bool> ShowCS("showcs", cl::init(Val: false),
439 cl::desc("Show context sensitive counts"),
440 cl::sub(ShowSubcommand));
441static cl::opt<ProfileKinds> ShowProfileKind(
442 cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand),
443 cl::init(Val: instr),
444 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
445 clEnumVal(sample, "Sample profile"),
446 clEnumVal(memory, "MemProf memory access profile")));
447static cl::opt<uint32_t> TopNFunctions(
448 "topn", cl::init(Val: 0),
449 cl::desc("Show the list of functions with the largest internal counts"),
450 cl::sub(ShowSubcommand));
451static cl::opt<uint32_t> ShowValueCutoff(
452 "value-cutoff", cl::init(Val: 0),
453 cl::desc("Set the count value cutoff. Functions with the maximum count "
454 "less than this value will not be printed out. (Default is 0)"),
455 cl::sub(ShowSubcommand));
456static cl::opt<bool> OnlyListBelow(
457 "list-below-cutoff", cl::init(Val: false),
458 cl::desc("Only output names of functions whose max count values are "
459 "below the cutoff value"),
460 cl::sub(ShowSubcommand));
461static cl::opt<bool> ShowProfileSymbolList(
462 "show-prof-sym-list", cl::init(Val: false),
463 cl::desc("Show profile symbol list if it exists in the profile. "),
464 cl::sub(ShowSubcommand));
465static cl::opt<bool> ShowSectionInfoOnly(
466 "show-sec-info-only", cl::init(Val: false),
467 cl::desc("Show the information of each section in the sample profile. "
468 "The flag is only usable when the sample profile is in "
469 "extbinary format"),
470 cl::sub(ShowSubcommand));
471static cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(Val: false),
472 cl::desc("Show binary ids in the profile. "),
473 cl::sub(ShowSubcommand));
474static cl::opt<bool> ShowTemporalProfTraces(
475 "temporal-profile-traces",
476 cl::desc("Show temporal profile traces in the profile."),
477 cl::sub(ShowSubcommand));
478
479static cl::opt<bool>
480 ShowCovered("covered", cl::init(Val: false),
481 cl::desc("Show only the functions that have been executed."),
482 cl::sub(ShowSubcommand));
483
484static cl::opt<bool> ShowProfileVersion("profile-version", cl::init(Val: false),
485 cl::desc("Show profile version. "),
486 cl::sub(ShowSubcommand));
487
488// Options specific to order subcommand.
489static cl::opt<unsigned>
490 NumTestTraces("num-test-traces", cl::init(Val: 0),
491 cl::desc("Keep aside the last <num-test-traces> traces in "
492 "the profile when computing the function order and "
493 "instead use them to evaluate that order"),
494 cl::sub(OrderSubcommand));
495
// Marker string used to indicate that several static functions map to the
// same name.
const std::string DuplicateNameStr = "----";
499
500static void warn(Twine Message, StringRef Whence = "", StringRef Hint = "") {
501 WithColor::warning();
502 if (!Whence.empty())
503 errs() << Whence << ": ";
504 errs() << Message << "\n";
505 if (!Hint.empty())
506 WithColor::note() << Hint << "\n";
507}
508
509static void warn(Error E, StringRef Whence = "") {
510 if (E.isA<InstrProfError>()) {
511 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
512 warn(Message: IPE.message(), Whence);
513 });
514 }
515}
516
517static void exitWithError(Twine Message, StringRef Whence = "",
518 StringRef Hint = "") {
519 WithColor::error();
520 if (!Whence.empty())
521 errs() << Whence << ": ";
522 errs() << Message << "\n";
523 if (!Hint.empty())
524 WithColor::note() << Hint << "\n";
525 ::exit(status: 1);
526}
527
528static void exitWithError(Error E, StringRef Whence = "") {
529 if (E.isA<InstrProfError>()) {
530 handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
531 instrprof_error instrError = IPE.get();
532 StringRef Hint = "";
533 if (instrError == instrprof_error::unrecognized_format) {
534 // Hint in case user missed specifying the profile type.
535 Hint = "Perhaps you forgot to use the --sample or --memory option?";
536 }
537 exitWithError(Message: IPE.message(), Whence, Hint);
538 });
539 return;
540 }
541
542 exitWithError(Message: toString(E: std::move(E)), Whence);
543}
544
545static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
546 exitWithError(Message: EC.message(), Whence);
547}
548
549static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
550 StringRef Whence = "") {
551 if (FailMode == failIfAnyAreInvalid)
552 exitWithErrorCode(EC, Whence);
553 else
554 warn(Message: EC.message(), Whence);
555}
556
557static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
558 StringRef WhenceFunction = "",
559 bool ShowHint = true) {
560 if (!WhenceFile.empty())
561 errs() << WhenceFile << ": ";
562 if (!WhenceFunction.empty())
563 errs() << WhenceFunction << ": ";
564
565 auto IPE = instrprof_error::success;
566 E = handleErrors(E: std::move(E),
567 Hs: [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
568 IPE = E->get();
569 return Error(std::move(E));
570 });
571 errs() << toString(E: std::move(E)) << "\n";
572
573 if (ShowHint) {
574 StringRef Hint = "";
575 if (IPE != instrprof_error::success) {
576 switch (IPE) {
577 case instrprof_error::hash_mismatch:
578 case instrprof_error::count_mismatch:
579 case instrprof_error::value_site_count_mismatch:
580 Hint = "Make sure that all profile data to be merged is generated "
581 "from the same binary.";
582 break;
583 default:
584 break;
585 }
586 }
587
588 if (!Hint.empty())
589 errs() << Hint << "\n";
590 }
591}
592
593namespace {
594/// A remapper from original symbol names to new symbol names based on a file
595/// containing a list of mappings from old name to new name.
596class SymbolRemapper {
597 std::unique_ptr<MemoryBuffer> File;
598 DenseMap<StringRef, StringRef> RemappingTable;
599
600public:
601 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
602 static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
603 auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
604 if (!BufOrError)
605 exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
606
607 auto Remapper = std::make_unique<SymbolRemapper>();
608 Remapper->File = std::move(BufOrError.get());
609
610 for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
611 !LineIt.is_at_eof(); ++LineIt) {
612 std::pair<StringRef, StringRef> Parts = LineIt->split(Separator: ' ');
613 if (Parts.first.empty() || Parts.second.empty() ||
614 Parts.second.count(C: ' ')) {
615 exitWithError(Message: "unexpected line in remapping file",
616 Whence: (InputFile + ":" + Twine(LineIt.line_number())).str(),
617 Hint: "expected 'old_symbol new_symbol'");
618 }
619 Remapper->RemappingTable.insert(KV: Parts);
620 }
621 return Remapper;
622 }
623
624 /// Attempt to map the given old symbol into a new symbol.
625 ///
626 /// \return The new symbol, or \p Name if no such symbol was found.
627 StringRef operator()(StringRef Name) {
628 StringRef New = RemappingTable.lookup(Val: Name);
629 return New.empty() ? Name : New;
630 }
631
632 FunctionId operator()(FunctionId Name) {
633 // MD5 name cannot be remapped.
634 if (!Name.isStringRef())
635 return Name;
636 StringRef New = RemappingTable.lookup(Val: Name.stringRef());
637 return New.empty() ? Name : FunctionId(New);
638 }
639};
640}
641
/// One input profile together with its weight.
struct WeightedFile {
  /// Path of the input profile.
  std::string Filename;
  /// Weight attached to this input.
  uint64_t Weight;
};
646typedef SmallVector<WeightedFile, 5> WeightedFileVector;
647
648/// Keep track of merged data and reported errors.
649struct WriterContext {
650 std::mutex Lock;
651 InstrProfWriter Writer;
652 std::vector<std::pair<Error, std::string>> Errors;
653 std::mutex &ErrLock;
654 SmallSet<instrprof_error, 4> &WriterErrorCodes;
655
656 WriterContext(bool IsSparse, std::mutex &ErrLock,
657 SmallSet<instrprof_error, 4> &WriterErrorCodes,
658 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
659 : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
660 MemProfVersionRequested, MemProfFullSchema,
661 MemprofGenerateRandomHotness, MemprofGenerateRandomHotnessSeed),
662 ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
663};
664
665/// Computer the overlap b/w profile BaseFilename and TestFileName,
666/// and store the program level result to Overlap.
667static void overlapInput(const std::string &BaseFilename,
668 const std::string &TestFilename, WriterContext *WC,
669 OverlapStats &Overlap,
670 const OverlapFuncFilters &FuncFilter,
671 raw_fd_ostream &OS, bool IsCS) {
672 auto FS = vfs::getRealFileSystem();
673 auto ReaderOrErr = InstrProfReader::create(Path: TestFilename, FS&: *FS);
674 if (Error E = ReaderOrErr.takeError()) {
675 // Skip the empty profiles by returning sliently.
676 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
677 if (ErrorCode != instrprof_error::empty_raw_profile)
678 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
679 args: TestFilename);
680 return;
681 }
682
683 auto Reader = std::move(ReaderOrErr.get());
684 for (auto &I : *Reader) {
685 OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
686 FuncOverlap.setFuncInfo(Name: I.Name, Hash: I.Hash);
687
688 WC->Writer.overlapRecord(Other: std::move(I), Overlap, FuncLevelOverlap&: FuncOverlap, FuncFilter);
689 FuncOverlap.dump(OS);
690 }
691}
692
693/// Load an input into a writer context.
694static void
695loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
696 const InstrProfCorrelator *Correlator, const StringRef ProfiledBinary,
697 WriterContext *WC, const object::BuildIDFetcher *BIDFetcher = nullptr,
698 const ProfCorrelatorKind *BIDFetcherCorrelatorKind = nullptr) {
699 std::unique_lock<std::mutex> CtxGuard{WC->Lock};
700
701 // Copy the filename, because llvm::ThreadPool copied the input "const
702 // WeightedFile &" by value, making a reference to the filename within it
703 // invalid outside of this packaged task.
704 std::string Filename = Input.Filename;
705
706 using ::llvm::memprof::RawMemProfReader;
707 if (RawMemProfReader::hasFormat(Path: Input.Filename)) {
708 auto ReaderOrErr = RawMemProfReader::create(Path: Input.Filename, ProfiledBinary);
709 if (!ReaderOrErr) {
710 exitWithError(E: ReaderOrErr.takeError(), Whence: Input.Filename);
711 }
712 std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
713 // Check if the profile types can be merged, e.g. clang frontend profiles
714 // should not be merged with memprof profiles.
715 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
716 consumeError(Err: std::move(E));
717 WC->Errors.emplace_back(
718 args: make_error<StringError>(
719 Args: "Cannot merge MemProf profile with Clang generated profile.",
720 Args: std::error_code()),
721 args&: Filename);
722 return;
723 }
724
725 auto MemProfError = [&](Error E) {
726 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
727 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
728 args&: Filename);
729 };
730
731 WC->Writer.addMemProfData(Incoming: Reader->takeMemProfData(), Warn: MemProfError);
732 return;
733 }
734
735 using ::llvm::memprof::YAMLMemProfReader;
736 if (YAMLMemProfReader::hasFormat(Path: Input.Filename)) {
737 auto ReaderOrErr = YAMLMemProfReader::create(Path: Input.Filename);
738 if (!ReaderOrErr)
739 exitWithError(E: ReaderOrErr.takeError(), Whence: Input.Filename);
740 std::unique_ptr<YAMLMemProfReader> Reader = std::move(ReaderOrErr.get());
741 // Check if the profile types can be merged, e.g. clang frontend profiles
742 // should not be merged with memprof profiles.
743 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
744 consumeError(Err: std::move(E));
745 WC->Errors.emplace_back(
746 args: make_error<StringError>(
747 Args: "Cannot merge MemProf profile with incompatible profile.",
748 Args: std::error_code()),
749 args&: Filename);
750 return;
751 }
752
753 auto MemProfError = [&](Error E) {
754 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
755 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
756 args&: Filename);
757 };
758
759 auto MemProfData = Reader->takeMemProfData();
760
761 auto DataAccessProfData = Reader->takeDataAccessProfData();
762
763 // Check for the empty input in case the YAML file is invalid.
764 if (MemProfData.Records.empty()) {
765 WC->Errors.emplace_back(
766 args: make_error<StringError>(Args: "The profile is empty.", Args: std::error_code()),
767 args&: Filename);
768 }
769
770 WC->Writer.addMemProfData(Incoming: std::move(MemProfData), Warn: MemProfError);
771 WC->Writer.addDataAccessProfData(DataAccessProfile: std::move(DataAccessProfData));
772 return;
773 }
774
775 auto FS = vfs::getRealFileSystem();
776 // TODO: This only saves the first non-fatal error from InstrProfReader, and
777 // then added to WriterContext::Errors. However, this is not extensible, if
778 // we have more non-fatal errors from InstrProfReader in the future. How
779 // should this interact with different -failure-mode?
780 std::optional<std::pair<Error, std::string>> ReaderWarning;
781 auto Warn = [&](Error E) {
782 if (ReaderWarning) {
783 consumeError(Err: std::move(E));
784 return;
785 }
786 // Only show the first time an error occurs in this file.
787 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
788 ReaderWarning = {make_error<InstrProfError>(Args&: ErrCode, Args&: Msg), Filename};
789 };
790
791 const ProfCorrelatorKind CorrelatorKind = BIDFetcherCorrelatorKind
792 ? *BIDFetcherCorrelatorKind
793 : ProfCorrelatorKind::NONE;
794 auto ReaderOrErr = InstrProfReader::create(Path: Input.Filename, FS&: *FS, Correlator,
795 BIDFetcher, BIDFetcherCorrelatorKind: CorrelatorKind, Warn);
796 if (Error E = ReaderOrErr.takeError()) {
797 // Skip the empty profiles by returning silently.
798 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
799 if (ErrCode != instrprof_error::empty_raw_profile)
800 WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
801 args&: Filename);
802 return;
803 }
804
805 auto Reader = std::move(ReaderOrErr.get());
806 if (Error E = WC->Writer.mergeProfileKind(Other: Reader->getProfileKind())) {
807 consumeError(Err: std::move(E));
808 WC->Errors.emplace_back(
809 args: make_error<StringError>(
810 Args: "Merge IR generated profile with Clang generated profile.",
811 Args: std::error_code()),
812 args&: Filename);
813 return;
814 }
815
816 for (auto &I : *Reader) {
817 if (Remapper)
818 I.Name = (*Remapper)(I.Name);
819 const StringRef FuncName = I.Name;
820 bool Reported = false;
821 WC->Writer.addRecord(I: std::move(I), Weight: Input.Weight, Warn: [&](Error E) {
822 if (Reported) {
823 consumeError(Err: std::move(E));
824 return;
825 }
826 Reported = true;
827 // Only show hint the first time an error occurs.
828 auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
829 std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
830 bool firstTime = WC->WriterErrorCodes.insert(V: ErrCode).second;
831 handleMergeWriterError(E: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
832 WhenceFile: Input.Filename, WhenceFunction: FuncName, ShowHint: firstTime);
833 });
834 }
835
836 if (KeepVTableSymbols) {
837 const InstrProfSymtab &symtab = Reader->getSymtab();
838 const auto &VTableNames = symtab.getVTableNames();
839
840 for (const auto &kv : VTableNames)
841 WC->Writer.addVTableName(VTableName: kv.getKey());
842 }
843
844 if (Reader->hasTemporalProfile()) {
845 auto &Traces = Reader->getTemporalProfTraces(Weight: Input.Weight);
846 if (!Traces.empty())
847 WC->Writer.addTemporalProfileTraces(
848 SrcTraces&: Traces, SrcStreamSize: Reader->getTemporalProfTraceStreamSize());
849 }
850 if (Reader->hasError()) {
851 if (Error E = Reader->getError()) {
852 WC->Errors.emplace_back(args: std::move(E), args&: Filename);
853 return;
854 }
855 }
856
857 std::vector<llvm::object::BuildID> BinaryIds;
858 if (Error E = Reader->readBinaryIds(BinaryIds)) {
859 WC->Errors.emplace_back(args: std::move(E), args&: Filename);
860 return;
861 }
862 WC->Writer.addBinaryIds(BIs: BinaryIds);
863
864 if (ReaderWarning) {
865 WC->Errors.emplace_back(args: std::move(ReaderWarning->first),
866 args&: ReaderWarning->second);
867 }
868}
869
870/// Merge the \p Src writer context into \p Dst.
871static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
872 for (auto &ErrorPair : Src->Errors)
873 Dst->Errors.push_back(x: std::move(ErrorPair));
874 Src->Errors.clear();
875
876 if (Error E = Dst->Writer.mergeProfileKind(Other: Src->Writer.getProfileKind()))
877 exitWithError(E: std::move(E));
878
879 Dst->Writer.mergeRecordsFromWriter(IPW: std::move(Src->Writer), Warn: [&](Error E) {
880 auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
881 std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
882 bool firstTime = Dst->WriterErrorCodes.insert(V: ErrorCode).second;
883 if (firstTime)
884 warn(Message: toString(E: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg)));
885 });
886}
887
888static StringRef
889getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
890 return Val.first();
891}
892
893static std::string
894getFuncName(const SampleProfileMap::value_type &Val) {
895 return Val.second.getContext().toString();
896}
897
898template <typename T>
899static void filterFunctions(T &ProfileMap) {
900 bool hasFilter = !FuncNameFilter.empty();
901 bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
902 if (!hasFilter && !hasNegativeFilter)
903 return;
904
905 // If filter starts with '?' it is MSVC mangled name, not a regex.
906 llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
907 if (hasFilter && FuncNameFilter[0] == '?' &&
908 ProbablyMSVCMangledName.match(String: FuncNameFilter))
909 FuncNameFilter = llvm::Regex::escape(String: FuncNameFilter);
910 if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
911 ProbablyMSVCMangledName.match(String: FuncNameNegativeFilter))
912 FuncNameNegativeFilter = llvm::Regex::escape(String: FuncNameNegativeFilter);
913
914 size_t Count = ProfileMap.size();
915 llvm::Regex Pattern(FuncNameFilter);
916 llvm::Regex NegativePattern(FuncNameNegativeFilter);
917 std::string Error;
918 if (hasFilter && !Pattern.isValid(Error))
919 exitWithError(Message: Error);
920 if (hasNegativeFilter && !NegativePattern.isValid(Error))
921 exitWithError(Message: Error);
922
923 // Handle MD5 profile, so it is still able to match using the original name.
924 std::string MD5Name = std::to_string(val: llvm::MD5Hash(Str: FuncNameFilter));
925 std::string NegativeMD5Name =
926 std::to_string(val: llvm::MD5Hash(Str: FuncNameNegativeFilter));
927
928 for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
929 auto Tmp = I++;
930 const auto &FuncName = getFuncName(*Tmp);
931 // Negative filter has higher precedence than positive filter.
932 if ((hasNegativeFilter &&
933 (NegativePattern.match(String: FuncName) ||
934 (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
935 (hasFilter && !(Pattern.match(String: FuncName) ||
936 (FunctionSamples::UseMD5 && MD5Name == FuncName))))
937 ProfileMap.erase(Tmp);
938 }
939
940 llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
941 << "in the original profile are filtered.\n";
942}
943
944static void writeInstrProfile(StringRef OutputFilename,
945 ProfileFormat OutputFormat,
946 InstrProfWriter &Writer) {
947 std::error_code EC;
948 raw_fd_ostream Output(OutputFilename.data(), EC,
949 OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
950 : sys::fs::OF_None);
951 if (EC)
952 exitWithErrorCode(EC, Whence: OutputFilename);
953
954 if (OutputFormat == PF_Text) {
955 if (Error E = Writer.writeText(OS&: Output))
956 warn(E: std::move(E));
957 } else {
958 if (Output.is_displayed())
959 exitWithError(Message: "cannot write a non-text format profile to the terminal");
960 if (Error E = Writer.write(OS&: Output))
961 warn(E: std::move(E));
962 }
963}
964
965static void mergeInstrProfile(const WeightedFileVector &Inputs,
966 SymbolRemapper *Remapper,
967 int MaxDbgCorrelationWarnings,
968 const StringRef ProfiledBinary) {
969 const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
970 const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
971 if (OutputFormat == PF_Compact_Binary)
972 exitWithError(Message: "Compact Binary is deprecated");
973 if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
974 OutputFormat != PF_Text)
975 exitWithError(Message: "unknown format is specified");
976
977 // TODO: Maybe we should support correlation with mixture of different
978 // correlation modes(w/wo debug-info/object correlation).
979 if (DebugInfoFilename.empty()) {
980 if (!BinaryFilename.empty() && (DebugInfod || !DebugFileDirectory.empty()))
981 exitWithError(Message: "Expected only one of -binary-file, -debuginfod or "
982 "-debug-file-directory");
983 } else if (!BinaryFilename.empty() || DebugInfod ||
984 !DebugFileDirectory.empty()) {
985 exitWithError(Message: "Expected only one of -debug-info, -binary-file, -debuginfod "
986 "or -debug-file-directory");
987 }
988 std::string CorrelateFilename;
989 ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
990 if (!DebugInfoFilename.empty()) {
991 CorrelateFilename = DebugInfoFilename;
992 CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
993 } else if (!BinaryFilename.empty()) {
994 CorrelateFilename = BinaryFilename;
995 CorrelateKind = ProfCorrelatorKind::BINARY;
996 }
997
998 std::unique_ptr<InstrProfCorrelator> Correlator;
999 if (CorrelateKind != InstrProfCorrelator::NONE) {
1000 if (auto Err = InstrProfCorrelator::get(Filename: CorrelateFilename, FileKind: CorrelateKind)
1001 .moveInto(Value&: Correlator))
1002 exitWithError(E: std::move(Err), Whence: CorrelateFilename);
1003 if (auto Err = Correlator->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
1004 exitWithError(E: std::move(Err), Whence: CorrelateFilename);
1005 }
1006
1007 ProfCorrelatorKind BIDFetcherCorrelateKind = ProfCorrelatorKind::NONE;
1008 std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
1009 if (DebugInfod) {
1010 llvm::HTTPClient::initialize();
1011 BIDFetcher = std::make_unique<DebuginfodFetcher>(args&: DebugFileDirectory);
1012 if (!BIDFetcherProfileCorrelate)
1013 exitWithError(Message: "Expected --correlate when --debuginfod is provided");
1014 BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
1015 } else if (!DebugFileDirectory.empty()) {
1016 BIDFetcher = std::make_unique<object::BuildIDFetcher>(args&: DebugFileDirectory);
1017 if (!BIDFetcherProfileCorrelate)
1018 exitWithError(Message: "Expected --correlate when --debug-file-directory "
1019 "is provided");
1020 BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
1021 } else if (BIDFetcherProfileCorrelate) {
1022 exitWithError(Message: "Expected --debuginfod or --debug-file-directory when "
1023 "--correlate is provided");
1024 }
1025
1026 std::mutex ErrorLock;
1027 SmallSet<instrprof_error, 4> WriterErrorCodes;
1028
1029 // If NumThreads is not specified, auto-detect a good default.
1030 if (NumThreads == 0)
1031 NumThreads = std::min(a: hardware_concurrency().compute_thread_count(),
1032 b: unsigned((Inputs.size() + 1) / 2));
1033
1034 // Initialize the writer contexts.
1035 SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
1036 for (unsigned I = 0; I < NumThreads; ++I)
1037 Contexts.emplace_back(Args: std::make_unique<WriterContext>(
1038 args&: OutputSparse, args&: ErrorLock, args&: WriterErrorCodes, args: TraceReservoirSize,
1039 args: MaxTraceLength));
1040
1041 if (NumThreads == 1) {
1042 for (const auto &Input : Inputs)
1043 loadInput(Input, Remapper, Correlator: Correlator.get(), ProfiledBinary,
1044 WC: Contexts[0].get(), BIDFetcher: BIDFetcher.get(), BIDFetcherCorrelatorKind: &BIDFetcherCorrelateKind);
1045 } else {
1046 DefaultThreadPool Pool(hardware_concurrency(ThreadCount: NumThreads));
1047
1048 // Load the inputs in parallel (N/NumThreads serial steps).
1049 unsigned Ctx = 0;
1050 for (const auto &Input : Inputs) {
1051 Pool.async(F&: loadInput, ArgList: Input, ArgList&: Remapper, ArgList: Correlator.get(), ArgList: ProfiledBinary,
1052 ArgList: Contexts[Ctx].get(), ArgList: BIDFetcher.get(),
1053 ArgList: &BIDFetcherCorrelateKind);
1054 Ctx = (Ctx + 1) % NumThreads;
1055 }
1056 Pool.wait();
1057
1058 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
1059 unsigned Mid = Contexts.size() / 2;
1060 unsigned End = Contexts.size();
1061 assert(Mid > 0 && "Expected more than one context");
1062 do {
1063 for (unsigned I = 0; I < Mid; ++I)
1064 Pool.async(F&: mergeWriterContexts, ArgList: Contexts[I].get(),
1065 ArgList: Contexts[I + Mid].get());
1066 Pool.wait();
1067 if (End & 1) {
1068 Pool.async(F&: mergeWriterContexts, ArgList: Contexts[0].get(),
1069 ArgList: Contexts[End - 1].get());
1070 Pool.wait();
1071 }
1072 End = Mid;
1073 Mid /= 2;
1074 } while (Mid > 0);
1075 }
1076
1077 // Handle deferred errors encountered during merging. If the number of errors
1078 // is equal to the number of inputs the merge failed.
1079 unsigned NumErrors = 0;
1080 for (std::unique_ptr<WriterContext> &WC : Contexts) {
1081 for (auto &ErrorPair : WC->Errors) {
1082 ++NumErrors;
1083 warn(Message: toString(E: std::move(ErrorPair.first)), Whence: ErrorPair.second);
1084 }
1085 }
1086 if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
1087 (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
1088 exitWithError(Message: "no profile can be merged");
1089
1090 filterFunctions(ProfileMap&: Contexts[0]->Writer.getProfileData());
1091
1092 writeInstrProfile(OutputFilename, OutputFormat, Writer&: Contexts[0]->Writer);
1093}
1094
1095/// The profile entry for a function in instrumentation profile.
1096struct InstrProfileEntry {
1097 uint64_t MaxCount = 0;
1098 uint64_t NumEdgeCounters = 0;
1099 float ZeroCounterRatio = 0.0;
1100 InstrProfRecord *ProfRecord;
1101 InstrProfileEntry(InstrProfRecord *Record);
1102 InstrProfileEntry() = default;
1103};
1104
1105InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
1106 ProfRecord = Record;
1107 uint64_t CntNum = Record->Counts.size();
1108 uint64_t ZeroCntNum = 0;
1109 for (size_t I = 0; I < CntNum; ++I) {
1110 MaxCount = std::max(a: MaxCount, b: Record->Counts[I]);
1111 ZeroCntNum += !Record->Counts[I];
1112 }
1113 ZeroCounterRatio = (float)ZeroCntNum / CntNum;
1114 NumEdgeCounters = CntNum;
1115}
1116
1117/// Either set all the counters in the instr profile entry \p IFE to
1118/// -1 / -2 /in order to drop the profile or scale up the
1119/// counters in \p IFP to be above hot / cold threshold. We use
1120/// the ratio of zero counters in the profile of a function to
1121/// decide the profile is helpful or harmful for performance,
1122/// and to choose whether to scale up or drop it.
1123static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
1124 uint64_t HotInstrThreshold,
1125 uint64_t ColdInstrThreshold,
1126 float ZeroCounterThreshold) {
1127 InstrProfRecord *ProfRecord = IFE.ProfRecord;
1128 if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
1129 // If all or most of the counters of the function are zero, the
1130 // profile is unaccountable and should be dropped. Reset all the
1131 // counters to be -1 / -2 and PGO profile-use will drop the profile.
1132 // All counters being -1 also implies that the function is hot so
1133 // PGO profile-use will also set the entry count metadata to be
1134 // above hot threshold.
1135 // All counters being -2 implies that the function is warm so
1136 // PGO profile-use will also set the entry count metadata to be
1137 // above cold threshold.
1138 auto Kind =
1139 (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
1140 ProfRecord->setPseudoCount(Kind);
1141 return;
1142 }
1143
1144 // Scale up the MaxCount to be multiple times above hot / cold threshold.
1145 const unsigned MultiplyFactor = 3;
1146 uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1147 uint64_t Numerator = Threshold * MultiplyFactor;
1148
1149 // Make sure Threshold for warm counters is below the HotInstrThreshold.
1150 if (!SetToHot && Threshold >= HotInstrThreshold) {
1151 Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
1152 }
1153
1154 uint64_t Denominator = IFE.MaxCount;
1155 if (Numerator <= Denominator)
1156 return;
1157 ProfRecord->scale(N: Numerator, D: Denominator, Warn: [&](instrprof_error E) {
1158 warn(Message: toString(E: make_error<InstrProfError>(Args&: E)));
1159 });
1160}
1161
/// Positions inside ProfileSummaryBuilder::DefaultCutoffs of the cutoffs used
/// to derive the cold and the hot count thresholds.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
1164
1165using sampleprof::FSDiscriminatorPass;
1166
1167// Internal options to set FSDiscriminatorPass. Used in merge and show
1168// commands.
1169static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1170 "fs-discriminator-pass", cl::init(Val: PassLast), cl::Hidden,
1171 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1172 "pass beyond this value. The enum values are defined in "
1173 "Support/Discriminator.h"),
1174 cl::values(clEnumVal(Base, "Use base discriminators only"),
1175 clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1176 clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1177 clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1178 clEnumVal(PassLast, "Use all discriminator bits (default)")));
1179
1180static unsigned getDiscriminatorMask() {
1181 return getN1Bits(N: getFSPassBitEnd(P: FSDiscriminatorPassOption.getValue()));
1182}
1183
1184/// Adjust the instr profile in \p WC based on the sample profile in
1185/// \p Reader.
1186static void
1187adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1188 std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1189 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1190 unsigned InstrProfColdThreshold) {
1191 // Function to its entry in instr profile.
1192 StringMap<InstrProfileEntry> InstrProfileMap;
1193 StringMap<StringRef> StaticFuncMap;
1194 InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1195
1196 auto checkSampleProfileHasFUnique = [&Reader]() {
1197 for (const auto &PD : Reader->getProfiles()) {
1198 auto &FContext = PD.second.getContext();
1199 if (FContext.toString().find(s: FunctionSamples::UniqSuffix) !=
1200 std::string::npos) {
1201 return true;
1202 }
1203 }
1204 return false;
1205 };
1206
1207 bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
1208
1209 auto buildStaticFuncMap = [&StaticFuncMap,
1210 SampleProfileHasFUnique](const StringRef Name) {
1211 std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1212 size_t PrefixPos = StringRef::npos;
1213 for (auto &FilePrefix : FilePrefixes) {
1214 std::string NamePrefix = FilePrefix + GlobalIdentifierDelimiter;
1215 PrefixPos = Name.find_insensitive(Str: NamePrefix);
1216 if (PrefixPos == StringRef::npos)
1217 continue;
1218 PrefixPos += NamePrefix.size();
1219 break;
1220 }
1221
1222 if (PrefixPos == StringRef::npos) {
1223 return;
1224 }
1225
1226 StringRef NewName = Name.drop_front(N: PrefixPos);
1227 StringRef FName = Name.substr(Start: 0, N: PrefixPos - 1);
1228 if (NewName.size() == 0) {
1229 return;
1230 }
1231
1232 // This name should have a static linkage.
1233 size_t PostfixPos = NewName.find(Str: FunctionSamples::UniqSuffix);
1234 bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1235
1236 // If sample profile and instrumented profile do not agree on symbol
1237 // uniqification.
1238 if (SampleProfileHasFUnique != ProfileHasFUnique) {
1239 // If instrumented profile uses -funique-internal-linkage-symbols,
1240 // we need to trim the name.
1241 if (ProfileHasFUnique) {
1242 NewName = NewName.substr(Start: 0, N: PostfixPos);
1243 } else {
1244 // If sample profile uses -funique-internal-linkage-symbols,
1245 // we build the map.
1246 std::string NStr =
1247 NewName.str() + getUniqueInternalLinkagePostfix(FName);
1248 NewName = StringRef(NStr);
1249 StaticFuncMap[NewName] = Name;
1250 return;
1251 }
1252 }
1253
1254 auto [It, Inserted] = StaticFuncMap.try_emplace(Key: NewName, Args: Name);
1255 if (!Inserted)
1256 It->second = DuplicateNameStr;
1257 };
1258
1259 // We need to flatten the SampleFDO profile as the InstrFDO
1260 // profile does not have inlined callsite profiles.
1261 // One caveat is the pre-inlined function -- their samples
1262 // should be collapsed into the caller function.
1263 // Here we do a DFS traversal to get the flatten profile
1264 // info: the sum of entrycount and the max of maxcount.
1265 // Here is the algorithm:
1266 // recursive (FS, root_name) {
1267 // name = FS->getName();
1268 // get samples for FS;
1269 // if (InstrProf.find(name) {
1270 // root_name = name;
1271 // } else {
1272 // if (name is in static_func map) {
1273 // root_name = static_name;
1274 // }
1275 // }
1276 // update the Map entry for root_name;
1277 // for (subfs: FS) {
1278 // recursive(subfs, root_name);
1279 // }
1280 // }
1281 //
1282 // Here is an example.
1283 //
1284 // SampleProfile:
1285 // foo:12345:1000
1286 // 1: 1000
1287 // 2.1: 1000
1288 // 15: 5000
1289 // 4: bar:1000
1290 // 1: 1000
1291 // 2: goo:3000
1292 // 1: 3000
1293 // 8: bar:40000
1294 // 1: 10000
1295 // 2: goo:30000
1296 // 1: 30000
1297 //
1298 // InstrProfile has two entries:
1299 // foo
1300 // bar.cc;bar
1301 //
1302 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1303 // {"foo", {1000, 5000}}
1304 // {"bar.cc;bar", {11000, 30000}}
1305 //
1306 // foo's has an entry count of 1000, and max body count of 5000.
1307 // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1308 // 10000), and max count of 30000 (from the callsite in line 8).
1309 //
1310 // Note that goo's count will remain in bar.cc;bar() as it does not have an
1311 // entry in InstrProfile.
1312 llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1313 auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1314 &InstrProfileMap](const FunctionSamples &FS,
1315 const StringRef &RootName) {
1316 auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1317 const StringRef &RootName,
1318 auto &BuildImpl) -> void {
1319 std::string NameStr = FS.getFunction().str();
1320 const StringRef Name = NameStr;
1321 const StringRef *NewRootName = &RootName;
1322 uint64_t EntrySample = FS.getHeadSamplesEstimate();
1323 uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
1324
1325 auto It = InstrProfileMap.find(Key: Name);
1326 if (It != InstrProfileMap.end()) {
1327 NewRootName = &Name;
1328 } else {
1329 auto NewName = StaticFuncMap.find(Key: Name);
1330 if (NewName != StaticFuncMap.end()) {
1331 It = InstrProfileMap.find(Key: NewName->second);
1332 if (NewName->second != DuplicateNameStr) {
1333 NewRootName = &NewName->second;
1334 }
1335 } else {
1336 // Here the EntrySample is of an inlined function, so we should not
1337 // update the EntrySample in the map.
1338 EntrySample = 0;
1339 }
1340 }
1341 EntrySample += FlattenSampleMap[*NewRootName].first;
1342 MaxBodySample =
1343 std::max(a: FlattenSampleMap[*NewRootName].second, b: MaxBodySample);
1344 FlattenSampleMap[*NewRootName] =
1345 std::make_pair(x&: EntrySample, y&: MaxBodySample);
1346
1347 for (const auto &C : FS.getCallsiteSamples())
1348 for (const auto &F : C.second)
1349 BuildImpl(F.second, *NewRootName, BuildImpl);
1350 };
1351 BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1352 };
1353
1354 for (auto &PD : WC->Writer.getProfileData()) {
1355 // Populate IPBuilder.
1356 for (const auto &PDV : PD.getValue()) {
1357 InstrProfRecord Record = PDV.second;
1358 IPBuilder.addRecord(Record);
1359 }
1360
1361 // If a function has multiple entries in instr profile, skip it.
1362 if (PD.getValue().size() != 1)
1363 continue;
1364
1365 // Initialize InstrProfileMap.
1366 InstrProfRecord *R = &PD.getValue().begin()->second;
1367 StringRef FullName = PD.getKey();
1368 InstrProfileMap[FullName] = InstrProfileEntry(R);
1369 buildStaticFuncMap(FullName);
1370 }
1371
1372 for (auto &PD : Reader->getProfiles()) {
1373 sampleprof::FunctionSamples &FS = PD.second;
1374 std::string Name = FS.getFunction().str();
1375 BuildMaxSampleMap(FS, Name);
1376 }
1377
1378 ProfileSummary InstrPS = *IPBuilder.getSummary();
1379 ProfileSummary SamplePS = Reader->getSummary();
1380
1381 // Compute cold thresholds for instr profile and sample profile.
1382 uint64_t HotSampleThreshold =
1383 ProfileSummaryBuilder::getEntryForPercentile(
1384 DS: SamplePS.getDetailedSummary(),
1385 Percentile: ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1386 .MinCount;
1387 uint64_t ColdSampleThreshold =
1388 ProfileSummaryBuilder::getEntryForPercentile(
1389 DS: SamplePS.getDetailedSummary(),
1390 Percentile: ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1391 .MinCount;
1392 uint64_t HotInstrThreshold =
1393 ProfileSummaryBuilder::getEntryForPercentile(
1394 DS: InstrPS.getDetailedSummary(),
1395 Percentile: ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1396 .MinCount;
1397 uint64_t ColdInstrThreshold =
1398 InstrProfColdThreshold
1399 ? InstrProfColdThreshold
1400 : ProfileSummaryBuilder::getEntryForPercentile(
1401 DS: InstrPS.getDetailedSummary(),
1402 Percentile: ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1403 .MinCount;
1404
1405 // Find hot/warm functions in sample profile which is cold in instr profile
1406 // and adjust the profiles of those functions in the instr profile.
1407 for (const auto &E : FlattenSampleMap) {
1408 uint64_t SampleMaxCount = std::max(a: E.second.first, b: E.second.second);
1409 if (SampleMaxCount < ColdSampleThreshold)
1410 continue;
1411 StringRef Name = E.first();
1412 auto It = InstrProfileMap.find(Key: Name);
1413 if (It == InstrProfileMap.end()) {
1414 auto NewName = StaticFuncMap.find(Key: Name);
1415 if (NewName != StaticFuncMap.end()) {
1416 It = InstrProfileMap.find(Key: NewName->second);
1417 if (NewName->second == DuplicateNameStr) {
1418 WithColor::warning()
1419 << "Static function " << Name
1420 << " has multiple promoted names, cannot adjust profile.\n";
1421 }
1422 }
1423 }
1424 if (It == InstrProfileMap.end() ||
1425 It->second.MaxCount > ColdInstrThreshold ||
1426 It->second.NumEdgeCounters < SupplMinSizeThreshold)
1427 continue;
1428 bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1429 updateInstrProfileEntry(IFE&: It->second, SetToHot, HotInstrThreshold,
1430 ColdInstrThreshold, ZeroCounterThreshold);
1431 }
1432}
1433
1434/// The main function to supplement instr profile with sample profile.
1435/// \Inputs contains the instr profile. \p SampleFilename specifies the
1436/// sample profile. \p OutputFilename specifies the output profile name.
1437/// \p OutputFormat specifies the output profile format. \p OutputSparse
1438/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1439/// specifies the minimal size for the functions whose profile will be
1440/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1441/// a function contains too many zero counters and whether its profile
1442/// should be dropped. \p InstrProfColdThreshold is the user specified
1443/// cold threshold which will override the cold threshold got from the
1444/// instr profile summary.
1445static void supplementInstrProfile(const WeightedFileVector &Inputs,
1446 StringRef SampleFilename, bool OutputSparse,
1447 unsigned SupplMinSizeThreshold,
1448 float ZeroCounterThreshold,
1449 unsigned InstrProfColdThreshold) {
1450 if (OutputFilename == "-")
1451 exitWithError(Message: "cannot write indexed profdata format to stdout");
1452 if (Inputs.size() != 1)
1453 exitWithError(Message: "expect one input to be an instr profile");
1454 if (Inputs[0].Weight != 1)
1455 exitWithError(Message: "expect instr profile doesn't have weight");
1456
1457 StringRef InstrFilename = Inputs[0].Filename;
1458
1459 // Read sample profile.
1460 LLVMContext Context;
1461 auto FS = vfs::getRealFileSystem();
1462 auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1463 Filename: SampleFilename.str(), C&: Context, FS&: *FS, P: FSDiscriminatorPassOption);
1464 if (std::error_code EC = ReaderOrErr.getError())
1465 exitWithErrorCode(EC, Whence: SampleFilename);
1466 auto Reader = std::move(ReaderOrErr.get());
1467 if (std::error_code EC = Reader->read())
1468 exitWithErrorCode(EC, Whence: SampleFilename);
1469
1470 // Read instr profile.
1471 std::mutex ErrorLock;
1472 SmallSet<instrprof_error, 4> WriterErrorCodes;
1473 auto WC = std::make_unique<WriterContext>(args&: OutputSparse, args&: ErrorLock,
1474 args&: WriterErrorCodes);
1475 loadInput(Input: Inputs[0], Remapper: nullptr, Correlator: nullptr, /*ProfiledBinary=*/"", WC: WC.get());
1476 if (WC->Errors.size() > 0)
1477 exitWithError(E: std::move(WC->Errors[0].first), Whence: InstrFilename);
1478
1479 adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1480 InstrProfColdThreshold);
1481 writeInstrProfile(OutputFilename, OutputFormat, Writer&: WC->Writer);
1482}
1483
1484/// Make a copy of the given function samples with all symbol names remapped
1485/// by the provided symbol remapper.
1486static sampleprof::FunctionSamples
1487remapSamples(const sampleprof::FunctionSamples &Samples,
1488 SymbolRemapper &Remapper, sampleprof_error &Error) {
1489 sampleprof::FunctionSamples Result;
1490 Result.setFunction(Remapper(Samples.getFunction()));
1491 Result.addTotalSamples(Num: Samples.getTotalSamples());
1492 Result.addHeadSamples(Num: Samples.getHeadSamples());
1493 for (const auto &BodySample : Samples.getBodySamples()) {
1494 uint32_t MaskedDiscriminator =
1495 BodySample.first.Discriminator & getDiscriminatorMask();
1496 Result.addBodySamples(LineOffset: BodySample.first.LineOffset, Discriminator: MaskedDiscriminator,
1497 Num: BodySample.second.getSamples());
1498 for (const auto &Target : BodySample.second.getCallTargets()) {
1499 Result.addCalledTargetSamples(LineOffset: BodySample.first.LineOffset,
1500 Discriminator: MaskedDiscriminator,
1501 Func: Remapper(Target.first), Num: Target.second);
1502 }
1503 }
1504 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1505 sampleprof::FunctionSamplesMap &Target =
1506 Result.functionSamplesAt(Loc: CallsiteSamples.first);
1507 for (const auto &Callsite : CallsiteSamples.second) {
1508 sampleprof::FunctionSamples Remapped =
1509 remapSamples(Samples: Callsite.second, Remapper, Error);
1510 mergeSampleProfErrors(Accumulator&: Error,
1511 Result: Target[Remapped.getFunction()].merge(Other: Remapped));
1512 }
1513 }
1514 return Result;
1515}
1516
1517static sampleprof::SampleProfileFormat FormatMap[] = {
1518 sampleprof::SPF_None,
1519 sampleprof::SPF_Text,
1520 sampleprof::SPF_None,
1521 sampleprof::SPF_Ext_Binary,
1522 sampleprof::SPF_GCC,
1523 sampleprof::SPF_Binary};
1524
1525static std::unique_ptr<MemoryBuffer>
1526getInputFileBuf(const StringRef &InputFile) {
1527 if (InputFile == "")
1528 return {};
1529
1530 auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
1531 if (!BufOrError)
1532 exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
1533
1534 return std::move(*BufOrError);
1535}
1536
1537static void populateProfileSymbolList(MemoryBuffer *Buffer,
1538 sampleprof::ProfileSymbolList &PSL) {
1539 if (!Buffer)
1540 return;
1541
1542 SmallVector<StringRef, 32> SymbolVec;
1543 StringRef Data = Buffer->getBuffer();
1544 Data.split(A&: SymbolVec, Separator: '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1545
1546 for (StringRef SymbolStr : SymbolVec)
1547 PSL.add(Name: SymbolStr.trim());
1548}
1549
1550static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1551 ProfileFormat OutputFormat,
1552 MemoryBuffer *Buffer,
1553 sampleprof::ProfileSymbolList &WriterList,
1554 bool CompressAllSections, bool UseMD5,
1555 bool GenPartialProfile) {
1556 if (SplitLayout) {
1557 if (OutputFormat == PF_Binary)
1558 warn(Message: "-split-layout is ignored. Specify -extbinary to enable it");
1559 else
1560 Writer.setUseCtxSplitLayout();
1561 }
1562
1563 populateProfileSymbolList(Buffer, PSL&: WriterList);
1564 if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
1565 warn(Message: "Profile Symbol list is not empty but the output format is not "
1566 "ExtBinary format. The list will be lost in the output. ");
1567
1568 Writer.setProfileSymbolList(&WriterList);
1569
1570 if (CompressAllSections) {
1571 if (OutputFormat != PF_Ext_Binary)
1572 warn(Message: "-compress-all-section is ignored. Specify -extbinary to enable it");
1573 else
1574 Writer.setToCompressAllSections();
1575 }
1576 if (UseMD5) {
1577 if (OutputFormat != PF_Ext_Binary)
1578 warn(Message: "-use-md5 is ignored. Specify -extbinary to enable it");
1579 else
1580 Writer.setUseMD5();
1581 }
1582 if (GenPartialProfile) {
1583 if (OutputFormat != PF_Ext_Binary)
1584 warn(Message: "-gen-partial-profile is ignored. Specify -extbinary to enable it");
1585 else
1586 Writer.setPartialProfile();
1587 }
1588}
1589
1590static void mergeSampleProfile(const WeightedFileVector &Inputs,
1591 SymbolRemapper *Remapper,
1592 StringRef ProfileSymbolListFile,
1593 size_t OutputSizeLimit) {
1594 using namespace sampleprof;
1595 SampleProfileMap ProfileMap;
1596 SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1597 LLVMContext Context;
1598 sampleprof::ProfileSymbolList WriterList;
1599 std::optional<bool> ProfileIsProbeBased;
1600 std::optional<bool> ProfileIsCS;
1601 for (const auto &Input : Inputs) {
1602 auto FS = vfs::getRealFileSystem();
1603 auto ReaderOrErr = SampleProfileReader::create(Filename: Input.Filename, C&: Context, FS&: *FS,
1604 P: FSDiscriminatorPassOption);
1605 if (std::error_code EC = ReaderOrErr.getError()) {
1606 warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1607 continue;
1608 }
1609
1610 // We need to keep the readers around until after all the files are
1611 // read so that we do not lose the function names stored in each
1612 // reader's memory. The function names are needed to write out the
1613 // merged profile map.
1614 Readers.push_back(Elt: std::move(ReaderOrErr.get()));
1615 const auto Reader = Readers.back().get();
1616 if (std::error_code EC = Reader->read()) {
1617 warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1618 Readers.pop_back();
1619 continue;
1620 }
1621
1622 SampleProfileMap &Profiles = Reader->getProfiles();
1623 if (ProfileIsProbeBased &&
1624 ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1625 exitWithError(
1626 Message: "cannot merge probe-based profile with non-probe-based profile");
1627 ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1628 if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1629 exitWithError(Message: "cannot merge CS profile with non-CS profile");
1630 ProfileIsCS = FunctionSamples::ProfileIsCS;
1631 for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1632 I != E; ++I) {
1633 sampleprof_error Result = sampleprof_error::success;
1634 FunctionSamples Remapped =
1635 Remapper ? remapSamples(Samples: I->second, Remapper&: *Remapper, Error&: Result)
1636 : FunctionSamples();
1637 FunctionSamples &Samples = Remapper ? Remapped : I->second;
1638 SampleContext FContext = Samples.getContext();
1639 mergeSampleProfErrors(Accumulator&: Result,
1640 Result: ProfileMap[FContext].merge(Other: Samples, Weight: Input.Weight));
1641 if (Result != sampleprof_error::success) {
1642 std::error_code EC = make_error_code(E: Result);
1643 handleMergeWriterError(E: errorCodeToError(EC), WhenceFile: Input.Filename,
1644 WhenceFunction: FContext.toString());
1645 }
1646 }
1647
1648 if (!DropProfileSymbolList) {
1649 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1650 Reader->getProfileSymbolList();
1651 if (ReaderList)
1652 WriterList.merge(List: *ReaderList);
1653 }
1654 }
1655
1656 if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1657 // Use threshold calculated from profile summary unless specified.
1658 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1659 auto Summary = Builder.computeSummaryForProfiles(Profiles: ProfileMap);
1660 uint64_t SampleProfColdThreshold =
1661 ProfileSummaryBuilder::getColdCountThreshold(
1662 DS: (Summary->getDetailedSummary()));
1663
1664 // Trim and merge cold context profile using cold threshold above;
1665 SampleContextTrimmer(ProfileMap)
1666 .trimAndMergeColdContextProfiles(
1667 ColdCountThreshold: SampleProfColdThreshold, TrimColdContext: SampleTrimColdContext,
1668 MergeColdContext: SampleMergeColdContext, ColdContextFrameLength: SampleColdContextFrameDepth, TrimBaseProfileOnly: false);
1669 }
1670
1671 if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1672 ProfileConverter::flattenProfile(ProfileMap, ProfileIsCS: FunctionSamples::ProfileIsCS);
1673 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1674 } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1675 ProfileConverter CSConverter(ProfileMap);
1676 CSConverter.convertCSProfiles();
1677 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1678 }
1679
1680 filterFunctions(ProfileMap);
1681
1682 auto WriterOrErr =
1683 SampleProfileWriter::create(Filename: OutputFilename, Format: FormatMap[OutputFormat]);
1684 if (std::error_code EC = WriterOrErr.getError())
1685 exitWithErrorCode(EC, Whence: OutputFilename);
1686
1687 auto Writer = std::move(WriterOrErr.get());
1688 // WriterList will have StringRef refering to string in Buffer.
1689 // Make sure Buffer lives as long as WriterList.
1690 auto Buffer = getInputFileBuf(InputFile: ProfileSymbolListFile);
1691 handleExtBinaryWriter(Writer&: *Writer, OutputFormat, Buffer: Buffer.get(), WriterList,
1692 CompressAllSections, UseMD5, GenPartialProfile);
1693
1694 // If OutputSizeLimit is 0 (default), it is the same as write().
1695 if (std::error_code EC =
1696 Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1697 exitWithErrorCode(EC);
1698}
1699
1700static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1701 StringRef WeightStr, FileName;
1702 std::tie(args&: WeightStr, args&: FileName) = WeightedFilename.split(Separator: ',');
1703
1704 uint64_t Weight;
1705 if (WeightStr.getAsInteger(Radix: 10, Result&: Weight) || Weight < 1)
1706 exitWithError(Message: "input weight must be a positive integer");
1707
1708 llvm::SmallString<128> ResolvedFileName;
1709 llvm::sys::fs::expand_tilde(path: FileName, output&: ResolvedFileName);
1710
1711 return {.Filename: std::string(ResolvedFileName), .Weight: Weight};
1712}
1713
1714static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1715 StringRef Filename = WF.Filename;
1716 uint64_t Weight = WF.Weight;
1717
1718 // If it's STDIN just pass it on.
1719 if (Filename == "-") {
1720 WNI.push_back(Elt: {.Filename: std::string(Filename), .Weight: Weight});
1721 return;
1722 }
1723
1724 llvm::sys::fs::file_status Status;
1725 llvm::sys::fs::status(path: Filename, result&: Status);
1726 if (!llvm::sys::fs::exists(status: Status))
1727 exitWithErrorCode(EC: make_error_code(E: errc::no_such_file_or_directory),
1728 Whence: Filename);
1729 // If it's a source file, collect it.
1730 if (llvm::sys::fs::is_regular_file(status: Status)) {
1731 WNI.push_back(Elt: {.Filename: std::string(Filename), .Weight: Weight});
1732 return;
1733 }
1734
1735 if (llvm::sys::fs::is_directory(status: Status)) {
1736 std::error_code EC;
1737 for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1738 F != E && !EC; F.increment(ec&: EC)) {
1739 if (llvm::sys::fs::is_regular_file(Path: F->path())) {
1740 addWeightedInput(WNI, WF: {.Filename: F->path(), .Weight: Weight});
1741 }
1742 }
1743 if (EC)
1744 exitWithErrorCode(EC, Whence: Filename);
1745 }
1746}
1747
1748static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1749 WeightedFileVector &WFV) {
1750 if (!Buffer)
1751 return;
1752
1753 SmallVector<StringRef, 8> Entries;
1754 StringRef Data = Buffer->getBuffer();
1755 Data.split(A&: Entries, Separator: '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1756 for (const StringRef &FileWeightEntry : Entries) {
1757 StringRef SanitizedEntry = FileWeightEntry.trim(Chars: " \t\v\f\r");
1758 // Skip comments.
1759 if (SanitizedEntry.starts_with(Prefix: "#"))
1760 continue;
1761 // If there's no comma, it's an unweighted profile.
1762 else if (!SanitizedEntry.contains(C: ','))
1763 addWeightedInput(WNI&: WFV, WF: {.Filename: std::string(SanitizedEntry), .Weight: 1});
1764 else
1765 addWeightedInput(WNI&: WFV, WF: parseWeightedFile(WeightedFilename: SanitizedEntry));
1766 }
1767}
1768
1769static int merge_main(StringRef ProgName) {
1770 WeightedFileVector WeightedInputs;
1771 for (StringRef Filename : InputFilenames)
1772 addWeightedInput(WNI&: WeightedInputs, WF: {.Filename: std::string(Filename), .Weight: 1});
1773 for (StringRef WeightedFilename : WeightedInputFilenames)
1774 addWeightedInput(WNI&: WeightedInputs, WF: parseWeightedFile(WeightedFilename));
1775
1776 // Make sure that the file buffer stays alive for the duration of the
1777 // weighted input vector's lifetime.
1778 auto Buffer = getInputFileBuf(InputFile: InputFilenamesFile);
1779 parseInputFilenamesFile(Buffer: Buffer.get(), WFV&: WeightedInputs);
1780
1781 if (WeightedInputs.empty())
1782 exitWithError(Message: "no input files specified. See " + ProgName + " merge -help");
1783
1784 if (DumpInputFileList) {
1785 for (auto &WF : WeightedInputs)
1786 outs() << WF.Weight << "," << WF.Filename << "\n";
1787 return 0;
1788 }
1789
1790 std::unique_ptr<SymbolRemapper> Remapper;
1791 if (!RemappingFile.empty())
1792 Remapper = SymbolRemapper::create(InputFile: RemappingFile);
1793
1794 if (!SupplInstrWithSample.empty()) {
1795 if (ProfileKind != instr)
1796 exitWithError(
1797 Message: "-supplement-instr-with-sample can only work with -instr. ");
1798
1799 supplementInstrProfile(Inputs: WeightedInputs, SampleFilename: SupplInstrWithSample, OutputSparse,
1800 SupplMinSizeThreshold, ZeroCounterThreshold,
1801 InstrProfColdThreshold);
1802 return 0;
1803 }
1804
1805 if (ProfileKind == instr)
1806 mergeInstrProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), MaxDbgCorrelationWarnings,
1807 ProfiledBinary);
1808 else
1809 mergeSampleProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), ProfileSymbolListFile,
1810 OutputSizeLimit);
1811 return 0;
1812}
1813
1814/// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1815static void overlapInstrProfile(const std::string &BaseFilename,
1816 const std::string &TestFilename,
1817 const OverlapFuncFilters &FuncFilter,
1818 raw_fd_ostream &OS, bool IsCS) {
1819 std::mutex ErrorLock;
1820 SmallSet<instrprof_error, 4> WriterErrorCodes;
1821 WriterContext Context(false, ErrorLock, WriterErrorCodes);
1822 WeightedFile WeightedInput{.Filename: BaseFilename, .Weight: 1};
1823 OverlapStats Overlap;
1824 Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1825 if (E)
1826 exitWithError(E: std::move(E), Whence: "error in getting profile count sums");
1827 if (Overlap.Base.CountSum < 1.0f) {
1828 OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1829 exit(status: 0);
1830 }
1831 if (Overlap.Test.CountSum < 1.0f) {
1832 OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1833 exit(status: 0);
1834 }
1835 loadInput(Input: WeightedInput, Remapper: nullptr, Correlator: nullptr, /*ProfiledBinary=*/"", WC: &Context);
1836 overlapInput(BaseFilename, TestFilename, WC: &Context, Overlap, FuncFilter, OS,
1837 IsCS);
1838 Overlap.dump(OS);
1839}
1840
1841namespace {
1842struct SampleOverlapStats {
1843 SampleContext BaseName;
1844 SampleContext TestName;
1845 // Number of overlap units
1846 uint64_t OverlapCount = 0;
1847 // Total samples of overlap units
1848 uint64_t OverlapSample = 0;
1849 // Number of and total samples of units that only present in base or test
1850 // profile
1851 uint64_t BaseUniqueCount = 0;
1852 uint64_t BaseUniqueSample = 0;
1853 uint64_t TestUniqueCount = 0;
1854 uint64_t TestUniqueSample = 0;
1855 // Number of units and total samples in base or test profile
1856 uint64_t BaseCount = 0;
1857 uint64_t BaseSample = 0;
1858 uint64_t TestCount = 0;
1859 uint64_t TestSample = 0;
1860 // Number of and total samples of units that present in at least one profile
1861 uint64_t UnionCount = 0;
1862 uint64_t UnionSample = 0;
1863 // Weighted similarity
1864 double Similarity = 0.0;
1865 // For SampleOverlapStats instances representing functions, weights of the
1866 // function in base and test profiles
1867 double BaseWeight = 0.0;
1868 double TestWeight = 0.0;
1869
1870 SampleOverlapStats() = default;
1871};
1872} // end anonymous namespace
1873
namespace {
/// Per-function sample statistics: the sum of the samples, the largest
/// single sample, and the number of hot blocks.
struct FuncSampleStats {
  uint64_t SampleSum{0};
  uint64_t MaxSample{0};
  uint64_t HotBlockCount{0};

  FuncSampleStats() = default;

  FuncSampleStats(uint64_t Sum, uint64_t Max, uint64_t HotBlocks)
      : SampleSum(Sum), MaxSample(Max), HotBlockCount(HotBlocks) {}
};
} // end anonymous namespace
1886
namespace {
/// Outcome of comparing the current elements of two sorted containers.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Drives a sort-merge walk over two sorted maps. T is the map iterator
/// type; elements are compared through their `first` member with
/// operator<.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  MatchStep(T FirstBegin, T FirstLimit, T SecondBegin, T SecondLimit)
      : FirstIter(FirstBegin), FirstEnd(FirstLimit), SecondIter(SecondBegin),
        SecondEnd(SecondLimit) {}

  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// Move forward according to the status of the previous step, then
  /// recompute the status for the new position. While the status is still
  /// MS_None no comparison has happened yet, so the iterators stay where
  /// they are and only the status is computed. Once both containers are
  /// exhausted the status is left unchanged.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Position in, and end of, the first container.
  T FirstIter;
  T FirstEnd;
  // Position in, and end of, the second container.
  T SecondIter;
  T SecondEnd;
  // Status computed by the most recent step.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace

template <class T> void MatchStep<T>::updateOneStep() {
  // Consume whatever the previous step reported on.
  if (Status == MS_Match || Status == MS_FirstUnique)
    ++FirstIter;
  if (Status == MS_Match || Status == MS_SecondUnique)
    ++SecondIter;

  const bool FirstDone = isFirstFinished();
  const bool SecondDone = isSecondFinished();
  if (FirstDone && SecondDone)
    return;

  // Only one side has elements left: they are all unique to that side.
  if (SecondDone) {
    Status = MS_FirstUnique;
    return;
  }
  if (FirstDone) {
    Status = MS_SecondUnique;
    return;
  }

  // Both sides have an element: the smaller key is unique to its side,
  // equivalent keys match.
  if (FirstIter->first < SecondIter->first)
    Status = MS_FirstUnique;
  else if (SecondIter->first < FirstIter->first)
    Status = MS_SecondUnique;
  else
    Status = MS_Match;
}
1961
1962// Return the sum of line/block samples, the max line/block sample, and the
1963// number of line/block samples above the given threshold in a function
1964// including its inlinees.
1965static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1966 FuncSampleStats &FuncStats,
1967 uint64_t HotThreshold) {
1968 for (const auto &L : Func.getBodySamples()) {
1969 uint64_t Sample = L.second.getSamples();
1970 FuncStats.SampleSum += Sample;
1971 FuncStats.MaxSample = std::max(a: FuncStats.MaxSample, b: Sample);
1972 if (Sample >= HotThreshold)
1973 ++FuncStats.HotBlockCount;
1974 }
1975
1976 for (const auto &C : Func.getCallsiteSamples()) {
1977 for (const auto &F : C.second)
1978 getFuncSampleStats(Func: F.second, FuncStats, HotThreshold);
1979 }
1980}
1981
1982/// Predicate that determines if a function is hot with a given threshold. We
1983/// keep it separate from its callsites for possible extension in the future.
1984static bool isFunctionHot(const FuncSampleStats &FuncStats,
1985 uint64_t HotThreshold) {
1986 // We intentionally compare the maximum sample count in a function with the
1987 // HotThreshold to get an approximate determination on hot functions.
1988 return (FuncStats.MaxSample >= HotThreshold);
1989}
1990
1991namespace {
1992class SampleOverlapAggregator {
1993public:
1994 SampleOverlapAggregator(const std::string &BaseFilename,
1995 const std::string &TestFilename,
1996 double LowSimilarityThreshold, double Epsilon,
1997 const OverlapFuncFilters &FuncFilter)
1998 : BaseFilename(BaseFilename), TestFilename(TestFilename),
1999 LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
2000 FuncFilter(FuncFilter) {}
2001
2002 /// Detect 0-sample input profile and report to output stream. This interface
2003 /// should be called after loadProfiles().
2004 bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
2005
2006 /// Write out function-level similarity statistics for functions specified by
2007 /// options --function, --value-cutoff, and --similarity-cutoff.
2008 void dumpFuncSimilarity(raw_fd_ostream &OS) const;
2009
2010 /// Write out program-level similarity and overlap statistics.
2011 void dumpProgramSummary(raw_fd_ostream &OS) const;
2012
2013 /// Write out hot-function and hot-block statistics for base_profile,
2014 /// test_profile, and their overlap. For both cases, the overlap HO is
2015 /// calculated as follows:
2016 /// Given the number of functions (or blocks) that are hot in both profiles
2017 /// HCommon and the number of functions (or blocks) that are hot in at
2018 /// least one profile HUnion, HO = HCommon / HUnion.
2019 void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
2020
2021 /// This function tries matching functions in base and test profiles. For each
2022 /// pair of matched functions, it aggregates the function-level
2023 /// similarity into a profile-level similarity. It also dump function-level
2024 /// similarity information of functions specified by --function,
2025 /// --value-cutoff, and --similarity-cutoff options. The program-level
2026 /// similarity PS is computed as follows:
2027 /// Given function-level similarity FS(A) for all function A, the
2028 /// weight of function A in base profile WB(A), and the weight of function
2029 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
2030 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
2031 /// meaning no-overlap.
2032 void computeSampleProfileOverlap(raw_fd_ostream &OS);
2033
2034 /// Initialize ProfOverlap with the sum of samples in base and test
2035 /// profiles. This function also computes and keeps the sum of samples and
2036 /// max sample counts of each function in BaseStats and TestStats for later
2037 /// use to avoid re-computations.
2038 void initializeSampleProfileOverlap();
2039
2040 /// Load profiles specified by BaseFilename and TestFilename.
2041 std::error_code loadProfiles();
2042
2043 using FuncSampleStatsMap =
2044 std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
2045
2046private:
2047 SampleOverlapStats ProfOverlap;
2048 SampleOverlapStats HotFuncOverlap;
2049 SampleOverlapStats HotBlockOverlap;
2050 std::string BaseFilename;
2051 std::string TestFilename;
2052 std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
2053 std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
2054 // BaseStats and TestStats hold FuncSampleStats for each function, with
2055 // function name as the key.
2056 FuncSampleStatsMap BaseStats;
2057 FuncSampleStatsMap TestStats;
2058 // Low similarity threshold in floating point number
2059 double LowSimilarityThreshold;
2060 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
2061 // for tracking hot blocks.
2062 uint64_t BaseHotThreshold;
2063 uint64_t TestHotThreshold;
2064 // A small threshold used to round the results of floating point accumulations
2065 // to resolve imprecision.
2066 const double Epsilon;
2067 std::multimap<double, SampleOverlapStats, std::greater<double>>
2068 FuncSimilarityDump;
2069 // FuncFilter carries specifications in options --value-cutoff and
2070 // --function.
2071 OverlapFuncFilters FuncFilter;
2072 // Column offsets for printing the function-level details table.
2073 static const unsigned int TestWeightCol = 15;
2074 static const unsigned int SimilarityCol = 30;
2075 static const unsigned int OverlapCol = 43;
2076 static const unsigned int BaseUniqueCol = 53;
2077 static const unsigned int TestUniqueCol = 67;
2078 static const unsigned int BaseSampleCol = 81;
2079 static const unsigned int TestSampleCol = 96;
2080 static const unsigned int FuncNameCol = 111;
2081
2082 /// Return a similarity of two line/block sample counters in the same
2083 /// function in base and test profiles. The line/block-similarity BS(i) is
2084 /// computed as follows:
2085 /// For an offsets i, given the sample count at i in base profile BB(i),
2086 /// the sample count at i in test profile BT(i), the sum of sample counts
2087 /// in this function in base profile SB, and the sum of sample counts in
2088 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
2089 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
2090 double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
2091 const SampleOverlapStats &FuncOverlap) const;
2092
2093 void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
2094 uint64_t HotBlockCount);
2095
2096 void getHotFunctions(const FuncSampleStatsMap &ProfStats,
2097 FuncSampleStatsMap &HotFunc,
2098 uint64_t HotThreshold) const;
2099
2100 void computeHotFuncOverlap();
2101
2102 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2103 /// Difference for two sample units in a matched function according to the
2104 /// given match status.
2105 void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
2106 uint64_t HotBlockCount,
2107 SampleOverlapStats &FuncOverlap,
2108 double &Difference, MatchStatus Status);
2109
2110 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2111 /// Difference for unmatched callees that only present in one profile in a
2112 /// matched caller function.
2113 void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
2114 SampleOverlapStats &FuncOverlap,
2115 double &Difference, MatchStatus Status);
2116
2117 /// This function updates sample overlap statistics of an overlap function in
2118 /// base and test profile. It also calculates a function-internal similarity
2119 /// FIS as follows:
2120 /// For offsets i that have samples in at least one profile in this
2121 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
2122 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2123 /// 0.0 meaning no overlap.
2124 double computeSampleFunctionInternalOverlap(
2125 const sampleprof::FunctionSamples &BaseFunc,
2126 const sampleprof::FunctionSamples &TestFunc,
2127 SampleOverlapStats &FuncOverlap);
2128
2129 /// Function-level similarity (FS) is a weighted value over function internal
2130 /// similarity (FIS). This function computes a function's FS from its FIS by
2131 /// applying the weight.
2132 double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
2133 uint64_t TestFuncSample) const;
2134
2135 /// The function-level similarity FS(A) for a function A is computed as
2136 /// follows:
2137 /// Compute a function-internal similarity FIS(A) by
2138 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
2139 /// function A in base profile WB(A), and the weight of function A in test
2140 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2141 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2142 double
2143 computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
2144 const sampleprof::FunctionSamples *TestFunc,
2145 SampleOverlapStats *FuncOverlap,
2146 uint64_t BaseFuncSample,
2147 uint64_t TestFuncSample);
2148
2149 /// Profile-level similarity (PS) is a weighted aggregate over function-level
2150 /// similarities (FS). This method weights the FS value by the function
2151 /// weights in the base and test profiles for the aggregation.
2152 double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
2153 uint64_t TestFuncSample) const;
2154};
2155} // end anonymous namespace
2156
2157bool SampleOverlapAggregator::detectZeroSampleProfile(
2158 raw_fd_ostream &OS) const {
2159 bool HaveZeroSample = false;
2160 if (ProfOverlap.BaseSample == 0) {
2161 OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2162 HaveZeroSample = true;
2163 }
2164 if (ProfOverlap.TestSample == 0) {
2165 OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2166 HaveZeroSample = true;
2167 }
2168 return HaveZeroSample;
2169}
2170
2171double SampleOverlapAggregator::computeBlockSimilarity(
2172 uint64_t BaseSample, uint64_t TestSample,
2173 const SampleOverlapStats &FuncOverlap) const {
2174 double BaseFrac = 0.0;
2175 double TestFrac = 0.0;
2176 if (FuncOverlap.BaseSample > 0)
2177 BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2178 if (FuncOverlap.TestSample > 0)
2179 TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2180 return 1.0 - std::fabs(x: BaseFrac - TestFrac);
2181}
2182
2183void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2184 uint64_t TestSample,
2185 uint64_t HotBlockCount) {
2186 bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2187 bool IsTestHot = (TestSample >= TestHotThreshold);
2188 if (!IsBaseHot && !IsTestHot)
2189 return;
2190
2191 HotBlockOverlap.UnionCount += HotBlockCount;
2192 if (IsBaseHot)
2193 HotBlockOverlap.BaseCount += HotBlockCount;
2194 if (IsTestHot)
2195 HotBlockOverlap.TestCount += HotBlockCount;
2196 if (IsBaseHot && IsTestHot)
2197 HotBlockOverlap.OverlapCount += HotBlockCount;
2198}
2199
2200void SampleOverlapAggregator::getHotFunctions(
2201 const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2202 uint64_t HotThreshold) const {
2203 for (const auto &F : ProfStats) {
2204 if (isFunctionHot(FuncStats: F.second, HotThreshold))
2205 HotFunc.emplace(args: F.first, args: F.second);
2206 }
2207}
2208
2209void SampleOverlapAggregator::computeHotFuncOverlap() {
2210 FuncSampleStatsMap BaseHotFunc;
2211 getHotFunctions(ProfStats: BaseStats, HotFunc&: BaseHotFunc, HotThreshold: BaseHotThreshold);
2212 HotFuncOverlap.BaseCount = BaseHotFunc.size();
2213
2214 FuncSampleStatsMap TestHotFunc;
2215 getHotFunctions(ProfStats: TestStats, HotFunc&: TestHotFunc, HotThreshold: TestHotThreshold);
2216 HotFuncOverlap.TestCount = TestHotFunc.size();
2217 HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2218
2219 for (const auto &F : BaseHotFunc) {
2220 if (TestHotFunc.count(x: F.first))
2221 ++HotFuncOverlap.OverlapCount;
2222 else
2223 ++HotFuncOverlap.UnionCount;
2224 }
2225}
2226
2227void SampleOverlapAggregator::updateOverlapStatsForFunction(
2228 uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2229 SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2230 assert(Status != MS_None &&
2231 "Match status should be updated before updating overlap statistics");
2232 if (Status == MS_FirstUnique) {
2233 TestSample = 0;
2234 FuncOverlap.BaseUniqueSample += BaseSample;
2235 } else if (Status == MS_SecondUnique) {
2236 BaseSample = 0;
2237 FuncOverlap.TestUniqueSample += TestSample;
2238 } else {
2239 ++FuncOverlap.OverlapCount;
2240 }
2241
2242 FuncOverlap.UnionSample += std::max(a: BaseSample, b: TestSample);
2243 FuncOverlap.OverlapSample += std::min(a: BaseSample, b: TestSample);
2244 Difference +=
2245 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2246 updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2247}
2248
2249void SampleOverlapAggregator::updateForUnmatchedCallee(
2250 const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2251 double &Difference, MatchStatus Status) {
2252 assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
2253 "Status must be either of the two unmatched cases");
2254 FuncSampleStats FuncStats;
2255 if (Status == MS_FirstUnique) {
2256 getFuncSampleStats(Func, FuncStats, HotThreshold: BaseHotThreshold);
2257 updateOverlapStatsForFunction(BaseSample: FuncStats.SampleSum, TestSample: 0,
2258 HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2259 Difference, Status);
2260 } else {
2261 getFuncSampleStats(Func, FuncStats, HotThreshold: TestHotThreshold);
2262 updateOverlapStatsForFunction(BaseSample: 0, TestSample: FuncStats.SampleSum,
2263 HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2264 Difference, Status);
2265 }
2266}
2267
2268double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2269 const sampleprof::FunctionSamples &BaseFunc,
2270 const sampleprof::FunctionSamples &TestFunc,
2271 SampleOverlapStats &FuncOverlap) {
2272
2273 using namespace sampleprof;
2274
2275 double Difference = 0;
2276
2277 // Accumulate Difference for regular line/block samples in the function.
2278 // We match them through sort-merge join algorithm because
2279 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2280 // by their offsets.
2281 MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2282 BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2283 TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2284 BlockIterStep.updateOneStep();
2285 while (!BlockIterStep.areBothFinished()) {
2286 uint64_t BaseSample =
2287 BlockIterStep.isFirstFinished()
2288 ? 0
2289 : BlockIterStep.getFirstIter()->second.getSamples();
2290 uint64_t TestSample =
2291 BlockIterStep.isSecondFinished()
2292 ? 0
2293 : BlockIterStep.getSecondIter()->second.getSamples();
2294 updateOverlapStatsForFunction(BaseSample, TestSample, HotBlockCount: 1, FuncOverlap,
2295 Difference, Status: BlockIterStep.getMatchStatus());
2296
2297 BlockIterStep.updateOneStep();
2298 }
2299
2300 // Accumulate Difference for callsite lines in the function. We match
2301 // them through sort-merge algorithm because
2302 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2303 // ordered by their offsets.
2304 MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2305 BaseFunc.getCallsiteSamples().cbegin(),
2306 BaseFunc.getCallsiteSamples().cend(),
2307 TestFunc.getCallsiteSamples().cbegin(),
2308 TestFunc.getCallsiteSamples().cend());
2309 CallsiteIterStep.updateOneStep();
2310 while (!CallsiteIterStep.areBothFinished()) {
2311 MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2312 assert(CallsiteStepStatus != MS_None &&
2313 "Match status should be updated before entering loop body");
2314
2315 if (CallsiteStepStatus != MS_Match) {
2316 auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2317 ? CallsiteIterStep.getFirstIter()
2318 : CallsiteIterStep.getSecondIter();
2319 for (const auto &F : Callsite->second)
2320 updateForUnmatchedCallee(Func: F.second, FuncOverlap, Difference,
2321 Status: CallsiteStepStatus);
2322 } else {
2323 // There may be multiple inlinees at the same offset, so we need to try
2324 // matching all of them. This match is implemented through sort-merge
2325 // algorithm because callsite records at the same offset are ordered by
2326 // function names.
2327 MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2328 CallsiteIterStep.getFirstIter()->second.cbegin(),
2329 CallsiteIterStep.getFirstIter()->second.cend(),
2330 CallsiteIterStep.getSecondIter()->second.cbegin(),
2331 CallsiteIterStep.getSecondIter()->second.cend());
2332 CalleeIterStep.updateOneStep();
2333 while (!CalleeIterStep.areBothFinished()) {
2334 MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2335 if (CalleeStepStatus != MS_Match) {
2336 auto Callee = (CalleeStepStatus == MS_FirstUnique)
2337 ? CalleeIterStep.getFirstIter()
2338 : CalleeIterStep.getSecondIter();
2339 updateForUnmatchedCallee(Func: Callee->second, FuncOverlap, Difference,
2340 Status: CalleeStepStatus);
2341 } else {
2342 // An inlined function can contain other inlinees inside, so compute
2343 // the Difference recursively.
2344 Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2345 BaseFunc: CalleeIterStep.getFirstIter()->second,
2346 TestFunc: CalleeIterStep.getSecondIter()->second,
2347 FuncOverlap);
2348 }
2349 CalleeIterStep.updateOneStep();
2350 }
2351 }
2352 CallsiteIterStep.updateOneStep();
2353 }
2354
2355 // Difference reflects the total differences of line/block samples in this
2356 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2357 // reflect the similarity between function profiles in [0.0f to 1.0f].
2358 return (2.0 - Difference) / 2;
2359}
2360
2361double SampleOverlapAggregator::weightForFuncSimilarity(
2362 double FuncInternalSimilarity, uint64_t BaseFuncSample,
2363 uint64_t TestFuncSample) const {
2364 // Compute the weight as the distance between the function weights in two
2365 // profiles.
2366 double BaseFrac = 0.0;
2367 double TestFrac = 0.0;
2368 assert(ProfOverlap.BaseSample > 0 &&
2369 "Total samples in base profile should be greater than 0");
2370 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2371 assert(ProfOverlap.TestSample > 0 &&
2372 "Total samples in test profile should be greater than 0");
2373 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2374 double WeightDistance = std::fabs(x: BaseFrac - TestFrac);
2375
2376 // Take WeightDistance into the similarity.
2377 return FuncInternalSimilarity * (1 - WeightDistance);
2378}
2379
2380double
2381SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2382 uint64_t BaseFuncSample,
2383 uint64_t TestFuncSample) const {
2384
2385 double BaseFrac = 0.0;
2386 double TestFrac = 0.0;
2387 assert(ProfOverlap.BaseSample > 0 &&
2388 "Total samples in base profile should be greater than 0");
2389 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
2390 assert(ProfOverlap.TestSample > 0 &&
2391 "Total samples in test profile should be greater than 0");
2392 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
2393 return FuncSimilarity * (BaseFrac + TestFrac);
2394}
2395
2396double SampleOverlapAggregator::computeSampleFunctionOverlap(
2397 const sampleprof::FunctionSamples *BaseFunc,
2398 const sampleprof::FunctionSamples *TestFunc,
2399 SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2400 uint64_t TestFuncSample) {
2401 // Default function internal similarity before weighted, meaning two functions
2402 // has no overlap.
2403 const double DefaultFuncInternalSimilarity = 0;
2404 double FuncSimilarity;
2405 double FuncInternalSimilarity;
2406
2407 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2408 // In this case, we use DefaultFuncInternalSimilarity as the function internal
2409 // similarity.
2410 if (!BaseFunc || !TestFunc) {
2411 FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2412 } else {
2413 assert(FuncOverlap != nullptr &&
2414 "FuncOverlap should be provided in this case");
2415 FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2416 BaseFunc: *BaseFunc, TestFunc: *TestFunc, FuncOverlap&: *FuncOverlap);
2417 // Now, FuncInternalSimilarity may be a little less than 0 due to
2418 // imprecision of floating point accumulations. Make it zero if the
2419 // difference is below Epsilon.
2420 FuncInternalSimilarity = (std::fabs(x: FuncInternalSimilarity - 0) < Epsilon)
2421 ? 0
2422 : FuncInternalSimilarity;
2423 }
2424 FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2425 BaseFuncSample, TestFuncSample);
2426 return FuncSimilarity;
2427}
2428
2429void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2430 using namespace sampleprof;
2431
2432 std::unordered_map<SampleContext, const FunctionSamples *,
2433 SampleContext::Hash>
2434 BaseFuncProf;
2435 const auto &BaseProfiles = BaseReader->getProfiles();
2436 for (const auto &BaseFunc : BaseProfiles) {
2437 BaseFuncProf.emplace(args&: BaseFunc.second.getContext(), args: &(BaseFunc.second));
2438 }
2439 ProfOverlap.UnionCount = BaseFuncProf.size();
2440
2441 const auto &TestProfiles = TestReader->getProfiles();
2442 for (const auto &TestFunc : TestProfiles) {
2443 SampleOverlapStats FuncOverlap;
2444 FuncOverlap.TestName = TestFunc.second.getContext();
2445 assert(TestStats.count(FuncOverlap.TestName) &&
2446 "TestStats should have records for all functions in test profile "
2447 "except inlinees");
2448 FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2449
2450 bool Matched = false;
2451 const auto Match = BaseFuncProf.find(x: FuncOverlap.TestName);
2452 if (Match == BaseFuncProf.end()) {
2453 const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2454 ++ProfOverlap.TestUniqueCount;
2455 ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2456 FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2457
2458 updateHotBlockOverlap(BaseSample: 0, TestSample: FuncStats.SampleSum, HotBlockCount: FuncStats.HotBlockCount);
2459
2460 double FuncSimilarity = computeSampleFunctionOverlap(
2461 BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: 0, TestFuncSample: FuncStats.SampleSum);
2462 ProfOverlap.Similarity +=
2463 weightByImportance(FuncSimilarity, BaseFuncSample: 0, TestFuncSample: FuncStats.SampleSum);
2464
2465 ++ProfOverlap.UnionCount;
2466 ProfOverlap.UnionSample += FuncStats.SampleSum;
2467 } else {
2468 ++ProfOverlap.OverlapCount;
2469
2470 // Two functions match with each other. Compute function-level overlap and
2471 // aggregate them into profile-level overlap.
2472 FuncOverlap.BaseName = Match->second->getContext();
2473 assert(BaseStats.count(FuncOverlap.BaseName) &&
2474 "BaseStats should have records for all functions in base profile "
2475 "except inlinees");
2476 FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2477
2478 FuncOverlap.Similarity = computeSampleFunctionOverlap(
2479 BaseFunc: Match->second, TestFunc: &TestFunc.second, FuncOverlap: &FuncOverlap, BaseFuncSample: FuncOverlap.BaseSample,
2480 TestFuncSample: FuncOverlap.TestSample);
2481 ProfOverlap.Similarity +=
2482 weightByImportance(FuncSimilarity: FuncOverlap.Similarity, BaseFuncSample: FuncOverlap.BaseSample,
2483 TestFuncSample: FuncOverlap.TestSample);
2484 ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2485 ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2486
2487 // Accumulate the percentage of base unique and test unique samples into
2488 // ProfOverlap.
2489 ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2490 ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2491
2492 // Remove matched base functions for later reporting functions not found
2493 // in test profile.
2494 BaseFuncProf.erase(position: Match);
2495 Matched = true;
2496 }
2497
2498 // Print function-level similarity information if specified by options.
2499 assert(TestStats.count(FuncOverlap.TestName) &&
2500 "TestStats should have records for all functions in test profile "
2501 "except inlinees");
2502 if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2503 (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2504 (Matched && !FuncFilter.NameFilter.empty() &&
2505 FuncOverlap.BaseName.toString().find(str: FuncFilter.NameFilter) !=
2506 std::string::npos)) {
2507 assert(ProfOverlap.BaseSample > 0 &&
2508 "Total samples in base profile should be greater than 0");
2509 FuncOverlap.BaseWeight =
2510 static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2511 assert(ProfOverlap.TestSample > 0 &&
2512 "Total samples in test profile should be greater than 0");
2513 FuncOverlap.TestWeight =
2514 static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2515 FuncSimilarityDump.emplace(args&: FuncOverlap.BaseWeight, args&: FuncOverlap);
2516 }
2517 }
2518
2519 // Traverse through functions in base profile but not in test profile.
2520 for (const auto &F : BaseFuncProf) {
2521 assert(BaseStats.count(F.second->getContext()) &&
2522 "BaseStats should have records for all functions in base profile "
2523 "except inlinees");
2524 const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2525 ++ProfOverlap.BaseUniqueCount;
2526 ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2527
2528 updateHotBlockOverlap(BaseSample: FuncStats.SampleSum, TestSample: 0, HotBlockCount: FuncStats.HotBlockCount);
2529
2530 double FuncSimilarity = computeSampleFunctionOverlap(
2531 BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: 0);
2532 ProfOverlap.Similarity +=
2533 weightByImportance(FuncSimilarity, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: 0);
2534
2535 ProfOverlap.UnionSample += FuncStats.SampleSum;
2536 }
2537
2538 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2539 // of floating point accumulations. Make it 1.0 if the difference is below
2540 // Epsilon.
2541 ProfOverlap.Similarity = (std::fabs(x: ProfOverlap.Similarity - 1) < Epsilon)
2542 ? 1
2543 : ProfOverlap.Similarity;
2544
2545 computeHotFuncOverlap();
2546}
2547
2548void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2549 const auto &BaseProf = BaseReader->getProfiles();
2550 for (const auto &I : BaseProf) {
2551 ++ProfOverlap.BaseCount;
2552 FuncSampleStats FuncStats;
2553 getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: BaseHotThreshold);
2554 ProfOverlap.BaseSample += FuncStats.SampleSum;
2555 BaseStats.emplace(args&: I.second.getContext(), args&: FuncStats);
2556 }
2557
2558 const auto &TestProf = TestReader->getProfiles();
2559 for (const auto &I : TestProf) {
2560 ++ProfOverlap.TestCount;
2561 FuncSampleStats FuncStats;
2562 getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: TestHotThreshold);
2563 ProfOverlap.TestSample += FuncStats.SampleSum;
2564 TestStats.emplace(args&: I.second.getContext(), args&: FuncStats);
2565 }
2566
2567 ProfOverlap.BaseName = StringRef(BaseFilename);
2568 ProfOverlap.TestName = StringRef(TestFilename);
2569}
2570
2571void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2572 using namespace sampleprof;
2573
2574 if (FuncSimilarityDump.empty())
2575 return;
2576
2577 formatted_raw_ostream FOS(OS);
2578 FOS << "Function-level details:\n";
2579 FOS << "Base weight";
2580 FOS.PadToColumn(NewCol: TestWeightCol);
2581 FOS << "Test weight";
2582 FOS.PadToColumn(NewCol: SimilarityCol);
2583 FOS << "Similarity";
2584 FOS.PadToColumn(NewCol: OverlapCol);
2585 FOS << "Overlap";
2586 FOS.PadToColumn(NewCol: BaseUniqueCol);
2587 FOS << "Base unique";
2588 FOS.PadToColumn(NewCol: TestUniqueCol);
2589 FOS << "Test unique";
2590 FOS.PadToColumn(NewCol: BaseSampleCol);
2591 FOS << "Base samples";
2592 FOS.PadToColumn(NewCol: TestSampleCol);
2593 FOS << "Test samples";
2594 FOS.PadToColumn(NewCol: FuncNameCol);
2595 FOS << "Function name\n";
2596 for (const auto &F : FuncSimilarityDump) {
2597 double OverlapPercent =
2598 F.second.UnionSample > 0
2599 ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2600 : 0;
2601 double BaseUniquePercent =
2602 F.second.BaseSample > 0
2603 ? static_cast<double>(F.second.BaseUniqueSample) /
2604 F.second.BaseSample
2605 : 0;
2606 double TestUniquePercent =
2607 F.second.TestSample > 0
2608 ? static_cast<double>(F.second.TestUniqueSample) /
2609 F.second.TestSample
2610 : 0;
2611
2612 FOS << format(Fmt: "%.2f%%", Vals: F.second.BaseWeight * 100);
2613 FOS.PadToColumn(NewCol: TestWeightCol);
2614 FOS << format(Fmt: "%.2f%%", Vals: F.second.TestWeight * 100);
2615 FOS.PadToColumn(NewCol: SimilarityCol);
2616 FOS << format(Fmt: "%.2f%%", Vals: F.second.Similarity * 100);
2617 FOS.PadToColumn(NewCol: OverlapCol);
2618 FOS << format(Fmt: "%.2f%%", Vals: OverlapPercent * 100);
2619 FOS.PadToColumn(NewCol: BaseUniqueCol);
2620 FOS << format(Fmt: "%.2f%%", Vals: BaseUniquePercent * 100);
2621 FOS.PadToColumn(NewCol: TestUniqueCol);
2622 FOS << format(Fmt: "%.2f%%", Vals: TestUniquePercent * 100);
2623 FOS.PadToColumn(NewCol: BaseSampleCol);
2624 FOS << F.second.BaseSample;
2625 FOS.PadToColumn(NewCol: TestSampleCol);
2626 FOS << F.second.TestSample;
2627 FOS.PadToColumn(NewCol: FuncNameCol);
2628 FOS << F.second.TestName.toString() << "\n";
2629 }
2630}
2631
2632void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2633 OS << "Profile overlap infomation for base_profile: "
2634 << ProfOverlap.BaseName.toString()
2635 << " and test_profile: " << ProfOverlap.TestName.toString()
2636 << "\nProgram level:\n";
2637
2638 OS << " Whole program profile similarity: "
2639 << format(Fmt: "%.3f%%", Vals: ProfOverlap.Similarity * 100) << "\n";
2640
2641 assert(ProfOverlap.UnionSample > 0 &&
2642 "Total samples in two profile should be greater than 0");
2643 double OverlapPercent =
2644 static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2645 assert(ProfOverlap.BaseSample > 0 &&
2646 "Total samples in base profile should be greater than 0");
2647 double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2648 ProfOverlap.BaseSample;
2649 assert(ProfOverlap.TestSample > 0 &&
2650 "Total samples in test profile should be greater than 0");
2651 double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2652 ProfOverlap.TestSample;
2653
2654 OS << " Whole program sample overlap: "
2655 << format(Fmt: "%.3f%%", Vals: OverlapPercent * 100) << "\n";
2656 OS << " percentage of samples unique in base profile: "
2657 << format(Fmt: "%.3f%%", Vals: BaseUniquePercent * 100) << "\n";
2658 OS << " percentage of samples unique in test profile: "
2659 << format(Fmt: "%.3f%%", Vals: TestUniquePercent * 100) << "\n";
2660 OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2661 << " total samples in test profile: " << ProfOverlap.TestSample << "\n";
2662
2663 assert(ProfOverlap.UnionCount > 0 &&
2664 "There should be at least one function in two input profiles");
2665 double FuncOverlapPercent =
2666 static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2667 OS << " Function overlap: " << format(Fmt: "%.3f%%", Vals: FuncOverlapPercent * 100)
2668 << "\n";
2669 OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
2670 OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2671 << "\n";
2672 OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
2673 << "\n";
2674}
2675
2676void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2677 raw_fd_ostream &OS) const {
2678 assert(HotFuncOverlap.UnionCount > 0 &&
2679 "There should be at least one hot function in two input profiles");
2680 OS << " Hot-function overlap: "
2681 << format(Fmt: "%.3f%%", Vals: static_cast<double>(HotFuncOverlap.OverlapCount) /
2682 HotFuncOverlap.UnionCount * 100)
2683 << "\n";
2684 OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2685 OS << " hot functions unique in base profile: "
2686 << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2687 OS << " hot functions unique in test profile: "
2688 << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2689
2690 assert(HotBlockOverlap.UnionCount > 0 &&
2691 "There should be at least one hot block in two input profiles");
2692 OS << " Hot-block overlap: "
2693 << format(Fmt: "%.3f%%", Vals: static_cast<double>(HotBlockOverlap.OverlapCount) /
2694 HotBlockOverlap.UnionCount * 100)
2695 << "\n";
2696 OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2697 OS << " hot blocks unique in base profile: "
2698 << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2699 OS << " hot blocks unique in test profile: "
2700 << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2701}
2702
2703std::error_code SampleOverlapAggregator::loadProfiles() {
2704 using namespace sampleprof;
2705
2706 LLVMContext Context;
2707 auto FS = vfs::getRealFileSystem();
2708 auto BaseReaderOrErr = SampleProfileReader::create(Filename: BaseFilename, C&: Context, FS&: *FS,
2709 P: FSDiscriminatorPassOption);
2710 if (std::error_code EC = BaseReaderOrErr.getError())
2711 exitWithErrorCode(EC, Whence: BaseFilename);
2712
2713 auto TestReaderOrErr = SampleProfileReader::create(Filename: TestFilename, C&: Context, FS&: *FS,
2714 P: FSDiscriminatorPassOption);
2715 if (std::error_code EC = TestReaderOrErr.getError())
2716 exitWithErrorCode(EC, Whence: TestFilename);
2717
2718 BaseReader = std::move(BaseReaderOrErr.get());
2719 TestReader = std::move(TestReaderOrErr.get());
2720
2721 if (std::error_code EC = BaseReader->read())
2722 exitWithErrorCode(EC, Whence: BaseFilename);
2723 if (std::error_code EC = TestReader->read())
2724 exitWithErrorCode(EC, Whence: TestFilename);
2725 if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2726 exitWithError(
2727 Message: "cannot compare probe-based profile with non-probe-based profile");
2728 if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2729 exitWithError(Message: "cannot compare CS profile with non-CS profile");
2730
2731 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2732 // profile summary.
2733 ProfileSummary &BasePS = BaseReader->getSummary();
2734 ProfileSummary &TestPS = TestReader->getSummary();
2735 BaseHotThreshold =
2736 ProfileSummaryBuilder::getHotCountThreshold(DS: BasePS.getDetailedSummary());
2737 TestHotThreshold =
2738 ProfileSummaryBuilder::getHotCountThreshold(DS: TestPS.getDetailedSummary());
2739
2740 return std::error_code();
2741}
2742
2743void overlapSampleProfile(const std::string &BaseFilename,
2744 const std::string &TestFilename,
2745 const OverlapFuncFilters &FuncFilter,
2746 uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2747 using namespace sampleprof;
2748
2749 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2750 // report 2--3 places after decimal point in percentage numbers.
2751 SampleOverlapAggregator OverlapAggr(
2752 BaseFilename, TestFilename,
2753 static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2754 if (std::error_code EC = OverlapAggr.loadProfiles())
2755 exitWithErrorCode(EC);
2756
2757 OverlapAggr.initializeSampleProfileOverlap();
2758 if (OverlapAggr.detectZeroSampleProfile(OS))
2759 return;
2760
2761 OverlapAggr.computeSampleProfileOverlap(OS);
2762
2763 OverlapAggr.dumpProgramSummary(OS);
2764 OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2765 OverlapAggr.dumpFuncSimilarity(OS);
2766}
2767
2768static int overlap_main() {
2769 std::error_code EC;
2770 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2771 if (EC)
2772 exitWithErrorCode(EC, Whence: OutputFilename);
2773
2774 if (ProfileKind == instr)
2775 overlapInstrProfile(BaseFilename, TestFilename,
2776 FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2777 OS, IsCS);
2778 else
2779 overlapSampleProfile(BaseFilename, TestFilename,
2780 FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2781 SimilarityCutoff, OS);
2782
2783 return 0;
2784}
2785
namespace {
/// Running totals for the value sites of one value-profile kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  /// Number of value sites seen, whether or not they recorded any value.
  uint64_t TotalNumValueSites{0};
  /// Number of value sites that recorded at least one value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  /// Number of recorded values summed over all sites.
  uint64_t TotalNumValues{0};
  /// Entry I counts the sites that recorded exactly I + 1 values.
  std::vector<unsigned> ValueSitesHistogram{};
};
} // namespace
2795
2796static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2797 ValueSitesStats &Stats, raw_fd_ostream &OS,
2798 InstrProfSymtab *Symtab) {
2799 uint32_t NS = Func.getNumValueSites(ValueKind: VK);
2800 Stats.TotalNumValueSites += NS;
2801 for (size_t I = 0; I < NS; ++I) {
2802 auto VD = Func.getValueArrayForSite(ValueKind: VK, Site: I);
2803 uint32_t NV = VD.size();
2804 if (NV == 0)
2805 continue;
2806 Stats.TotalNumValues += NV;
2807 Stats.TotalNumValueSitesWithValueProfile++;
2808 if (NV > Stats.ValueSitesHistogram.size())
2809 Stats.ValueSitesHistogram.resize(new_size: NV, x: 0);
2810 Stats.ValueSitesHistogram[NV - 1]++;
2811
2812 uint64_t SiteSum = 0;
2813 for (const auto &V : VD)
2814 SiteSum += V.Count;
2815 if (SiteSum == 0)
2816 SiteSum = 1;
2817
2818 for (const auto &V : VD) {
2819 OS << "\t[ " << format(Fmt: "%2u", Vals: I) << ", ";
2820 if (Symtab == nullptr)
2821 OS << format(Fmt: "%4" PRIu64, Vals: V.Value);
2822 else
2823 OS << Symtab->getFuncOrVarName(MD5Hash: V.Value);
2824 OS << ", " << format(Fmt: "%10" PRId64, Vals: V.Count) << " ] ("
2825 << format(Fmt: "%.2f%%", Vals: (V.Count * 100.0 / SiteSum)) << ")\n";
2826 }
2827 }
2828}
2829
2830static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2831 ValueSitesStats &Stats) {
2832 OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n";
2833 OS << " Total number of sites with values: "
2834 << Stats.TotalNumValueSitesWithValueProfile << "\n";
2835 OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n";
2836
2837 OS << " Value sites histogram:\n\tNumTargets, SiteCount\n";
2838 for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2839 if (Stats.ValueSitesHistogram[I] > 0)
2840 OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2841 }
2842}
2843
2844static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
2845 if (SFormat == ShowFormat::Json)
2846 exitWithError(Message: "JSON output is not supported for instr profiles");
2847 if (SFormat == ShowFormat::Yaml)
2848 exitWithError(Message: "YAML output is not supported for instr profiles");
2849 auto FS = vfs::getRealFileSystem();
2850 auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
2851 std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2852 if (ShowDetailedSummary && Cutoffs.empty()) {
2853 Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2854 }
2855 InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2856 if (Error E = ReaderOrErr.takeError())
2857 exitWithError(E: std::move(E), Whence: Filename);
2858
2859 auto Reader = std::move(ReaderOrErr.get());
2860 bool IsIRInstr = Reader->isIRLevelProfile();
2861 size_t ShownFunctions = 0;
2862 size_t BelowCutoffFunctions = 0;
2863 int NumVPKind = IPVK_Last - IPVK_First + 1;
2864 std::vector<ValueSitesStats> VPStats(NumVPKind);
2865
2866 auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
2867 const std::pair<std::string, uint64_t> &v2) {
2868 return v1.second > v2.second;
2869 };
2870
2871 std::priority_queue<std::pair<std::string, uint64_t>,
2872 std::vector<std::pair<std::string, uint64_t>>,
2873 decltype(MinCmp)>
2874 HottestFuncs(MinCmp);
2875
2876 if (!TextFormat && OnlyListBelow) {
2877 OS << "The list of functions with the maximum counter less than "
2878 << ShowValueCutoff << ":\n";
2879 }
2880
2881 // Add marker so that IR-level instrumentation round-trips properly.
2882 if (TextFormat && IsIRInstr)
2883 OS << ":ir\n";
2884
2885 for (const auto &Func : *Reader) {
2886 if (Reader->isIRLevelProfile()) {
2887 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(FuncHash: Func.Hash);
2888 if (FuncIsCS != ShowCS)
2889 continue;
2890 }
2891 bool Show = ShowAllFunctions ||
2892 (!FuncNameFilter.empty() && Func.Name.contains(Other: FuncNameFilter));
2893
2894 bool doTextFormatDump = (Show && TextFormat);
2895
2896 if (doTextFormatDump) {
2897 InstrProfSymtab &Symtab = Reader->getSymtab();
2898 InstrProfWriter::writeRecordInText(Name: Func.Name, Hash: Func.Hash, Counters: Func, Symtab,
2899 OS);
2900 continue;
2901 }
2902
2903 assert(Func.Counts.size() > 0 && "function missing entry counter");
2904 Builder.addRecord(Func);
2905
2906 if (ShowCovered) {
2907 if (llvm::any_of(Range: Func.Counts, P: [](uint64_t C) { return C; }))
2908 OS << Func.Name << "\n";
2909 continue;
2910 }
2911
2912 uint64_t FuncMax = 0;
2913 uint64_t FuncSum = 0;
2914
2915 auto PseudoKind = Func.getCountPseudoKind();
2916 if (PseudoKind != InstrProfRecord::NotPseudo) {
2917 if (Show) {
2918 if (!ShownFunctions)
2919 OS << "Counters:\n";
2920 ++ShownFunctions;
2921 OS << " " << Func.Name << ":\n"
2922 << " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
2923 << " Counters: " << Func.Counts.size();
2924 if (PseudoKind == InstrProfRecord::PseudoHot)
2925 OS << " <PseudoHot>\n";
2926 else if (PseudoKind == InstrProfRecord::PseudoWarm)
2927 OS << " <PseudoWarm>\n";
2928 else
2929 llvm_unreachable("Unknown PseudoKind");
2930 }
2931 continue;
2932 }
2933
2934 for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
2935 FuncMax = std::max(a: FuncMax, b: Func.Counts[I]);
2936 FuncSum += Func.Counts[I];
2937 }
2938
2939 if (FuncMax < ShowValueCutoff) {
2940 ++BelowCutoffFunctions;
2941 if (OnlyListBelow) {
2942 OS << " " << Func.Name << ": (Max = " << FuncMax
2943 << " Sum = " << FuncSum << ")\n";
2944 }
2945 continue;
2946 } else if (OnlyListBelow)
2947 continue;
2948
2949 if (TopNFunctions) {
2950 if (HottestFuncs.size() == TopNFunctions) {
2951 if (HottestFuncs.top().second < FuncMax) {
2952 HottestFuncs.pop();
2953 HottestFuncs.emplace(args: std::make_pair(x: std::string(Func.Name), y&: FuncMax));
2954 }
2955 } else
2956 HottestFuncs.emplace(args: std::make_pair(x: std::string(Func.Name), y&: FuncMax));
2957 }
2958
2959 if (Show) {
2960 if (!ShownFunctions)
2961 OS << "Counters:\n";
2962
2963 ++ShownFunctions;
2964
2965 OS << " " << Func.Name << ":\n"
2966 << " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
2967 << " Counters: " << Func.Counts.size() << "\n";
2968 if (!IsIRInstr)
2969 OS << " Function count: " << Func.Counts[0] << "\n";
2970
2971 if (ShowIndirectCallTargets)
2972 OS << " Indirect Call Site Count: "
2973 << Func.getNumValueSites(ValueKind: IPVK_IndirectCallTarget) << "\n";
2974
2975 if (ShowVTables)
2976 OS << " Number of instrumented vtables: "
2977 << Func.getNumValueSites(ValueKind: IPVK_VTableTarget) << "\n";
2978
2979 uint32_t NumMemOPCalls = Func.getNumValueSites(ValueKind: IPVK_MemOPSize);
2980 if (ShowMemOPSizes && NumMemOPCalls > 0)
2981 OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2982 << "\n";
2983
2984 if (ShowCounts) {
2985 OS << " Block counts: [";
2986 size_t Start = (IsIRInstr ? 0 : 1);
2987 for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2988 OS << (I == Start ? "" : ", ") << Func.Counts[I];
2989 }
2990 OS << "]\n";
2991 }
2992
2993 if (ShowIndirectCallTargets) {
2994 OS << " Indirect Target Results:\n";
2995 traverseAllValueSites(Func, VK: IPVK_IndirectCallTarget,
2996 Stats&: VPStats[IPVK_IndirectCallTarget], OS,
2997 Symtab: &(Reader->getSymtab()));
2998 }
2999
3000 if (ShowVTables) {
3001 OS << " VTable Results:\n";
3002 traverseAllValueSites(Func, VK: IPVK_VTableTarget,
3003 Stats&: VPStats[IPVK_VTableTarget], OS,
3004 Symtab: &(Reader->getSymtab()));
3005 }
3006
3007 if (ShowMemOPSizes && NumMemOPCalls > 0) {
3008 OS << " Memory Intrinsic Size Results:\n";
3009 traverseAllValueSites(Func, VK: IPVK_MemOPSize, Stats&: VPStats[IPVK_MemOPSize], OS,
3010 Symtab: nullptr);
3011 }
3012 }
3013 }
3014 if (Reader->hasError())
3015 exitWithError(E: Reader->getError(), Whence: Filename);
3016
3017 if (TextFormat || ShowCovered)
3018 return 0;
3019 std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
3020 bool IsIR = Reader->isIRLevelProfile();
3021 OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
3022 if (IsIR) {
3023 OS << " entry_first = " << Reader->instrEntryBBEnabled();
3024 OS << " instrument_loop_entries = " << Reader->instrLoopEntriesEnabled();
3025 }
3026 OS << "\n";
3027 if (ShowAllFunctions || !FuncNameFilter.empty())
3028 OS << "Functions shown: " << ShownFunctions << "\n";
3029 PS->printSummary(OS);
3030 if (ShowValueCutoff > 0) {
3031 OS << "Number of functions with maximum count (< " << ShowValueCutoff
3032 << "): " << BelowCutoffFunctions << "\n";
3033 OS << "Number of functions with maximum count (>= " << ShowValueCutoff
3034 << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
3035 }
3036
3037 if (TopNFunctions) {
3038 std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
3039 while (!HottestFuncs.empty()) {
3040 SortedHottestFuncs.emplace_back(args: HottestFuncs.top());
3041 HottestFuncs.pop();
3042 }
3043 OS << "Top " << TopNFunctions
3044 << " functions with the largest internal block counts: \n";
3045 for (auto &hotfunc : llvm::reverse(C&: SortedHottestFuncs))
3046 OS << " " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
3047 }
3048
3049 if (ShownFunctions && ShowIndirectCallTargets) {
3050 OS << "Statistics for indirect call sites profile:\n";
3051 showValueSitesStats(OS, VK: IPVK_IndirectCallTarget,
3052 Stats&: VPStats[IPVK_IndirectCallTarget]);
3053 }
3054
3055 if (ShownFunctions && ShowVTables) {
3056 OS << "Statistics for vtable profile:\n";
3057 showValueSitesStats(OS, VK: IPVK_VTableTarget, Stats&: VPStats[IPVK_VTableTarget]);
3058 }
3059
3060 if (ShownFunctions && ShowMemOPSizes) {
3061 OS << "Statistics for memory intrinsic calls sizes profile:\n";
3062 showValueSitesStats(OS, VK: IPVK_MemOPSize, Stats&: VPStats[IPVK_MemOPSize]);
3063 }
3064
3065 if (ShowDetailedSummary)
3066 PS->printDetailedSummary(OS);
3067
3068 if (ShowBinaryIds)
3069 if (Error E = Reader->printBinaryIds(OS))
3070 exitWithError(E: std::move(E), Whence: Filename);
3071
3072 if (ShowProfileVersion)
3073 OS << "Profile version: " << Reader->getVersion() << "\n";
3074
3075 if (ShowTemporalProfTraces) {
3076 auto &Traces = Reader->getTemporalProfTraces();
3077 OS << "Temporal Profile Traces (samples=" << Traces.size()
3078 << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
3079 for (unsigned i = 0; i < Traces.size(); i++) {
3080 OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
3081 << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
3082 for (auto &NameRef : Traces[i].FunctionNameRefs)
3083 OS << " " << Reader->getSymtab().getFuncOrVarName(MD5Hash: NameRef) << "\n";
3084 }
3085 }
3086
3087 return 0;
3088}
3089
3090static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
3091 raw_fd_ostream &OS) {
3092 if (!Reader->dumpSectionInfo(OS)) {
3093 WithColor::warning() << "-show-sec-info-only is only supported for "
3094 << "sample profile in extbinary format and is "
3095 << "ignored for other formats.\n";
3096 return;
3097 }
3098}
3099
3100namespace {
3101struct HotFuncInfo {
3102 std::string FuncName;
3103 uint64_t TotalCount = 0;
3104 double TotalCountPercent = 0.0f;
3105 uint64_t MaxCount = 0;
3106 uint64_t EntryCount = 0;
3107
3108 HotFuncInfo() = default;
3109
3110 HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
3111 : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
3112 MaxCount(MS), EntryCount(ES) {}
3113};
3114} // namespace
3115
// Print out detailed information about the hot functions in the PrintValues
// vector. Users specify the title and offset of every column through
// ColumnTitle and ColumnOffset. ColumnTitle and ColumnOffset must have the
// same size, and that size must be at least 4. Users can also optionally pass
// a HotFuncMetric string to print out, or leave it as an empty string.
3121static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
3122 const std::vector<int> &ColumnOffset,
3123 const std::vector<HotFuncInfo> &PrintValues,
3124 uint64_t HotFuncCount, uint64_t TotalFuncCount,
3125 uint64_t HotProfCount, uint64_t TotalProfCount,
3126 const std::string &HotFuncMetric,
3127 uint32_t TopNFunctions, raw_fd_ostream &OS) {
3128 assert(ColumnOffset.size() == ColumnTitle.size() &&
3129 "ColumnOffset and ColumnTitle should have the same size");
3130 assert(ColumnTitle.size() >= 4 &&
3131 "ColumnTitle should have at least 4 elements");
3132 assert(TotalFuncCount > 0 &&
3133 "There should be at least one function in the profile");
3134 double TotalProfPercent = 0;
3135 if (TotalProfCount > 0)
3136 TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
3137
3138 formatted_raw_ostream FOS(OS);
3139 FOS << HotFuncCount << " out of " << TotalFuncCount
3140 << " functions with profile ("
3141 << format(Fmt: "%.2f%%",
3142 Vals: (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
3143 << ") are considered hot functions";
3144 if (!HotFuncMetric.empty())
3145 FOS << " (" << HotFuncMetric << ")";
3146 FOS << ".\n";
3147 FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
3148 << format(Fmt: "%.2f%%", Vals: TotalProfPercent) << ") are from hot functions.\n";
3149
3150 for (size_t I = 0; I < ColumnTitle.size(); ++I) {
3151 FOS.PadToColumn(NewCol: ColumnOffset[I]);
3152 FOS << ColumnTitle[I];
3153 }
3154 FOS << "\n";
3155
3156 uint32_t Count = 0;
3157 for (const auto &R : PrintValues) {
3158 if (TopNFunctions && (Count++ == TopNFunctions))
3159 break;
3160 FOS.PadToColumn(NewCol: ColumnOffset[0]);
3161 FOS << R.TotalCount << " (" << format(Fmt: "%.2f%%", Vals: R.TotalCountPercent) << ")";
3162 FOS.PadToColumn(NewCol: ColumnOffset[1]);
3163 FOS << R.MaxCount;
3164 FOS.PadToColumn(NewCol: ColumnOffset[2]);
3165 FOS << R.EntryCount;
3166 FOS.PadToColumn(NewCol: ColumnOffset[3]);
3167 FOS << R.FuncName << "\n";
3168 }
3169}
3170
3171static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
3172 ProfileSummary &PS, uint32_t TopN,
3173 raw_fd_ostream &OS) {
3174 using namespace sampleprof;
3175
3176 const uint32_t HotFuncCutoff = 990000;
3177 auto &SummaryVector = PS.getDetailedSummary();
3178 uint64_t MinCountThreshold = 0;
3179 for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
3180 if (SummaryEntry.Cutoff == HotFuncCutoff) {
3181 MinCountThreshold = SummaryEntry.MinCount;
3182 break;
3183 }
3184 }
3185
3186 // Traverse all functions in the profile and keep only hot functions.
3187 // The following loop also calculates the sum of total samples of all
3188 // functions.
3189 std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
3190 std::greater<uint64_t>>
3191 HotFunc;
3192 uint64_t ProfileTotalSample = 0;
3193 uint64_t HotFuncSample = 0;
3194 uint64_t HotFuncCount = 0;
3195
3196 for (const auto &I : Profiles) {
3197 FuncSampleStats FuncStats;
3198 const FunctionSamples &FuncProf = I.second;
3199 ProfileTotalSample += FuncProf.getTotalSamples();
3200 getFuncSampleStats(Func: FuncProf, FuncStats, HotThreshold: MinCountThreshold);
3201
3202 if (isFunctionHot(FuncStats, HotThreshold: MinCountThreshold)) {
3203 HotFunc.emplace(args: FuncProf.getTotalSamples(),
3204 args: std::make_pair(x: &(I.second), y&: FuncStats.MaxSample));
3205 HotFuncSample += FuncProf.getTotalSamples();
3206 ++HotFuncCount;
3207 }
3208 }
3209
3210 std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
3211 "Entry sample", "Function name"};
3212 std::vector<int> ColumnOffset{0, 24, 42, 58};
3213 std::string Metric =
3214 std::string("max sample >= ") + std::to_string(val: MinCountThreshold);
3215 std::vector<HotFuncInfo> PrintValues;
3216 for (const auto &FuncPair : HotFunc) {
3217 const FunctionSamples &Func = *FuncPair.second.first;
3218 double TotalSamplePercent =
3219 (ProfileTotalSample > 0)
3220 ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
3221 : 0;
3222 PrintValues.emplace_back(
3223 args: HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
3224 TotalSamplePercent, FuncPair.second.second,
3225 Func.getHeadSamplesEstimate()));
3226 }
3227 dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
3228 TotalFuncCount: Profiles.size(), HotProfCount: HotFuncSample, TotalProfCount: ProfileTotalSample,
3229 HotFuncMetric: Metric, TopNFunctions: TopN, OS);
3230
3231 return 0;
3232}
3233
3234static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3235 if (SFormat == ShowFormat::Yaml)
3236 exitWithError(Message: "YAML output is not supported for sample profiles");
3237 using namespace sampleprof;
3238 LLVMContext Context;
3239 auto FS = vfs::getRealFileSystem();
3240 auto ReaderOrErr = SampleProfileReader::create(Filename, C&: Context, FS&: *FS,
3241 P: FSDiscriminatorPassOption);
3242 if (std::error_code EC = ReaderOrErr.getError())
3243 exitWithErrorCode(EC, Whence: Filename);
3244
3245 auto Reader = std::move(ReaderOrErr.get());
3246 if (ShowSectionInfoOnly) {
3247 showSectionInfo(Reader: Reader.get(), OS);
3248 return 0;
3249 }
3250
3251 if (std::error_code EC = Reader->read())
3252 exitWithErrorCode(EC, Whence: Filename);
3253
3254 if (ShowAllFunctions || FuncNameFilter.empty()) {
3255 if (SFormat == ShowFormat::Json)
3256 Reader->dumpJson(OS);
3257 else
3258 Reader->dump(OS);
3259 } else {
3260 if (SFormat == ShowFormat::Json)
3261 exitWithError(
3262 Message: "the JSON format is supported only when all functions are to "
3263 "be printed");
3264
3265 // TODO: parse context string to support filtering by contexts.
3266 FunctionSamples *FS = Reader->getSamplesFor(Fname: StringRef(FuncNameFilter));
3267 Reader->dumpFunctionProfile(FS: FS ? *FS : FunctionSamples(), OS);
3268 }
3269
3270 if (ShowProfileSymbolList) {
3271 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
3272 Reader->getProfileSymbolList();
3273 ReaderList->dump(OS);
3274 }
3275
3276 if (ShowDetailedSummary) {
3277 auto &PS = Reader->getSummary();
3278 PS.printSummary(OS);
3279 PS.printDetailedSummary(OS);
3280 }
3281
3282 if (ShowHotFuncList || TopNFunctions)
3283 showHotFunctionList(Profiles: Reader->getProfiles(), PS&: Reader->getSummary(),
3284 TopN: TopNFunctions, OS);
3285
3286 return 0;
3287}
3288
3289static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3290 if (SFormat == ShowFormat::Json)
3291 exitWithError(Message: "JSON output is not supported for MemProf");
3292
3293 // Show the raw profile in YAML.
3294 if (memprof::RawMemProfReader::hasFormat(Path: Filename)) {
3295 auto ReaderOr = llvm::memprof::RawMemProfReader::create(
3296 Path: Filename, ProfiledBinary, /*KeepNames=*/KeepName: true);
3297 if (Error E = ReaderOr.takeError()) {
3298 // Since the error can be related to the profile or the binary we do not
3299 // pass whence. Instead additional context is provided where necessary in
3300 // the error message.
3301 exitWithError(E: std::move(E), /*Whence*/ "");
3302 }
3303
3304 std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
3305 ReaderOr.get().release());
3306
3307 Reader->printYAML(OS);
3308 return 0;
3309 }
3310
3311 // Show the indexed MemProf profile in YAML.
3312 auto FS = vfs::getRealFileSystem();
3313 auto ReaderOrErr = IndexedInstrProfReader::create(Path: Filename, FS&: *FS);
3314 if (Error E = ReaderOrErr.takeError())
3315 exitWithError(E: std::move(E), Whence: Filename);
3316
3317 auto Reader = std::move(ReaderOrErr.get());
3318 memprof::AllMemProfData Data = Reader->getAllMemProfData();
3319
3320 // For v4 and above the summary is serialized in the indexed profile, and can
3321 // be accessed from the reader. Earlier versions build the summary below.
3322 // The summary is emitted as YAML comments at the start of the output.
3323 if (auto *MemProfSum = Reader->getMemProfSummary()) {
3324 MemProfSum->printSummaryYaml(OS);
3325 } else {
3326 memprof::MemProfSummaryBuilder MemProfSumBuilder;
3327 for (auto &Pair : Data.HeapProfileRecords)
3328 MemProfSumBuilder.addRecord(Pair.Record);
3329 MemProfSumBuilder.getSummary()->printSummaryYaml(OS);
3330 }
3331 // Construct yaml::Output with the maximum column width of 80 so that each
3332 // Frame fits in one line.
3333 yaml::Output Yout(OS, nullptr, 80);
3334 Yout << Data;
3335
3336 return 0;
3337}
3338
3339static int showDebugInfoCorrelation(const std::string &Filename,
3340 ShowFormat SFormat, raw_fd_ostream &OS) {
3341 if (SFormat == ShowFormat::Json)
3342 exitWithError(Message: "JSON output is not supported for debug info correlation");
3343 std::unique_ptr<InstrProfCorrelator> Correlator;
3344 if (auto Err =
3345 InstrProfCorrelator::get(Filename, FileKind: InstrProfCorrelator::DEBUG_INFO)
3346 .moveInto(Value&: Correlator))
3347 exitWithError(E: std::move(Err), Whence: Filename);
3348 if (SFormat == ShowFormat::Yaml) {
3349 if (auto Err = Correlator->dumpYaml(MaxWarnings: MaxDbgCorrelationWarnings, OS))
3350 exitWithError(E: std::move(Err), Whence: Filename);
3351 return 0;
3352 }
3353
3354 if (auto Err = Correlator->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
3355 exitWithError(E: std::move(Err), Whence: Filename);
3356
3357 InstrProfSymtab Symtab;
3358 if (auto Err = Symtab.create(
3359 NameStrings: StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3360 exitWithError(E: std::move(Err), Whence: Filename);
3361
3362 if (ShowProfileSymbolList)
3363 Symtab.dumpNames(OS);
3364 // TODO: Read "Profile Data Type" from debug info to compute and show how many
3365 // counters the section holds.
3366 if (ShowDetailedSummary)
3367 OS << "Counters section size: 0x"
3368 << Twine::utohexstr(Val: Correlator->getCountersSectionSize()) << " bytes\n";
3369 OS << "Found " << Correlator->getDataSize() << " functions\n";
3370
3371 return 0;
3372}
3373
3374static int show_main(StringRef ProgName) {
3375 if (Filename.empty() && DebugInfoFilename.empty())
3376 exitWithError(
3377 Message: "the positional argument '<profdata-file>' is required unless '--" +
3378 DebugInfoFilename.ArgStr + "' is provided");
3379
3380 if (Filename == OutputFilename) {
3381 errs() << ProgName
3382 << " show: Input file name cannot be the same as the output file "
3383 "name!\n";
3384 return 1;
3385 }
3386 if (JsonFormat)
3387 SFormat = ShowFormat::Json;
3388
3389 std::error_code EC;
3390 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3391 if (EC)
3392 exitWithErrorCode(EC, Whence: OutputFilename);
3393
3394 if (ShowAllFunctions && !FuncNameFilter.empty())
3395 WithColor::warning() << "-function argument ignored: showing all functions\n";
3396
3397 if (!DebugInfoFilename.empty())
3398 return showDebugInfoCorrelation(Filename: DebugInfoFilename, SFormat, OS);
3399
3400 if (ShowProfileKind == instr)
3401 return showInstrProfile(SFormat, OS);
3402 if (ShowProfileKind == sample)
3403 return showSampleProfile(SFormat, OS);
3404 return showMemProfProfile(SFormat, OS);
3405}
3406
3407static int order_main() {
3408 std::error_code EC;
3409 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3410 if (EC)
3411 exitWithErrorCode(EC, Whence: OutputFilename);
3412 auto FS = vfs::getRealFileSystem();
3413 auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
3414 if (Error E = ReaderOrErr.takeError())
3415 exitWithError(E: std::move(E), Whence: Filename);
3416
3417 auto Reader = std::move(ReaderOrErr.get());
3418 for (auto &I : *Reader) {
3419 // Read all entries
3420 (void)I;
3421 }
3422 ArrayRef Traces = Reader->getTemporalProfTraces();
3423 if (NumTestTraces && NumTestTraces >= Traces.size())
3424 exitWithError(
3425 Message: "--" + NumTestTraces.ArgStr +
3426 " must be smaller than the total number of traces: expected: < " +
3427 Twine(Traces.size()) + ", actual: " + Twine(NumTestTraces));
3428 ArrayRef TestTraces = Traces.take_back(N: NumTestTraces);
3429 Traces = Traces.drop_back(N: NumTestTraces);
3430
3431 std::vector<BPFunctionNode> Nodes;
3432 TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes);
3433 BalancedPartitioningConfig Config;
3434 BalancedPartitioning BP(Config);
3435 BP.run(Nodes);
3436
3437 OS << "# Ordered " << Nodes.size() << " functions\n";
3438 if (!TestTraces.empty()) {
3439 // Since we don't know the symbol sizes, we assume 32 functions per page.
3440 DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber;
3441 for (auto &Node : Nodes)
3442 IdToPageNumber[Node.Id] = IdToPageNumber.size() / 32;
3443
3444 SmallSet<unsigned, 0> TouchedPages;
3445 unsigned Area = 0;
3446 for (auto &Trace : TestTraces) {
3447 for (auto Id : Trace.FunctionNameRefs) {
3448 auto It = IdToPageNumber.find(Val: Id);
3449 if (It == IdToPageNumber.end())
3450 continue;
3451 TouchedPages.insert(V: It->getSecond());
3452 Area += TouchedPages.size();
3453 }
3454 TouchedPages.clear();
3455 }
3456 OS << "# Total area under the page fault curve: " << (float)Area << "\n";
3457 }
3458 OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3459 "linkage and this output does not take that into account. Some "
3460 "post-processing may be required before passing to the linker via "
3461 "-order_file.\n";
3462 for (auto &N : Nodes) {
3463 auto [Filename, ParsedFuncName] =
3464 getParsedIRPGOName(IRPGOName: Reader->getSymtab().getFuncOrVarName(MD5Hash: N.Id));
3465 if (!Filename.empty())
3466 OS << "# " << Filename << "\n";
3467 OS << ParsedFuncName << "\n";
3468 }
3469 return 0;
3470}
3471
3472int llvm_profdata_main(int argc, char **argvNonConst,
3473 const llvm::ToolContext &) {
3474 const char **argv = const_cast<const char **>(argvNonConst);
3475
3476 StringRef ProgName(sys::path::filename(path: argv[0]));
3477
3478 if (argc < 2) {
3479 errs()
3480 << ProgName
3481 << ": No subcommand specified! Run llvm-profdata --help for usage.\n";
3482 return 1;
3483 }
3484
3485 cl::ParseCommandLineOptions(argc, argv, Overview: "LLVM profile data\n");
3486
3487 if (ShowSubcommand)
3488 return show_main(ProgName);
3489
3490 if (OrderSubcommand)
3491 return order_main();
3492
3493 if (OverlapSubcommand)
3494 return overlap_main();
3495
3496 if (MergeSubcommand)
3497 return merge_main(ProgName);
3498
3499 errs() << ProgName
3500 << ": Unknown command. Run llvm-profdata --help for usage.\n";
3501 return 1;
3502}
3503