llvm-profdata.cpp source code [llvm_projects/llvm/tools/llvm-profdata/llvm-profdata.cpp]

1	//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// llvm-profdata merges .profdata files.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/ADT/SmallSet.h"
14	#include "llvm/ADT/SmallVector.h"
15	#include "llvm/ADT/StringRef.h"
16	#include "llvm/IR/LLVMContext.h"
17	#include "llvm/Object/Binary.h"
18	#include "llvm/ProfileData/InstrProfCorrelator.h"
19	#include "llvm/ProfileData/InstrProfReader.h"
20	#include "llvm/ProfileData/InstrProfWriter.h"
21	#include "llvm/ProfileData/MemProf.h"
22	#include "llvm/ProfileData/MemProfReader.h"
23	#include "llvm/ProfileData/ProfileCommon.h"
24	#include "llvm/ProfileData/SampleProfReader.h"
25	#include "llvm/ProfileData/SampleProfWriter.h"
26	#include "llvm/Support/BalancedPartitioning.h"
27	#include "llvm/Support/CommandLine.h"
28	#include "llvm/Support/Discriminator.h"
29	#include "llvm/Support/Errc.h"
30	#include "llvm/Support/FileSystem.h"
31	#include "llvm/Support/Format.h"
32	#include "llvm/Support/FormattedStream.h"
33	#include "llvm/Support/LLVMDriver.h"
34	#include "llvm/Support/MD5.h"
35	#include "llvm/Support/MemoryBuffer.h"
36	#include "llvm/Support/Path.h"
37	#include "llvm/Support/Regex.h"
38	#include "llvm/Support/ThreadPool.h"
39	#include "llvm/Support/Threading.h"
40	#include "llvm/Support/VirtualFileSystem.h"
41	#include "llvm/Support/WithColor.h"
42	#include "llvm/Support/raw_ostream.h"
43	#include <algorithm>
44	#include <cmath>
45	#include <optional>
46	#include <queue>
47
48	using namespace llvm;
49	using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
50
51	// https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentations
52	// on each subcommand.
53	cl::SubCommand ShowSubcommand(
54	"show",
55	"Takes a profile data file and displays the profiles. See detailed "
56	"documentation in "
57	"https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
58	cl::SubCommand OrderSubcommand(
59	"order",
60	"Reads temporal profiling traces from a profile and outputs a function "
61	"order that reduces the number of page faults for those traces. See "
62	"detailed documentation in "
63	"https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
64	cl::SubCommand OverlapSubcommand(
65	"overlap",
66	"Computes and displays the overlap between two profiles. See detailed "
67	"documentation in "
68	"https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
69	cl::SubCommand MergeSubcommand(
70	"merge",
71	"Takes several profiles and merge them together. See detailed "
72	"documentation in "
73	"https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
74
namespace {
/// Kind of profile selected on the command line.
enum ProfileKinds { instr, sample, memory };

/// How invalid input profiles are treated (see the "failure-mode" option).
enum FailureMode { warnOnly, failIfAnyAreInvalid, failIfAllAreInvalid };

/// Output encodings selectable for the merged profile.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary
};

/// Output formats selectable through the "show-format" option.
enum class ShowFormat { Text, Json, Yaml };
} // namespace
90
91	// Common options.
92	cl::opt<std::string> OutputFilename("output", cl::value_desc ("output"),
93	cl::init(Val: "-"), cl::desc ("Output file"),
94	cl::sub (ShowSubcommand),
95	cl::sub (OrderSubcommand),
96	cl::sub (OverlapSubcommand),
97	cl::sub (MergeSubcommand));
98	// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
99	// will be used. llvm::cl::alias::done() method asserts this condition.
100	cl::alias OutputFilenameA("o", cl::desc ("Alias for --output"),
101	cl::aliasopt (OutputFilename));
102
103	// Options common to at least two commands.
104	cl::opt<ProfileKinds> ProfileKind(
105	cl::desc ("Profile kind:"), cl::sub (MergeSubcommand),
106	cl::sub (OverlapSubcommand), cl::init(Val: instr),
107	cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
108	clEnumVal(sample, "Sample profile")));
109	cl::opt<std::string> Filename(cl::Positional, cl::desc ("<profdata-file>"),
110	cl::sub (ShowSubcommand),
111	cl::sub (OrderSubcommand));
112	cl::opt<unsigned> MaxDbgCorrelationWarnings(
113	"max-debug-info-correlation-warnings",
114	cl::desc ("The maximum number of warnings to emit when correlating "
115	"profile from debug info (0 = no limit)"),
116	cl::sub (MergeSubcommand), cl::sub (ShowSubcommand), cl::init(Val: `5`));
117	cl::opt<std::string> ProfiledBinary(
118	"profiled-binary", cl::init(Val: ""),
119	cl::desc ("Path to binary from which the profile was collected."),
120	cl::sub (ShowSubcommand), cl::sub (MergeSubcommand));
121	cl::opt<std::string> DebugInfoFilename(
122	"debug-info", cl::init(Val: ""),
123	cl::desc (
124	"For show, read and extract profile metadata from debug info and show "
125	"the functions it found. For merge, use the provided debug info to "
126	"correlate the raw profile."),
127	cl::sub (ShowSubcommand), cl::sub (MergeSubcommand));
128	cl::opt<std::string>
129	BinaryFilename("binary-file", cl::init(Val: ""),
130	cl::desc ("For merge, use the provided unstripped bianry to "
131	"correlate the raw profile."),
132	cl::sub (MergeSubcommand));
133	cl::opt<std::string> FuncNameFilter(
134	"function",
135	cl::desc ("Only functions matching the filter are shown in the output. For "
136	"overlapping CSSPGO, this takes a function name with calling "
137	"context."),
138	cl::sub (ShowSubcommand), cl::sub (OverlapSubcommand),
139	cl::sub (MergeSubcommand));
140
141	// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
142	// factor out the common cl::sub in cl::opt constructor for subcommand-specific
143	// options.
144
145	// Options specific to merge subcommand.
146	cl::list<std::string> InputFilenames(cl::Positional, cl::sub (MergeSubcommand),
147	cl::desc ("<filename...>"));
148	cl::list<std::string> WeightedInputFilenames("weighted-input",
149	cl::sub (MergeSubcommand),
150	cl::desc ("<weight>,<filename>"));
151	cl::opt<ProfileFormat> OutputFormat(
152	cl::desc ("Format of output profile"), cl::sub (MergeSubcommand),
153	cl::init(Val: PF_Ext_Binary),
154	cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
155	clEnumValN(PF_Ext_Binary, "extbinary",
156	"Extensible binary encoding "
157	"(default)"),
158	clEnumValN(PF_Text, "text", "Text encoding"),
159	clEnumValN(PF_GCC, "gcc",
160	"GCC encoding (only meaningful for -sample)")));
161	cl::opt<std::string>
162	InputFilenamesFile("input-files", cl::init(Val: ""), cl::sub (MergeSubcommand),
163	cl::desc ("Path to file containing newline-separated "
164	"[<weight>,]<filename> entries"));
165	cl::alias InputFilenamesFileA("f", cl::desc ("Alias for --input-files"),
166	cl::aliasopt (InputFilenamesFile));
167	cl::opt<bool> DumpInputFileList(
168	"dump-input-file-list", cl::init(Val: false), cl::Hidden,
169	cl::sub (MergeSubcommand),
170	cl::desc ("Dump the list of input files and their weights, then exit"));
171	cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc ("file"),
172	cl::sub (MergeSubcommand),
173	cl::desc ("Symbol remapping file"));
174	cl::alias RemappingFileA("r", cl::desc ("Alias for --remapping-file"),
175	cl::aliasopt (RemappingFile));
176	cl::opt<bool>
177	UseMD5("use-md5", cl::init(Val: false), cl::Hidden,
178	cl::desc ("Choose to use MD5 to represent string in name table (only "
179	"meaningful for -extbinary)"),
180	cl::sub (MergeSubcommand));
181	cl::opt<bool> CompressAllSections(
182	"compress-all-sections", cl::init(Val: false), cl::Hidden,
183	cl::sub (MergeSubcommand),
184	cl::desc ("Compress all sections when writing the profile (only "
185	"meaningful for -extbinary)"));
186	cl::opt<bool> SampleMergeColdContext(
187	"sample-merge-cold-context", cl::init(Val: false), cl::Hidden,
188	cl::sub (MergeSubcommand),
189	cl::desc (
190	"Merge context sample profiles whose count is below cold threshold"));
191	cl::opt<bool> SampleTrimColdContext(
192	"sample-trim-cold-context", cl::init(Val: false), cl::Hidden,
193	cl::sub (MergeSubcommand),
194	cl::desc (
195	"Trim context sample profiles whose count is below cold threshold"));
196	cl::opt<uint32_t> SampleColdContextFrameDepth(
197	"sample-frame-depth-for-cold-context", cl::init(Val: `1`),
198	cl::sub (MergeSubcommand),
199	cl::desc ("Keep the last K frames while merging cold profile. 1 means the "
200	"context-less base profile"));
201	cl::opt<size_t> OutputSizeLimit(
202	"output-size-limit", cl::init(Val: `0`), cl::Hidden, cl::sub (MergeSubcommand),
203	cl::desc ("Trim cold functions until profile size is below specified "
204	"limit in bytes. This uses a heursitic and functions may be "
205	"excessively trimmed"));
206	cl::opt<bool> GenPartialProfile(
207	"gen-partial-profile", cl::init(Val: false), cl::Hidden,
208	cl::sub (MergeSubcommand),
209	cl::desc ("Generate a partial profile (only meaningful for -extbinary)"));
210	cl::opt<std::string> SupplInstrWithSample(
211	"supplement-instr-with-sample", cl::init(Val: ""), cl::Hidden,
212	cl::sub (MergeSubcommand),
213	cl::desc ("Supplement an instr profile with sample profile, to correct "
214	"the profile unrepresentativeness issue. The sample "
215	"profile is the input of the flag. Output will be in instr "
216	"format (The flag only works with -instr)"));
217	cl::opt<float> ZeroCounterThreshold(
218	"zero-counter-threshold", cl::init(Val: `0.7`), cl::Hidden,
219	cl::sub (MergeSubcommand),
220	cl::desc ("For the function which is cold in instr profile but hot in "
221	"sample profile, if the ratio of the number of zero counters "
222	"divided by the total number of counters is above the "
223	"threshold, the profile of the function will be regarded as "
224	"being harmful for performance and will be dropped."));
225	cl::opt<unsigned> SupplMinSizeThreshold(
226	"suppl-min-size-threshold", cl::init(Val: `10`), cl::Hidden,
227	cl::sub (MergeSubcommand),
228	cl::desc ("If the size of a function is smaller than the threshold, "
229	"assume it can be inlined by PGO early inliner and it won't "
230	"be adjusted based on sample profile."));
231	cl::opt<unsigned> InstrProfColdThreshold(
232	"instr-prof-cold-threshold", cl::init(Val: `0`), cl::Hidden,
233	cl::sub (MergeSubcommand),
234	cl::desc ("User specified cold threshold for instr profile which will "
235	"override the cold threshold got from profile summary. "));
236	// WARNING: This reservoir size value is propagated to any input indexed
237	// profiles for simplicity. Changing this value between invocations could
238	// result in sample bias.
239	cl::opt<uint64_t> TemporalProfTraceReservoirSize(
240	"temporal-profile-trace-reservoir-size", cl::init(Val: `100`),
241	cl::sub (MergeSubcommand),
242	cl::desc ("The maximum number of stored temporal profile traces (default: "
243	"100)"));
244	cl::opt<uint64_t> TemporalProfMaxTraceLength(
245	"temporal-profile-max-trace-length", cl::init(Val: `10000`),
246	cl::sub (MergeSubcommand),
247	cl::desc ("The maximum length of a single temporal profile trace "
248	"(default: 10000)"));
249	cl::opt<std::string> FuncNameNegativeFilter(
250	"no-function", cl::init(Val: ""),
251	cl::sub (MergeSubcommand),
252	cl::desc ("Exclude functions matching the filter from the output."));
253
254	cl::opt<FailureMode>
255	FailMode("failure-mode", cl::init(Val: failIfAnyAreInvalid),
256	cl::desc ("Failure mode:"), cl::sub (MergeSubcommand),
257	cl::values(clEnumValN(warnOnly, "warn",
258	"Do not fail and just print warnings."),
259	clEnumValN(failIfAnyAreInvalid, "any",
260	"Fail if any profile is invalid."),
261	clEnumValN(failIfAllAreInvalid, "all",
262	"Fail only if all profiles are invalid.")));
263
264	cl::opt<bool> OutputSparse(
265	"sparse", cl::init(Val: false), cl::sub (MergeSubcommand),
266	cl::desc ("Generate a sparse profile (only meaningful for -instr)"));
267	cl::opt<unsigned> NumThreads(
268	"num-threads", cl::init(Val: `0`), cl::sub (MergeSubcommand),
269	cl::desc ("Number of merge threads to use (default: autodetect)"));
270	cl::alias NumThreadsA("j", cl::desc ("Alias for --num-threads"),
271	cl::aliasopt (NumThreads));
272
273	cl::opt<std::string> ProfileSymbolListFile(
274	"prof-sym-list", cl::init(Val: ""), cl::sub (MergeSubcommand),
275	cl::desc ("Path to file containing the list of function symbols "
276	"used to populate profile symbol list"));
277
278	cl::opt<SampleProfileLayout> ProfileLayout(
279	"convert-sample-profile-layout",
280	cl::desc ("Convert the generated profile to a profile with a new layout"),
281	cl::sub (MergeSubcommand), cl::init(Val: SPL_None),
282	cl::values(
283	clEnumValN(SPL_Nest, "nest",
284	"Nested profile, the input should be CS flat profile"),
285	clEnumValN(SPL_Flat, "flat",
286	"Profile with nested inlinee flatten out")));
287
288	cl::opt<bool> DropProfileSymbolList(
289	"drop-profile-symbol-list", cl::init(Val: false), cl::Hidden,
290	cl::sub (MergeSubcommand),
291	cl::desc ("Drop the profile symbol list when merging AutoFDO profiles "
292	"(only meaningful for -sample)"));
293
294	cl::opt<bool> KeepVTableSymbols(
295	"keep-vtable-symbols", cl::init(Val: false), cl::Hidden,
296	cl::sub (MergeSubcommand),
297	cl::desc ("If true, keep the vtable symbols in indexed profiles"));
298
299	// Temporary support for writing the previous version of the format, to enable
300	// some forward compatibility.
301	// TODO: Consider enabling this with future version changes as well, to ease
302	// deployment of newer versions of llvm-profdata.
303	cl::opt<bool> DoWritePrevVersion(
304	"write-prev-version", cl::init(Val: false), cl::Hidden,
305	cl::desc ("Write the previous version of indexed format, to enable "
306	"some forward compatibility."));
307
308	cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
309	"memprof-version", cl::Hidden, cl::sub (MergeSubcommand),
310	cl::desc ("Specify the version of the memprof format to use"),
311	cl::init(Val: memprof::Version0),
312	cl::values(clEnumValN(memprof::Version0, "0", "version 0"),
313	clEnumValN(memprof::Version1, "1", "version 1"),
314	clEnumValN(memprof::Version2, "2", "version 2"),
315	clEnumValN(memprof::Version3, "3", "version 3")));
316
317	cl::opt<bool> MemProfFullSchema(
318	"memprof-full-schema", cl::Hidden, cl::sub (MergeSubcommand),
319	cl::desc ("Use the full schema for serialization"), cl::init(Val: false));
320
321	// Options specific to overlap subcommand.
322	cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
323	cl::desc ("<base profile file>"),
324	cl::sub (OverlapSubcommand));
325	cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
326	cl::desc ("<test profile file>"),
327	cl::sub (OverlapSubcommand));
328
329	cl::opt<unsigned long long> SimilarityCutoff(
330	"similarity-cutoff", cl::init(Val: `0`),
331	cl::desc ("For sample profiles, list function names (with calling context "
332	"for csspgo) for overlapped functions "
333	"with similarities below the cutoff (percentage times 10000)."),
334	cl::sub (OverlapSubcommand));
335
336	cl::opt<bool> IsCS(
337	"cs", cl::init(Val: false),
338	cl::desc ("For context sensitive PGO counts. Does not work with CSSPGO."),
339	cl::sub (OverlapSubcommand));
340
341	cl::opt<unsigned long long> OverlapValueCutoff(
342	"value-cutoff", cl::init(Val: -`1`),
343	cl::desc (
344	"Function level overlap information for every function (with calling "
345	"context for csspgo) in test "
346	"profile with max count value greater then the parameter value"),
347	cl::sub (OverlapSubcommand));
348
349	// Options specific to show subcommand.
350	cl::opt<bool> ShowCounts("counts", cl::init(Val: false),
351	cl::desc ("Show counter values for shown functions"),
352	cl::sub (ShowSubcommand));
353	cl::opt<ShowFormat>
354	SFormat("show-format", cl::init(Val: ShowFormat::Text),
355	cl::desc ("Emit output in the selected format if supported"),
356	cl::sub (ShowSubcommand),
357	cl::values(clEnumValN(ShowFormat::Text, "text",
358	"emit normal text output (default)"),
359	clEnumValN(ShowFormat::Json, "json", "emit JSON"),
360	clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
361	// TODO: Consider replacing this with `--show-format=text-encoding`.
362	cl::opt<bool>
363	TextFormat("text", cl::init(Val: false),
364	cl::desc ("Show instr profile data in text dump format"),
365	cl::sub (ShowSubcommand));
366	cl::opt<bool>
367	JsonFormat("json",
368	cl::desc ("Show sample profile data in the JSON format "
369	"(deprecated, please use --show-format=json)"),
370	cl::sub (ShowSubcommand));
371	cl::opt<bool> ShowIndirectCallTargets(
372	"ic-targets", cl::init(Val: false),
373	cl::desc ("Show indirect call site target values for shown functions"),
374	cl::sub (ShowSubcommand));
375	cl::opt<bool> ShowVTables("show-vtables", cl::init(Val: false),
376	cl::desc ("Show vtable names for shown functions"),
377	cl::sub (ShowSubcommand));
378	cl::opt<bool> ShowMemOPSizes(
379	"memop-sizes", cl::init(Val: false),
380	cl::desc ("Show the profiled sizes of the memory intrinsic calls "
381	"for shown functions"),
382	cl::sub (ShowSubcommand));
383	cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(Val: false),
384	cl::desc ("Show detailed profile summary"),
385	cl::sub (ShowSubcommand));
386	cl::list<uint32_t> DetailedSummaryCutoffs(
387	cl::CommaSeparated, "detailed-summary-cutoffs",
388	cl::desc (
389	"Cutoff percentages (times 10000) for generating detailed summary"),
390	cl::value_desc ("800000,901000,999999"), cl::sub (ShowSubcommand));
391	cl::opt<bool>
392	ShowHotFuncList("hot-func-list", cl::init(Val: false),
393	cl::desc ("Show profile summary of a list of hot functions"),
394	cl::sub (ShowSubcommand));
395	cl::opt<bool> ShowAllFunctions("all-functions", cl::init(Val: false),
396	cl::desc ("Details for each and every function"),
397	cl::sub (ShowSubcommand));
398	cl::opt<bool> ShowCS("showcs", cl::init(Val: false),
399	cl::desc ("Show context sensitive counts"),
400	cl::sub (ShowSubcommand));
401	cl::opt<ProfileKinds> ShowProfileKind(
402	cl::desc ("Profile kind supported by show:"), cl::sub (ShowSubcommand),
403	cl::init(Val: instr),
404	cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
405	clEnumVal(sample, "Sample profile"),
406	clEnumVal(memory, "MemProf memory access profile")));
407	cl::opt<uint32_t> TopNFunctions(
408	"topn", cl::init(Val: `0`),
409	cl::desc ("Show the list of functions with the largest internal counts"),
410	cl::sub (ShowSubcommand));
411	cl::opt<uint32_t> ShowValueCutoff(
412	"value-cutoff", cl::init(Val: `0`),
413	cl::desc ("Set the count value cutoff. Functions with the maximum count "
414	"less than this value will not be printed out. (Default is 0)"),
415	cl::sub (ShowSubcommand));
416	cl::opt<bool> OnlyListBelow(
417	"list-below-cutoff", cl::init(Val: false),
418	cl::desc ("Only output names of functions whose max count values are "
419	"below the cutoff value"),
420	cl::sub (ShowSubcommand));
421	cl::opt<bool> ShowProfileSymbolList(
422	"show-prof-sym-list", cl::init(Val: false),
423	cl::desc ("Show profile symbol list if it exists in the profile. "),
424	cl::sub (ShowSubcommand));
425	cl::opt<bool> ShowSectionInfoOnly(
426	"show-sec-info-only", cl::init(Val: false),
427	cl::desc ("Show the information of each section in the sample profile. "
428	"The flag is only usable when the sample profile is in "
429	"extbinary format"),
430	cl::sub (ShowSubcommand));
431	cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(Val: false),
432	cl::desc ("Show binary ids in the profile. "),
433	cl::sub (ShowSubcommand));
434	cl::opt<bool> ShowTemporalProfTraces(
435	"temporal-profile-traces",
436	cl::desc ("Show temporal profile traces in the profile."),
437	cl::sub (ShowSubcommand));
438
439	cl::opt<bool>
440	ShowCovered("covered", cl::init(Val: false),
441	cl::desc ("Show only the functions that have been executed."),
442	cl::sub (ShowSubcommand));
443
444	cl::opt<bool> ShowProfileVersion("profile-version", cl::init(Val: false),
445	cl::desc ("Show profile version. "),
446	cl::sub (ShowSubcommand));
447
448	// Options specific to order subcommand.
449	cl::opt<unsigned>
450	NumTestTraces("num-test-traces", cl::init(Val: `0`),
451	cl::desc ("Keep aside the last <num-test-traces> traces in "
452	"the profile when computing the function order and "
453	"instead use them to evaluate that order"),
454	cl::sub (OrderSubcommand));
455
// Marker string used to indicate that multiple static functions map to the
// same name.
const std::string DuplicateNameStr("----");
459
460	static void warn(Twine Message, StringRef Whence = "", StringRef Hint = "") {
461	WithColor::warning();
462	if (!Whence.empty())
463	errs() << Whence << ": ";
464	errs() << Message << "\n";
465	if (!Hint.empty())
466	WithColor::note() << Hint << "\n";
467	}
468
469	static void warn(Error E, StringRef Whence = "") {
470	if (E.isA<InstrProfError>()) {
471	handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
472	warn(Message: IPE.message(), Whence);
473	});
474	}
475	}
476
477	static void exitWithError(Twine Message, StringRef Whence = "",
478	StringRef Hint = "") {
479	WithColor::error();
480	if (!Whence.empty())
481	errs() << Whence << ": ";
482	errs() << Message << "\n";
483	if (!Hint.empty())
484	WithColor::note() << Hint << "\n";
485	::exit(status: `1`);
486	}
487
488	static void exitWithError(Error E, StringRef Whence = "") {
489	if (E.isA<InstrProfError>()) {
490	handleAllErrors(E: std::move(E), Handlers: [&](const InstrProfError &IPE) {
491	instrprof_error instrError = IPE.get();
492	StringRef Hint = "";
493	if (instrError == instrprof_error::unrecognized_format) {
494	// Hint in case user missed specifying the profile type.
495	Hint = "Perhaps you forgot to use the --sample or --memory option?";
496	}
497	exitWithError(Message: IPE.message(), Whence, Hint);
498	});
499	return;
500	}
501
502	exitWithError(Message: toString(E: std::move(E)), Whence);
503	}
504
505	static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
506	exitWithError(Message: EC.message(), Whence);
507	}
508
509	static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
510	StringRef Whence = "") {
511	if (FailMode == failIfAnyAreInvalid)
512	exitWithErrorCode(EC, Whence);
513	else
514	warn(Message: EC.message(), Whence);
515	}
516
517	static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
518	StringRef WhenceFunction = "",
519	bool ShowHint = true) {
520	if (!WhenceFile.empty())
521	errs() << WhenceFile << ": ";
522	if (!WhenceFunction.empty())
523	errs() << WhenceFunction << ": ";
524
525	auto IPE = instrprof_error::success;
526	E = handleErrors(E: std::move(E),
527	Hs: [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
528	IPE = E ->get();
529	return Error (std::move(E));
530	});
531	errs() << toString(E: std::move(E)) << "\n";
532
533	if (ShowHint) {
534	StringRef Hint = "";
535	if (IPE != instrprof_error::success) {
536	switch (IPE) {
537	case instrprof_error::hash_mismatch:
538	case instrprof_error::count_mismatch:
539	case instrprof_error::value_site_count_mismatch:
540	Hint = "Make sure that all profile data to be merged is generated "
541	"from the same binary.";
542	break;
543	default:
544	break;
545	}
546	}
547
548	if (!Hint.empty())
549	errs() << Hint << "\n";
550	}
551	}
552
553	namespace {
554	/// A remapper from original symbol names to new symbol names based on a file
555	/// containing a list of mappings from old name to new name.
556	class SymbolRemapper {
557	std::unique_ptr<MemoryBuffer> File;
558	DenseMap<StringRef, StringRef> RemappingTable;
559
560	public:
561	/// Build a SymbolRemapper from a file containing a list of old/new symbols.
562	static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
563	auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
564	if (!BufOrError)
565	exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
566
567	auto Remapper = std::make_unique<SymbolRemapper>();
568	Remapper ->File = std::move(BufOrError.get());
569
570	for (line_iterator LineIt(Remapper ->File, /SkipBlanks=/*true, `'#'`);
571	!LineIt.is_at_eof(); ++LineIt) {
572	std::pair<StringRef, StringRef> Parts = LineIt ->split(Separator: `' '`);
573	if (Parts.first.empty() \|\| Parts.second.empty() \|\|
574	Parts.second.count(C: `' '`)) {
575	exitWithError(Message: "unexpected line in remapping file",
576	Whence: (InputFile + ":" + Twine (LineIt.line_number())).str(),
577	Hint: "expected 'old_symbol new_symbol'");
578	}
579	Remapper ->RemappingTable.insert(KV: Parts);
580	}
581	return Remapper;
582	}
583
584	/// Attempt to map the given old symbol into a new symbol.
585	///
586	/// \return The new symbol, or \p Name if no such symbol was found.
587	StringRef operator()(StringRef Name) {
588	StringRef New = RemappingTable.lookup(Val: Name);
589	return New.empty() ? Name : New;
590	}
591
592	FunctionId operator()(FunctionId Name) {
593	// MD5 name cannot be remapped.
594	if (!Name.isStringRef())
595	return Name;
596	StringRef New = RemappingTable.lookup(Val: Name.stringRef());
597	return New.empty() ? Name : FunctionId (New);
598	}
599	};
600	}
601
602	struct WeightedFile {
603	std::string Filename;
604	uint64_t Weight;
605	};
606	typedef SmallVector<WeightedFile, `5`> WeightedFileVector;
607
608	/// Keep track of merged data and reported errors.
609	struct WriterContext {
610	std::mutex Lock;
611	InstrProfWriter Writer;
612	std::vector<std::pair<Error, std::string>> Errors;
613	std::mutex &ErrLock;
614	SmallSet<instrprof_error, `4`> &WriterErrorCodes;
615
616	WriterContext(bool IsSparse, std::mutex &ErrLock,
617	SmallSet<instrprof_error, `4`> &WriterErrorCodes,
618	uint64_t ReservoirSize = `0`, uint64_t MaxTraceLength = `0`)
619	: Writer (IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
620	MemProfVersionRequested, MemProfFullSchema),
621	ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
622	};
623
624	/// Computer the overlap b/w profile BaseFilename and TestFileName,
625	/// and store the program level result to Overlap.
626	static void overlapInput(const std::string &BaseFilename,
627	const std::string &TestFilename, WriterContext *WC,
628	OverlapStats &Overlap,
629	const OverlapFuncFilters &FuncFilter,
630	raw_fd_ostream &OS, bool IsCS) {
631	auto FS = vfs::getRealFileSystem();
632	auto ReaderOrErr = InstrProfReader::create(Path: TestFilename, FS&: *FS);
633	if (Error E = ReaderOrErr.takeError()) {
634	// Skip the empty profiles by returning sliently.
635	auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
636	if (ErrorCode != instrprof_error::empty_raw_profile)
637	WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
638	args: TestFilename);
639	return;
640	}
641
642	auto Reader = std::move(ReaderOrErr.get());
643	for (auto &I : *Reader) {
644	OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
645	FuncOverlap.setFuncInfo(Name: I.Name, Hash: I.Hash);
646
647	WC->Writer.overlapRecord(Other: std::move(I), Overlap, FuncLevelOverlap&: FuncOverlap, FuncFilter);
648	FuncOverlap.dump(OS);
649	}
650	}
651
652	/// Load an input into a writer context.
653	static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
654	const InstrProfCorrelator *Correlator,
655	const StringRef ProfiledBinary, WriterContext *WC) {
656	std::unique_lock<std::mutex> CtxGuard{WC->Lock};
657
658	// Copy the filename, because llvm::ThreadPool copied the input "const
659	// WeightedFile &" by value, making a reference to the filename within it
660	// invalid outside of this packaged task.
661	std::string Filename = Input.Filename;
662
663	using ::llvm::memprof::RawMemProfReader;
664	if (RawMemProfReader::hasFormat(Path: Input.Filename)) {
665	auto ReaderOrErr = RawMemProfReader::create(Path: Input.Filename, ProfiledBinary);
666	if (!ReaderOrErr) {
667	exitWithError(E: ReaderOrErr.takeError(), Whence: Input.Filename);
668	}
669	std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
670	// Check if the profile types can be merged, e.g. clang frontend profiles
671	// should not be merged with memprof profiles.
672	if (Error E = WC->Writer.mergeProfileKind(Other: Reader ->getProfileKind())) {
673	consumeError(Err: std::move(E));
674	WC->Errors.emplace_back(
675	args: make_error<StringError>(
676	Args: "Cannot merge MemProf profile with Clang generated profile.",
677	Args: std::error_code ()),
678	args&: Filename);
679	return;
680	}
681
682	auto MemProfError = [&](Error E) {
683	auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
684	WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg),
685	args&: Filename);
686	};
687
688	// Add the frame mappings into the writer context.
689	const auto &IdToFrame = Reader ->getFrameMapping();
690	for (const auto &I : IdToFrame) {
691	bool Succeeded = WC->Writer.addMemProfFrame(
692	/Id=/I.first, /Frame=/F: I.getSecond(), Warn: MemProfError);
693	// If we weren't able to add the frame mappings then it doesn't make sense
694	// to try to add the records from this profile.
695	if (!Succeeded)
696	return;
697	}
698
699	// Add the call stacks into the writer context.
700	const auto &CSIdToCallStacks = Reader ->getCallStacks();
701	for (const auto &I : CSIdToCallStacks) {
702	bool Succeeded = WC->Writer.addMemProfCallStack(
703	/Id=/CSId: I.first, /Frame=/CallStack: I.getSecond(), Warn: MemProfError);
704	// If we weren't able to add the call stacks then it doesn't make sense
705	// to try to add the records from this profile.
706	if (!Succeeded)
707	return;
708	}
709
710	const auto &FunctionProfileData = Reader ->getProfileData();
711	// Add the memprof records into the writer context.
712	for (const auto &[GUID, Record] : FunctionProfileData) {
713	WC->Writer.addMemProfRecord(Id: GUID, Record);
714	}
715	return;
716	}
717
718	auto FS = vfs::getRealFileSystem();
719	// TODO: This only saves the first non-fatal error from InstrProfReader, and
720	// then added to WriterContext::Errors. However, this is not extensible, if
721	// we have more non-fatal errors from InstrProfReader in the future. How
722	// should this interact with different -failure-mode?
723	std::optional<std::pair<Error, std::string>> ReaderWarning;
724	auto Warn = [&](Error E) {
725	if (ReaderWarning) {
726	consumeError(Err: std::move(E));
727	return;
728	}
729	// Only show the first time an error occurs in this file.
730	auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
731	ReaderWarning = {make_error<InstrProfError>(Args&: ErrCode, Args&: Msg), Filename};
732	};
733	auto ReaderOrErr =
734	InstrProfReader::create(Path: Input.Filename, FS&: *FS, Correlator, Warn);
735	if (Error E = ReaderOrErr.takeError()) {
736	// Skip the empty profiles by returning silently.
737	auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
738	if (ErrCode != instrprof_error::empty_raw_profile)
739	WC->Errors.emplace_back(args: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
740	args&: Filename);
741	return;
742	}
743
744	auto Reader = std::move(ReaderOrErr.get());
745	if (Error E = WC->Writer.mergeProfileKind(Other: Reader ->getProfileKind())) {
746	consumeError(Err: std::move(E));
747	WC->Errors.emplace_back(
748	args: make_error<StringError>(
749	Args: "Merge IR generated profile with Clang generated profile.",
750	Args: std::error_code ()),
751	args&: Filename);
752	return;
753	}
754
755	for (auto &I : *Reader) {
756	if (Remapper)
757	I.Name = (*Remapper)(I.Name);
758	const StringRef FuncName = I.Name;
759	bool Reported = false;
760	WC->Writer.addRecord(I: std::move(I), Weight: Input.Weight, Warn: [&](Error E) {
761	if (Reported) {
762	consumeError(Err: std::move(E));
763	return;
764	}
765	Reported = true;
766	// Only show hint the first time an error occurs.
767	auto [ErrCode, Msg] = InstrProfError::take(E: std::move(E));
768	std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
769	bool firstTime = WC->WriterErrorCodes.insert(V: ErrCode).second;
770	handleMergeWriterError(E: make_error<InstrProfError>(Args&: ErrCode, Args&: Msg),
771	WhenceFile: Input.Filename, WhenceFunction: FuncName, ShowHint: firstTime);
772	});
773	}
774
775	if (KeepVTableSymbols) {
776	const InstrProfSymtab &symtab = Reader ->getSymtab();
777	const auto &VTableNames = symtab.getVTableNames();
778
779	for (const auto &kv : VTableNames)
780	WC->Writer.addVTableName(VTableName: kv.getKey());
781	}
782
783	if (Reader ->hasTemporalProfile()) {
784	auto &Traces = Reader ->getTemporalProfTraces(Weight: Input.Weight);
785	if (!Traces.empty())
786	WC->Writer.addTemporalProfileTraces(
787	SrcTraces&: Traces, SrcStreamSize: Reader ->getTemporalProfTraceStreamSize());
788	}
789	if (Reader ->hasError()) {
790	if (Error E = Reader ->getError()) {
791	WC->Errors.emplace_back(args: std::move(E), args&: Filename);
792	return;
793	}
794	}
795
796	std::vector<llvm::object::BuildID> BinaryIds;
797	if (Error E = Reader ->readBinaryIds(BinaryIds)) {
798	WC->Errors.emplace_back(args: std::move(E), args&: Filename);
799	return;
800	}
801	WC->Writer.addBinaryIds(BIs: BinaryIds);
802
803	if (ReaderWarning) {
804	WC->Errors.emplace_back(args: std::move(ReaderWarning ->first),
805	args&: ReaderWarning ->second);
806	}
807	}
808
809	/// Merge the \p Src writer context into \p Dst.
810	static void mergeWriterContexts(WriterContext Dst, WriterContext Src) {
811	for (auto &ErrorPair : Src->Errors)
812	Dst->Errors.push_back(x: std::move(ErrorPair));
813	Src->Errors.clear();
814
815	if (Error E = Dst->Writer.mergeProfileKind(Other: Src->Writer.getProfileKind()))
816	exitWithError(E: std::move(E));
817
818	Dst->Writer.mergeRecordsFromWriter(IPW: std::move(Src->Writer), Warn: [&](Error E) {
819	auto [ErrorCode, Msg] = InstrProfError::take(E: std::move(E));
820	std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
821	bool firstTime = Dst->WriterErrorCodes.insert(V: ErrorCode).second;
822	if (firstTime)
823	warn(Message: toString(E: make_error<InstrProfError>(Args&: ErrorCode, Args&: Msg)));
824	});
825	}
826
827	static StringRef
828	getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
829	return Val.first();
830	}
831
832	static std::string
833	getFuncName(const SampleProfileMap::value_type &Val) {
834	return Val.second.getContext().toString();
835	}
836
837	template <typename T>
838	static void filterFunctions(T &ProfileMap) {
839	bool hasFilter = !FuncNameFilter.empty();
840	bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
841	if (!hasFilter && !hasNegativeFilter)
842	return;
843
844	// If filter starts with '?' it is MSVC mangled name, not a regex.
845	llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
846	if (hasFilter && FuncNameFilter [`0`] == `'?'` &&
847	ProbablyMSVCMangledName.match(String: FuncNameFilter))
848	FuncNameFilter = llvm::Regex::escape(String: FuncNameFilter);
849	if (hasNegativeFilter && FuncNameNegativeFilter [`0`] == `'?'` &&
850	ProbablyMSVCMangledName.match(String: FuncNameNegativeFilter))
851	FuncNameNegativeFilter = llvm::Regex::escape(String: FuncNameNegativeFilter);
852
853	size_t Count = ProfileMap.size();
854	llvm::Regex Pattern(FuncNameFilter);
855	llvm::Regex NegativePattern(FuncNameNegativeFilter);
856	std::string Error;
857	if (hasFilter && !Pattern.isValid(Error))
858	exitWithError(Message: Error);
859	if (hasNegativeFilter && !NegativePattern.isValid(Error))
860	exitWithError(Message: Error);
861
862	// Handle MD5 profile, so it is still able to match using the original name.
863	std::string MD5Name = std::to_string(val: llvm::MD5Hash(Str: FuncNameFilter));
864	std::string NegativeMD5Name =
865	std::to_string(val: llvm::MD5Hash(Str: FuncNameNegativeFilter));
866
867	for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
868	auto Tmp = I++;
869	const auto &FuncName = getFuncName(*Tmp);
870	// Negative filter has higher precedence than positive filter.
871	if ((hasNegativeFilter &&
872	(NegativePattern.match(String: FuncName) \|\|
873	(FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) \|\|
874	(hasFilter && !(Pattern.match(String: FuncName) \|\|
875	(FunctionSamples::UseMD5 && MD5Name == FuncName))))
876	ProfileMap.erase(Tmp);
877	}
878
879	llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
880	<< "in the original profile are filtered.\n";
881	}
882
883	static void writeInstrProfile(StringRef OutputFilename,
884	ProfileFormat OutputFormat,
885	InstrProfWriter &Writer) {
886	std::error_code EC;
887	raw_fd_ostream Output(OutputFilename.data(), EC,
888	OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
889	: sys::fs::OF_None);
890	if (EC)
891	exitWithErrorCode(EC, Whence: OutputFilename);
892
893	if (OutputFormat == PF_Text) {
894	if (Error E = Writer.writeText(OS&: Output))
895	warn(E: std::move(E));
896	} else {
897	if (Output.is_displayed())
898	exitWithError(Message: "cannot write a non-text format profile to the terminal");
899	if (Error E = Writer.write(OS&: Output))
900	warn(E: std::move(E));
901	}
902	}
903
904	static void mergeInstrProfile(const WeightedFileVector &Inputs,
905	SymbolRemapper *Remapper,
906	int MaxDbgCorrelationWarnings,
907	const StringRef ProfiledBinary) {
908	const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
909	const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
910	if (OutputFormat == PF_Compact_Binary)
911	exitWithError(Message: "Compact Binary is deprecated");
912	if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
913	OutputFormat != PF_Text)
914	exitWithError(Message: "unknown format is specified");
915
916	// TODO: Maybe we should support correlation with mixture of different
917	// correlation modes(w/wo debug-info/object correlation).
918	if (!DebugInfoFilename.empty() && !BinaryFilename.empty())
919	exitWithError(Message: "Expected only one of -debug-info, -binary-file");
920	std::string CorrelateFilename;
921	ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
922	if (!DebugInfoFilename.empty()) {
923	CorrelateFilename = DebugInfoFilename;
924	CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
925	} else if (!BinaryFilename.empty()) {
926	CorrelateFilename = BinaryFilename;
927	CorrelateKind = ProfCorrelatorKind::BINARY;
928	}
929
930	std::unique_ptr<InstrProfCorrelator> Correlator;
931	if (CorrelateKind != InstrProfCorrelator::NONE) {
932	if (auto Err = InstrProfCorrelator::get(Filename: CorrelateFilename, FileKind: CorrelateKind)
933	.moveInto(Value&: Correlator))
934	exitWithError(E: std::move(Err), Whence: CorrelateFilename);
935	if (auto Err = Correlator ->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
936	exitWithError(E: std::move(Err), Whence: CorrelateFilename);
937	}
938
939	std::mutex ErrorLock;
940	SmallSet<instrprof_error, `4`> WriterErrorCodes;
941
942	// If NumThreads is not specified, auto-detect a good default.
943	if (NumThreads == `0`)
944	NumThreads = std::min(a: hardware_concurrency().compute_thread_count(),
945	b: unsigned((Inputs.size() + `1`) / `2`));
946
947	// Initialize the writer contexts.
948	SmallVector<std::unique_ptr<WriterContext>, `4`> Contexts;
949	for (unsigned I = `0`; I < NumThreads; ++I)
950	Contexts.emplace_back(Args: std::make_unique<WriterContext>(
951	args&: OutputSparse, args&: ErrorLock, args&: WriterErrorCodes, args: TraceReservoirSize,
952	args: MaxTraceLength));
953
954	if (NumThreads == `1`) {
955	for (const auto &Input : Inputs)
956	loadInput(Input, Remapper, Correlator: Correlator.get(), ProfiledBinary,
957	WC: Contexts [`0`].get());
958	} else {
959	DefaultThreadPool Pool(hardware_concurrency(ThreadCount: NumThreads));
960
961	// Load the inputs in parallel (N/NumThreads serial steps).
962	unsigned Ctx = `0`;
963	for (const auto &Input : Inputs) {
964	Pool.async(F&: loadInput, ArgList: Input, ArgList&: Remapper, ArgList: Correlator.get(), ArgList: ProfiledBinary,
965	ArgList: Contexts [Ctx].get());
966	Ctx = (Ctx + `1`) % NumThreads;
967	}
968	Pool.wait();
969
970	// Merge the writer contexts together (~ lg(NumThreads) serial steps).
971	unsigned Mid = Contexts.size() / `2`;
972	unsigned End = Contexts.size();
973	assert(Mid > `0` && "Expected more than one context");
974	do {
975	for (unsigned I = `0`; I < Mid; ++I)
976	Pool.async(F&: mergeWriterContexts, ArgList: Contexts [I].get(),
977	ArgList: Contexts [I + Mid].get());
978	Pool.wait();
979	if (End & `1`) {
980	Pool.async(F&: mergeWriterContexts, ArgList: Contexts [`0`].get(),
981	ArgList: Contexts [End - `1`].get());
982	Pool.wait();
983	}
984	End = Mid;
985	Mid /= `2`;
986	} while (Mid > `0`);
987	}
988
989	// Handle deferred errors encountered during merging. If the number of errors
990	// is equal to the number of inputs the merge failed.
991	unsigned NumErrors = `0`;
992	for (std::unique_ptr<WriterContext> &WC : Contexts) {
993	for (auto &ErrorPair : WC ->Errors) {
994	++NumErrors;
995	warn(Message: toString(E: std::move(ErrorPair.first)), Whence: ErrorPair.second);
996	}
997	}
998	if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) \|\|
999	(NumErrors > `0` && FailMode == failIfAnyAreInvalid))
1000	exitWithError(Message: "no profile can be merged");
1001
1002	filterFunctions(ProfileMap&: Contexts [`0`]->Writer.getProfileData());
1003
1004	writeInstrProfile(OutputFilename, OutputFormat, Writer&: Contexts [`0`]->Writer);
1005	}
1006
1007	/// The profile entry for a function in instrumentation profile.
1008	struct InstrProfileEntry {
1009	uint64_t MaxCount = `0`;
1010	uint64_t NumEdgeCounters = `0`;
1011	float ZeroCounterRatio = `0.0`;
1012	InstrProfRecord *ProfRecord;
1013	InstrProfileEntry(InstrProfRecord *Record);
1014	InstrProfileEntry() = default;
1015	};
1016
1017	InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
1018	ProfRecord = Record;
1019	uint64_t CntNum = Record->Counts.size();
1020	uint64_t ZeroCntNum = `0`;
1021	for (size_t I = `0`; I < CntNum; ++I) {
1022	MaxCount = std::max(a: MaxCount, b: Record->Counts [I]);
1023	ZeroCntNum += !Record->Counts [I];
1024	}
1025	ZeroCounterRatio = (float)ZeroCntNum / CntNum;
1026	NumEdgeCounters = CntNum;
1027	}
1028
1029	/// Either set all the counters in the instr profile entry \p IFE to
1030	/// -1 / -2 /in order to drop the profile or scale up the
1031	/// counters in \p IFP to be above hot / cold threshold. We use
1032	/// the ratio of zero counters in the profile of a function to
1033	/// decide the profile is helpful or harmful for performance,
1034	/// and to choose whether to scale up or drop it.
1035	static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
1036	uint64_t HotInstrThreshold,
1037	uint64_t ColdInstrThreshold,
1038	float ZeroCounterThreshold) {
1039	InstrProfRecord *ProfRecord = IFE.ProfRecord;
1040	if (!IFE.MaxCount \|\| IFE.ZeroCounterRatio > ZeroCounterThreshold) {
1041	// If all or most of the counters of the function are zero, the
1042	// profile is unaccountable and should be dropped. Reset all the
1043	// counters to be -1 / -2 and PGO profile-use will drop the profile.
1044	// All counters being -1 also implies that the function is hot so
1045	// PGO profile-use will also set the entry count metadata to be
1046	// above hot threshold.
1047	// All counters being -2 implies that the function is warm so
1048	// PGO profile-use will also set the entry count metadata to be
1049	// above cold threshold.
1050	auto Kind =
1051	(SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
1052	ProfRecord->setPseudoCount(Kind);
1053	return;
1054	}
1055
1056	// Scale up the MaxCount to be multiple times above hot / cold threshold.
1057	const unsigned MultiplyFactor = `3`;
1058	uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1059	uint64_t Numerator = Threshold * MultiplyFactor;
1060
1061	// Make sure Threshold for warm counters is below the HotInstrThreshold.
1062	if (!SetToHot && Threshold >= HotInstrThreshold) {
1063	Threshold = (HotInstrThreshold + ColdInstrThreshold) / `2`;
1064	}
1065
1066	uint64_t Denominator = IFE.MaxCount;
1067	if (Numerator <= Denominator)
1068	return;
1069	ProfRecord->scale(N: Numerator, D: Denominator, Warn: [&](instrprof_error E) {
1070	warn(Message: toString(E: make_error<InstrProfError>(Args&: E)));
1071	});
1072	}
1073
// Indices into ProfileSummaryBuilder::DefaultCutoffs selecting the percentile
// cutoffs from which the cold and hot count thresholds are derived.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
1076
1077	using sampleprof::FSDiscriminatorPass;
1078
1079	// Internal options to set FSDiscriminatorPass. Used in merge and show
1080	// commands.
1081	static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1082	"fs-discriminator-pass", cl::init(Val: PassLast), cl::Hidden,
1083	cl::desc ("Zero out the discriminator bits for the FS discrimiantor "
1084	"pass beyond this value. The enum values are defined in "
1085	"Support/Discriminator.h"),
1086	cl::values(clEnumVal(Base, "Use base discriminators only"),
1087	clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1088	clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1089	clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1090	clEnumVal(PassLast, "Use all discriminator bits (default)")));
1091
1092	static unsigned getDiscriminatorMask() {
1093	return getN1Bits(N: getFSPassBitEnd(P: FSDiscriminatorPassOption.getValue()));
1094	}
1095
1096	/// Adjust the instr profile in \p WC based on the sample profile in
1097	/// \p Reader.
1098	static void
1099	adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1100	std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1101	unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1102	unsigned InstrProfColdThreshold) {
1103	// Function to its entry in instr profile.
1104	StringMap<InstrProfileEntry> InstrProfileMap;
1105	StringMap<StringRef> StaticFuncMap;
1106	InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1107
1108	auto checkSampleProfileHasFUnique = [&Reader]() {
1109	for (const auto &PD : Reader ->getProfiles()) {
1110	auto &FContext = PD.second.getContext();
1111	if (FContext.toString().find(s: FunctionSamples::UniqSuffix) !=
1112	std::string::npos) {
1113	return true;
1114	}
1115	}
1116	return false;
1117	};
1118
1119	bool SampleProfileHasFUnique = checkSampleProfileHasFUnique ();
1120
1121	auto buildStaticFuncMap = [&StaticFuncMap,
1122	SampleProfileHasFUnique](const StringRef Name) {
1123	std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1124	size_t PrefixPos = StringRef::npos;
1125	for (auto &FilePrefix : FilePrefixes) {
1126	std::string NamePrefix = FilePrefix + GlobalIdentifierDelimiter;
1127	PrefixPos = Name.find_insensitive(Str: NamePrefix);
1128	if (PrefixPos == StringRef::npos)
1129	continue;
1130	PrefixPos += NamePrefix.size();
1131	break;
1132	}
1133
1134	if (PrefixPos == StringRef::npos) {
1135	return;
1136	}
1137
1138	StringRef NewName = Name.drop_front(N: PrefixPos);
1139	StringRef FName = Name.substr(Start: `0`, N: PrefixPos - `1`);
1140	if (NewName.size() == `0`) {
1141	return;
1142	}
1143
1144	// This name should have a static linkage.
1145	size_t PostfixPos = NewName.find(Str: FunctionSamples::UniqSuffix);
1146	bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1147
1148	// If sample profile and instrumented profile do not agree on symbol
1149	// uniqification.
1150	if (SampleProfileHasFUnique != ProfileHasFUnique) {
1151	// If instrumented profile uses -funique-internal-linkage-symbols,
1152	// we need to trim the name.
1153	if (ProfileHasFUnique) {
1154	NewName = NewName.substr(Start: `0`, N: PostfixPos);
1155	} else {
1156	// If sample profile uses -funique-internal-linkage-symbols,
1157	// we build the map.
1158	std::string NStr =
1159	NewName.str() + getUniqueInternalLinkagePostfix(FName);
1160	NewName = StringRef (NStr);
1161	StaticFuncMap [NewName] = Name;
1162	return;
1163	}
1164	}
1165
1166	if (!StaticFuncMap.contains(Key: NewName)) {
1167	StaticFuncMap [NewName] = Name;
1168	} else {
1169	StaticFuncMap [NewName] = DuplicateNameStr;
1170	}
1171	};
1172
1173	// We need to flatten the SampleFDO profile as the InstrFDO
1174	// profile does not have inlined callsite profiles.
1175	// One caveat is the pre-inlined function -- their samples
1176	// should be collapsed into the caller function.
1177	// Here we do a DFS traversal to get the flatten profile
1178	// info: the sum of entrycount and the max of maxcount.
1179	// Here is the algorithm:
1180	// recursive (FS, root_name) {
1181	// name = FS->getName();
1182	// get samples for FS;
1183	// if (InstrProf.find(name) {
1184	// root_name = name;
1185	// } else {
1186	// if (name is in static_func map) {
1187	// root_name = static_name;
1188	// }
1189	// }
1190	// update the Map entry for root_name;
1191	// for (subfs: FS) {
1192	// recursive(subfs, root_name);
1193	// }
1194	// }
1195	//
1196	// Here is an example.
1197	//
1198	// SampleProfile:
1199	// foo:12345:1000
1200	// 1: 1000
1201	// 2.1: 1000
1202	// 15: 5000
1203	// 4: bar:1000
1204	// 1: 1000
1205	// 2: goo:3000
1206	// 1: 3000
1207	// 8: bar:40000
1208	// 1: 10000
1209	// 2: goo:30000
1210	// 1: 30000
1211	//
1212	// InstrProfile has two entries:
1213	// foo
1214	// bar.cc;bar
1215	//
1216	// After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1217	// {"foo", {1000, 5000}}
1218	// {"bar.cc;bar", {11000, 30000}}
1219	//
1220	// foo's has an entry count of 1000, and max body count of 5000.
1221	// bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1222	// 10000), and max count of 30000 (from the callsite in line 8).
1223	//
1224	// Note that goo's count will remain in bar.cc;bar() as it does not have an
1225	// entry in InstrProfile.
1226	llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1227	auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1228	&InstrProfileMap](const FunctionSamples &FS,
1229	const StringRef &RootName) {
1230	auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1231	const StringRef &RootName,
1232	auto &BuildImpl) -> void {
1233	std::string NameStr = FS.getFunction().str();
1234	const StringRef Name = NameStr;
1235	const StringRef *NewRootName = &RootName;
1236	uint64_t EntrySample = FS.getHeadSamplesEstimate();
1237	uint64_t MaxBodySample = FS.getMaxCountInside(/ SkipCallSite/ true);
1238
1239	auto It = InstrProfileMap.find(Key: Name);
1240	if (It != InstrProfileMap.end()) {
1241	NewRootName = &Name;
1242	} else {
1243	auto NewName = StaticFuncMap.find(Key: Name);
1244	if (NewName != StaticFuncMap.end()) {
1245	It = InstrProfileMap.find(Key: NewName ->second.str());
1246	if (NewName ->second != DuplicateNameStr) {
1247	NewRootName = &NewName ->second;
1248	}
1249	} else {
1250	// Here the EntrySample is of an inlined function, so we should not
1251	// update the EntrySample in the map.
1252	EntrySample = `0`;
1253	}
1254	}
1255	EntrySample += FlattenSampleMap [*NewRootName].first;
1256	MaxBodySample =
1257	std::max(a: FlattenSampleMap [*NewRootName].second, b: MaxBodySample);
1258	FlattenSampleMap [*NewRootName] =
1259	std::make_pair(x&: EntrySample, y&: MaxBodySample);
1260
1261	for (const auto &C : FS.getCallsiteSamples())
1262	for (const auto &F : C.second)
1263	BuildImpl(F.second, *NewRootName, BuildImpl);
1264	};
1265	BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1266	};
1267
1268	for (auto &PD : WC ->Writer.getProfileData()) {
1269	// Populate IPBuilder.
1270	for (const auto &PDV : PD.getValue()) {
1271	InstrProfRecord Record = PDV.second;
1272	IPBuilder.addRecord(Record);
1273	}
1274
1275	// If a function has multiple entries in instr profile, skip it.
1276	if (PD.getValue().size() != `1`)
1277	continue;
1278
1279	// Initialize InstrProfileMap.
1280	InstrProfRecord *R = &PD.getValue().begin()->second;
1281	StringRef FullName = PD.getKey();
1282	InstrProfileMap [FullName] = InstrProfileEntry (R);
1283	buildStaticFuncMap (FullName);
1284	}
1285
1286	for (auto &PD : Reader ->getProfiles()) {
1287	sampleprof::FunctionSamples &FS = PD.second;
1288	std::string Name = FS.getFunction().str();
1289	BuildMaxSampleMap (FS, Name);
1290	}
1291
1292	ProfileSummary InstrPS = *IPBuilder.getSummary();
1293	ProfileSummary SamplePS = Reader ->getSummary();
1294
1295	// Compute cold thresholds for instr profile and sample profile.
1296	uint64_t HotSampleThreshold =
1297	ProfileSummaryBuilder::getEntryForPercentile(
1298	DS: SamplePS.getDetailedSummary(),
1299	Percentile: ProfileSummaryBuilder::DefaultCutoffs [HotPercentileIdx])
1300	.MinCount;
1301	uint64_t ColdSampleThreshold =
1302	ProfileSummaryBuilder::getEntryForPercentile(
1303	DS: SamplePS.getDetailedSummary(),
1304	Percentile: ProfileSummaryBuilder::DefaultCutoffs [ColdPercentileIdx])
1305	.MinCount;
1306	uint64_t HotInstrThreshold =
1307	ProfileSummaryBuilder::getEntryForPercentile(
1308	DS: InstrPS.getDetailedSummary(),
1309	Percentile: ProfileSummaryBuilder::DefaultCutoffs [HotPercentileIdx])
1310	.MinCount;
1311	uint64_t ColdInstrThreshold =
1312	InstrProfColdThreshold
1313	? InstrProfColdThreshold
1314	: ProfileSummaryBuilder::getEntryForPercentile(
1315	DS: InstrPS.getDetailedSummary(),
1316	Percentile: ProfileSummaryBuilder::DefaultCutoffs [ColdPercentileIdx])
1317	.MinCount;
1318
1319	// Find hot/warm functions in sample profile which is cold in instr profile
1320	// and adjust the profiles of those functions in the instr profile.
1321	for (const auto &E : FlattenSampleMap) {
1322	uint64_t SampleMaxCount = std::max(a: E.second.first, b: E.second.second);
1323	if (SampleMaxCount < ColdSampleThreshold)
1324	continue;
1325	StringRef Name = E.first();
1326	auto It = InstrProfileMap.find(Key: Name);
1327	if (It == InstrProfileMap.end()) {
1328	auto NewName = StaticFuncMap.find(Key: Name);
1329	if (NewName != StaticFuncMap.end()) {
1330	It = InstrProfileMap.find(Key: NewName ->second.str());
1331	if (NewName ->second == DuplicateNameStr) {
1332	WithColor::warning()
1333	<< "Static function " << Name
1334	<< " has multiple promoted names, cannot adjust profile.\n";
1335	}
1336	}
1337	}
1338	if (It == InstrProfileMap.end() \|\|
1339	It ->second.MaxCount > ColdInstrThreshold \|\|
1340	It ->second.NumEdgeCounters < SupplMinSizeThreshold)
1341	continue;
1342	bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1343	updateInstrProfileEntry(IFE&: It ->second, SetToHot, HotInstrThreshold,
1344	ColdInstrThreshold, ZeroCounterThreshold);
1345	}
1346	}
1347
1348	/// The main function to supplement instr profile with sample profile.
1349	/// \Inputs contains the instr profile. \p SampleFilename specifies the
1350	/// sample profile. \p OutputFilename specifies the output profile name.
1351	/// \p OutputFormat specifies the output profile format. \p OutputSparse
1352	/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1353	/// specifies the minimal size for the functions whose profile will be
1354	/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1355	/// a function contains too many zero counters and whether its profile
1356	/// should be dropped. \p InstrProfColdThreshold is the user specified
1357	/// cold threshold which will override the cold threshold got from the
1358	/// instr profile summary.
1359	static void supplementInstrProfile(const WeightedFileVector &Inputs,
1360	StringRef SampleFilename, bool OutputSparse,
1361	unsigned SupplMinSizeThreshold,
1362	float ZeroCounterThreshold,
1363	unsigned InstrProfColdThreshold) {
1364	if (OutputFilename == "-")
1365	exitWithError(Message: "cannot write indexed profdata format to stdout");
1366	if (Inputs.size() != `1`)
1367	exitWithError(Message: "expect one input to be an instr profile");
1368	if (Inputs [`0`].Weight != `1`)
1369	exitWithError(Message: "expect instr profile doesn't have weight");
1370
1371	StringRef InstrFilename = Inputs [`0`].Filename;
1372
1373	// Read sample profile.
1374	LLVMContext Context;
1375	auto FS = vfs::getRealFileSystem();
1376	auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1377	Filename: SampleFilename.str(), C&: Context, FS&: *FS, P: FSDiscriminatorPassOption);
1378	if (std::error_code EC = ReaderOrErr.getError())
1379	exitWithErrorCode(EC, Whence: SampleFilename);
1380	auto Reader = std::move(ReaderOrErr.get());
1381	if (std::error_code EC = Reader ->read())
1382	exitWithErrorCode(EC, Whence: SampleFilename);
1383
1384	// Read instr profile.
1385	std::mutex ErrorLock;
1386	SmallSet<instrprof_error, `4`> WriterErrorCodes;
1387	auto WC = std::make_unique<WriterContext>(args&: OutputSparse, args&: ErrorLock,
1388	args&: WriterErrorCodes);
1389	loadInput(Input: Inputs [`0`], Remapper: nullptr, Correlator: nullptr, /ProfiledBinary=/"", WC: WC.get());
1390	if (WC ->Errors.size() > `0`)
1391	exitWithError(E: std::move(WC ->Errors [`0`].first), Whence: InstrFilename);
1392
1393	adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1394	InstrProfColdThreshold);
1395	writeInstrProfile(OutputFilename, OutputFormat, Writer&: WC ->Writer);
1396	}
1397
1398	/// Make a copy of the given function samples with all symbol names remapped
1399	/// by the provided symbol remapper.
1400	static sampleprof::FunctionSamples
1401	remapSamples(const sampleprof::FunctionSamples &Samples,
1402	SymbolRemapper &Remapper, sampleprof_error &Error) {
1403	sampleprof::FunctionSamples Result;
1404	Result.setFunction(Remapper (Samples.getFunction()));
1405	Result.addTotalSamples(Num: Samples.getTotalSamples());
1406	Result.addHeadSamples(Num: Samples.getHeadSamples());
1407	for (const auto &BodySample : Samples.getBodySamples()) {
1408	uint32_t MaskedDiscriminator =
1409	BodySample.first.Discriminator & getDiscriminatorMask();
1410	Result.addBodySamples(LineOffset: BodySample.first.LineOffset, Discriminator: MaskedDiscriminator,
1411	Num: BodySample.second.getSamples());
1412	for (const auto &Target : BodySample.second.getCallTargets()) {
1413	Result.addCalledTargetSamples(LineOffset: BodySample.first.LineOffset,
1414	Discriminator: MaskedDiscriminator,
1415	Func: Remapper (Target.first), Num: Target.second);
1416	}
1417	}
1418	for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1419	sampleprof::FunctionSamplesMap &Target =
1420	Result.functionSamplesAt(Loc: CallsiteSamples.first);
1421	for (const auto &Callsite : CallsiteSamples.second) {
1422	sampleprof::FunctionSamples Remapped =
1423	remapSamples(Samples: Callsite.second, Remapper, Error);
1424	mergeSampleProfErrors(Accumulator&: Error,
1425	Result: Target [Remapped.getFunction()].merge(Other: Remapped));
1426	}
1427	}
1428	return Result;
1429	}
1430
1431	static sampleprof::SampleProfileFormat FormatMap[] = {
1432	sampleprof::SPF_None,
1433	sampleprof::SPF_Text,
1434	sampleprof::SPF_None,
1435	sampleprof::SPF_Ext_Binary,
1436	sampleprof::SPF_GCC,
1437	sampleprof::SPF_Binary};
1438
1439	static std::unique_ptr<MemoryBuffer>
1440	getInputFileBuf(const StringRef &InputFile) {
1441	if (InputFile == "")
1442	return {};
1443
1444	auto BufOrError = MemoryBuffer::getFileOrSTDIN(Filename: InputFile);
1445	if (!BufOrError)
1446	exitWithErrorCode(EC: BufOrError.getError(), Whence: InputFile);
1447
1448	return std::move(*BufOrError);
1449	}
1450
1451	static void populateProfileSymbolList(MemoryBuffer *Buffer,
1452	sampleprof::ProfileSymbolList &PSL) {
1453	if (!Buffer)
1454	return;
1455
1456	SmallVector<StringRef, `32`> SymbolVec;
1457	StringRef Data = Buffer->getBuffer();
1458	Data.split(A&: SymbolVec, Separator: `'\n'`, /MaxSplit=/-`1`, /KeepEmpty=/false);
1459
1460	for (StringRef SymbolStr : SymbolVec)
1461	PSL.add(Name: SymbolStr.trim());
1462	}
1463
1464	static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1465	ProfileFormat OutputFormat,
1466	MemoryBuffer *Buffer,
1467	sampleprof::ProfileSymbolList &WriterList,
1468	bool CompressAllSections, bool UseMD5,
1469	bool GenPartialProfile) {
1470	populateProfileSymbolList(Buffer, PSL&: WriterList);
1471	if (WriterList.size() > `0` && OutputFormat != PF_Ext_Binary)
1472	warn(Message: "Profile Symbol list is not empty but the output format is not "
1473	"ExtBinary format. The list will be lost in the output. ");
1474
1475	Writer.setProfileSymbolList(&WriterList);
1476
1477	if (CompressAllSections) {
1478	if (OutputFormat != PF_Ext_Binary)
1479	warn(Message: "-compress-all-section is ignored. Specify -extbinary to enable it");
1480	else
1481	Writer.setToCompressAllSections();
1482	}
1483	if (UseMD5) {
1484	if (OutputFormat != PF_Ext_Binary)
1485	warn(Message: "-use-md5 is ignored. Specify -extbinary to enable it");
1486	else
1487	Writer.setUseMD5();
1488	}
1489	if (GenPartialProfile) {
1490	if (OutputFormat != PF_Ext_Binary)
1491	warn(Message: "-gen-partial-profile is ignored. Specify -extbinary to enable it");
1492	else
1493	Writer.setPartialProfile();
1494	}
1495	}
1496
1497	static void mergeSampleProfile(const WeightedFileVector &Inputs,
1498	SymbolRemapper *Remapper,
1499	StringRef ProfileSymbolListFile,
1500	size_t OutputSizeLimit) {
1501	using namespace sampleprof;
1502	SampleProfileMap ProfileMap;
1503	SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, `5`> Readers;
1504	LLVMContext Context;
1505	sampleprof::ProfileSymbolList WriterList;
1506	std::optional<bool> ProfileIsProbeBased;
1507	std::optional<bool> ProfileIsCS;
1508	for (const auto &Input : Inputs) {
1509	auto FS = vfs::getRealFileSystem();
1510	auto ReaderOrErr = SampleProfileReader::create(Filename: Input.Filename, C&: Context, FS&: *FS,
1511	P: FSDiscriminatorPassOption);
1512	if (std::error_code EC = ReaderOrErr.getError()) {
1513	warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1514	continue;
1515	}
1516
1517	// We need to keep the readers around until after all the files are
1518	// read so that we do not lose the function names stored in each
1519	// reader's memory. The function names are needed to write out the
1520	// merged profile map.
1521	Readers.push_back(Elt: std::move(ReaderOrErr.get()));
1522	const auto Reader = Readers.back().get();
1523	if (std::error_code EC = Reader->read()) {
1524	warnOrExitGivenError(FailMode, EC, Whence: Input.Filename);
1525	Readers.pop_back();
1526	continue;
1527	}
1528
1529	SampleProfileMap &Profiles = Reader->getProfiles();
1530	if (ProfileIsProbeBased &&
1531	ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1532	exitWithError(
1533	Message: "cannot merge probe-based profile with non-probe-based profile");
1534	ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1535	if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1536	exitWithError(Message: "cannot merge CS profile with non-CS profile");
1537	ProfileIsCS = FunctionSamples::ProfileIsCS;
1538	for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1539	I != E; ++I) {
1540	sampleprof_error Result = sampleprof_error::success;
1541	FunctionSamples Remapped =
1542	Remapper ? remapSamples(Samples: I ->second, Remapper&: *Remapper, Error&: Result)
1543	: FunctionSamples ();
1544	FunctionSamples &Samples = Remapper ? Remapped : I ->second;
1545	SampleContext FContext = Samples.getContext();
1546	mergeSampleProfErrors(Accumulator&: Result,
1547	Result: ProfileMap [FContext].merge(Other: Samples, Weight: Input.Weight));
1548	if (Result != sampleprof_error::success) {
1549	std::error_code EC = make_error_code(E: Result);
1550	handleMergeWriterError(E: errorCodeToError(EC), WhenceFile: Input.Filename,
1551	WhenceFunction: FContext.toString());
1552	}
1553	}
1554
1555	if (!DropProfileSymbolList) {
1556	std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1557	Reader->getProfileSymbolList();
1558	if (ReaderList)
1559	WriterList.merge(List: *ReaderList);
1560	}
1561	}
1562
1563	if (ProfileIsCS && (SampleMergeColdContext \|\| SampleTrimColdContext)) {
1564	// Use threshold calculated from profile summary unless specified.
1565	SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1566	auto Summary = Builder.computeSummaryForProfiles(Profiles: ProfileMap);
1567	uint64_t SampleProfColdThreshold =
1568	ProfileSummaryBuilder::getColdCountThreshold(
1569	DS: (Summary ->getDetailedSummary()));
1570
1571	// Trim and merge cold context profile using cold threshold above;
1572	SampleContextTrimmer (ProfileMap)
1573	.trimAndMergeColdContextProfiles(
1574	ColdCountThreshold: SampleProfColdThreshold, TrimColdContext: SampleTrimColdContext,
1575	MergeColdContext: SampleMergeColdContext, ColdContextFrameLength: SampleColdContextFrameDepth, TrimBaseProfileOnly: false);
1576	}
1577
1578	if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1579	ProfileConverter::flattenProfile(ProfileMap, ProfileIsCS: FunctionSamples::ProfileIsCS);
1580	ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1581	} else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1582	ProfileConverter CSConverter(ProfileMap);
1583	CSConverter.convertCSProfiles();
1584	ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1585	}
1586
1587	filterFunctions(ProfileMap);
1588
1589	auto WriterOrErr =
1590	SampleProfileWriter::create(Filename: OutputFilename, Format: FormatMap[OutputFormat]);
1591	if (std::error_code EC = WriterOrErr.getError())
1592	exitWithErrorCode(EC, Whence: OutputFilename);
1593
1594	auto Writer = std::move(WriterOrErr.get());
1595	// WriterList will have StringRef referring to string in Buffer.
1596	// Make sure Buffer lives as long as WriterList.
1597	auto Buffer = getInputFileBuf(InputFile: ProfileSymbolListFile);
1598	handleExtBinaryWriter(Writer&: *Writer, OutputFormat, Buffer: Buffer.get(), WriterList,
1599	CompressAllSections, UseMD5, GenPartialProfile);
1600
1601	// If OutputSizeLimit is 0 (default), it is the same as write().
1602	if (std::error_code EC =
1603	Writer ->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1604	exitWithErrorCode(EC);
1605	}
1606
1607	static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1608	StringRef WeightStr, FileName;
1609	std::tie(args&: WeightStr, args&: FileName) = WeightedFilename.split(Separator: `','`);
1610
1611	uint64_t Weight;
1612	if (WeightStr.getAsInteger(Radix: `10`, Result&: Weight) \|\| Weight < `1`)
1613	exitWithError(Message: "input weight must be a positive integer");
1614
1615	return {.Filename: std::string (FileName), .Weight: Weight};
1616	}
1617
1618	static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1619	StringRef Filename = WF.Filename;
1620	uint64_t Weight = WF.Weight;
1621
1622	// If it's STDIN just pass it on.
1623	if (Filename == "-") {
1624	WNI.push_back(Elt: {.Filename: std::string (Filename), .Weight: Weight});
1625	return;
1626	}
1627
1628	llvm::sys::fs::file_status Status;
1629	llvm::sys::fs::status(path: Filename, result&: Status);
1630	if (!llvm::sys::fs::exists(status: Status))
1631	exitWithErrorCode(EC: make_error_code(E: errc::no_such_file_or_directory),
1632	Whence: Filename);
1633	// If it's a source file, collect it.
1634	if (llvm::sys::fs::is_regular_file(status: Status)) {
1635	WNI.push_back(Elt: {.Filename: std::string (Filename), .Weight: Weight});
1636	return;
1637	}
1638
1639	if (llvm::sys::fs::is_directory(status: Status)) {
1640	std::error_code EC;
1641	for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1642	F != E && !EC; F.increment(ec&: EC)) {
1643	if (llvm::sys::fs::is_regular_file(Path: F ->path())) {
1644	addWeightedInput(WNI, WF: {.Filename: F ->path(), .Weight: Weight});
1645	}
1646	}
1647	if (EC)
1648	exitWithErrorCode(EC, Whence: Filename);
1649	}
1650	}
1651
1652	static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1653	WeightedFileVector &WFV) {
1654	if (!Buffer)
1655	return;
1656
1657	SmallVector<StringRef, `8`> Entries;
1658	StringRef Data = Buffer->getBuffer();
1659	Data.split(A&: Entries, Separator: `'\n'`, /MaxSplit=/-`1`, /KeepEmpty=/false);
1660	for (const StringRef &FileWeightEntry : Entries) {
1661	StringRef SanitizedEntry = FileWeightEntry.trim(Chars: " \t\v\f\r");
1662	// Skip comments.
1663	if (SanitizedEntry.starts_with(Prefix: "#"))
1664	continue;
1665	// If there's no comma, it's an unweighted profile.
1666	else if (!SanitizedEntry.contains(C: `','`))
1667	addWeightedInput(WNI&: WFV, WF: {.Filename: std::string (SanitizedEntry), .Weight: `1`});
1668	else
1669	addWeightedInput(WNI&: WFV, WF: parseWeightedFile(WeightedFilename: SanitizedEntry));
1670	}
1671	}
1672
1673	static int merge_main(StringRef ProgName) {
1674	WeightedFileVector WeightedInputs;
1675	for (StringRef Filename : InputFilenames)
1676	addWeightedInput(WNI&: WeightedInputs, WF: {.Filename: std::string (Filename), .Weight: `1`});
1677	for (StringRef WeightedFilename : WeightedInputFilenames)
1678	addWeightedInput(WNI&: WeightedInputs, WF: parseWeightedFile(WeightedFilename));
1679
1680	// Make sure that the file buffer stays alive for the duration of the
1681	// weighted input vector's lifetime.
1682	auto Buffer = getInputFileBuf(InputFile: InputFilenamesFile);
1683	parseInputFilenamesFile(Buffer: Buffer.get(), WFV&: WeightedInputs);
1684
1685	if (WeightedInputs.empty())
1686	exitWithError(Message: "no input files specified. See " + ProgName + " merge -help");
1687
1688	if (DumpInputFileList) {
1689	for (auto &WF : WeightedInputs)
1690	outs() << WF.Weight << "," << WF.Filename << "\n";
1691	return `0`;
1692	}
1693
1694	std::unique_ptr<SymbolRemapper> Remapper;
1695	if (!RemappingFile.empty())
1696	Remapper = SymbolRemapper::create(InputFile: RemappingFile);
1697
1698	if (!SupplInstrWithSample.empty()) {
1699	if (ProfileKind != instr)
1700	exitWithError(
1701	Message: "-supplement-instr-with-sample can only work with -instr. ");
1702
1703	supplementInstrProfile(Inputs: WeightedInputs, SampleFilename: SupplInstrWithSample, OutputSparse,
1704	SupplMinSizeThreshold, ZeroCounterThreshold,
1705	InstrProfColdThreshold);
1706	return `0`;
1707	}
1708
1709	if (ProfileKind == instr)
1710	mergeInstrProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), MaxDbgCorrelationWarnings,
1711	ProfiledBinary);
1712	else
1713	mergeSampleProfile(Inputs: WeightedInputs, Remapper: Remapper.get(), ProfileSymbolListFile,
1714	OutputSizeLimit);
1715	return `0`;
1716	}
1717
1718	/// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1719	static void overlapInstrProfile(const std::string &BaseFilename,
1720	const std::string &TestFilename,
1721	const OverlapFuncFilters &FuncFilter,
1722	raw_fd_ostream &OS, bool IsCS) {
1723	std::mutex ErrorLock;
1724	SmallSet<instrprof_error, `4`> WriterErrorCodes;
1725	WriterContext Context(false, ErrorLock, WriterErrorCodes);
1726	WeightedFile WeightedInput{.Filename: BaseFilename, .Weight: `1`};
1727	OverlapStats Overlap;
1728	Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1729	if (E)
1730	exitWithError(E: std::move(E), Whence: "error in getting profile count sums");
1731	if (Overlap.Base.CountSum < `1.0f`) {
1732	OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1733	exit(status: `0`);
1734	}
1735	if (Overlap.Test.CountSum < `1.0f`) {
1736	OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1737	exit(status: `0`);
1738	}
1739	loadInput(Input: WeightedInput, Remapper: nullptr, Correlator: nullptr, /ProfiledBinary=/"", WC: &Context);
1740	overlapInput(BaseFilename, TestFilename, WC: &Context, Overlap, FuncFilter, OS,
1741	IsCS);
1742	Overlap.dump(OS);
1743	}
1744
1745	namespace {
1746	struct SampleOverlapStats {
1747	SampleContext BaseName;
1748	SampleContext TestName;
1749	// Number of overlap units
1750	uint64_t OverlapCount = `0`;
1751	// Total samples of overlap units
1752	uint64_t OverlapSample = `0`;
1753	// Number of and total samples of units that only present in base or test
1754	// profile
1755	uint64_t BaseUniqueCount = `0`;
1756	uint64_t BaseUniqueSample = `0`;
1757	uint64_t TestUniqueCount = `0`;
1758	uint64_t TestUniqueSample = `0`;
1759	// Number of units and total samples in base or test profile
1760	uint64_t BaseCount = `0`;
1761	uint64_t BaseSample = `0`;
1762	uint64_t TestCount = `0`;
1763	uint64_t TestSample = `0`;
1764	// Number of and total samples of units that present in at least one profile
1765	uint64_t UnionCount = `0`;
1766	uint64_t UnionSample = `0`;
1767	// Weighted similarity
1768	double Similarity = `0.0`;
1769	// For SampleOverlapStats instances representing functions, weights of the
1770	// function in base and test profiles
1771	double BaseWeight = `0.0`;
1772	double TestWeight = `0.0`;
1773
1774	SampleOverlapStats() = default;
1775	};
1776	} // end anonymous namespace
1777
namespace {
/// Per-function sample statistics: the sum of samples, the largest single
/// sample, and the number of hot blocks. All fields default to zero.
struct FuncSampleStats {
  uint64_t SampleSum{0};
  uint64_t MaxSample{0};
  uint64_t HotBlockCount{0};

  FuncSampleStats() = default;

  /// Initialize all three statistics from explicit values.
  FuncSampleStats(uint64_t Sum, uint64_t Max, uint64_t HotBlocks)
      : SampleSum(Sum), MaxSample(Max), HotBlockCount(HotBlocks) {}
};
} // end anonymous namespace
1790
namespace {
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Cursor for a sort-merge walk over two key-ordered maps. T is the map
/// iterator type; keys are compared through `->first` with operator<.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  /// Start a walk over [FirstIter, FirstEnd) and [SecondIter, SecondEnd).
  /// The status is MS_None until updateOneStep() is called for the first time.
  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd), Status(MS_None) {}

  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// Move past whatever the previous status classified (both iterators for
  /// MS_Match, one of them for the unique statuses, neither for MS_None), then
  /// classify the elements the iterators now point at. MS_None means no
  /// comparison has been made yet, so the very first call only classifies.
  /// When both ranges are exhausted the status is left as it was.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Position and end of the first range.
  T FirstIter;
  T FirstEnd;
  // Position and end of the second range.
  T SecondIter;
  T SecondEnd;
  // Classification of the current position.
  MatchStatus Status;
};
} // end anonymous namespace

template <class T> void MatchStep<T>::updateOneStep() {
  // Consume the element(s) that the previous status accounted for.
  const bool AdvanceFirst = Status == MS_Match || Status == MS_FirstUnique;
  const bool AdvanceSecond = Status == MS_Match || Status == MS_SecondUnique;
  if (AdvanceFirst)
    ++FirstIter;
  if (AdvanceSecond)
    ++SecondIter;

  // Nothing left to classify; keep the previous status.
  if (areBothFinished())
    return;

  // If one range is exhausted, the remaining element is unique to the other.
  if (isSecondFinished()) {
    Status = MS_FirstUnique;
    return;
  }
  if (isFirstFinished()) {
    Status = MS_SecondUnique;
    return;
  }

  // Both ranges have an element: the smaller key is unique to its side, and
  // keys that are not ordered either way are a match.
  if (FirstIter->first < SecondIter->first)
    Status = MS_FirstUnique;
  else if (SecondIter->first < FirstIter->first)
    Status = MS_SecondUnique;
  else
    Status = MS_Match;
}
1865
1866	// Return the sum of line/block samples, the max line/block sample, and the
1867	// number of line/block samples above the given threshold in a function
1868	// including its inlinees.
1869	static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1870	FuncSampleStats &FuncStats,
1871	uint64_t HotThreshold) {
1872	for (const auto &L : Func.getBodySamples()) {
1873	uint64_t Sample = L.second.getSamples();
1874	FuncStats.SampleSum += Sample;
1875	FuncStats.MaxSample = std::max(a: FuncStats.MaxSample, b: Sample);
1876	if (Sample >= HotThreshold)
1877	++FuncStats.HotBlockCount;
1878	}
1879
1880	for (const auto &C : Func.getCallsiteSamples()) {
1881	for (const auto &F : C.second)
1882	getFuncSampleStats(Func: F.second, FuncStats, HotThreshold);
1883	}
1884	}
1885
1886	/// Predicate that determines if a function is hot with a given threshold. We
1887	/// keep it separate from its callsites for possible extension in the future.
1888	static bool isFunctionHot(const FuncSampleStats &FuncStats,
1889	uint64_t HotThreshold) {
1890	// We intentionally compare the maximum sample count in a function with the
1891	// HotThreshold to get an approximate determination on hot functions.
1892	return (FuncStats.MaxSample >= HotThreshold);
1893	}
1894
1895	namespace {
1896	class SampleOverlapAggregator {
1897	public:
1898	SampleOverlapAggregator(const std::string &BaseFilename,
1899	const std::string &TestFilename,
1900	double LowSimilarityThreshold, double Epsilon,
1901	const OverlapFuncFilters &FuncFilter)
1902	: BaseFilename (BaseFilename), TestFilename (TestFilename),
1903	LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
1904	FuncFilter (FuncFilter) {}
1905
1906	/// Detect 0-sample input profile and report to output stream. This interface
1907	/// should be called after loadProfiles().
1908	bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
1909
1910	/// Write out function-level similarity statistics for functions specified by
1911	/// options --function, --value-cutoff, and --similarity-cutoff.
1912	void dumpFuncSimilarity(raw_fd_ostream &OS) const;
1913
1914	/// Write out program-level similarity and overlap statistics.
1915	void dumpProgramSummary(raw_fd_ostream &OS) const;
1916
1917	/// Write out hot-function and hot-block statistics for base_profile,
1918	/// test_profile, and their overlap. For both cases, the overlap HO is
1919	/// calculated as follows:
1920	/// Given the number of functions (or blocks) that are hot in both profiles
1921	/// HCommon and the number of functions (or blocks) that are hot in at
1922	/// least one profile HUnion, HO = HCommon / HUnion.
1923	void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
1924
1925	/// This function tries matching functions in base and test profiles. For each
1926	/// pair of matched functions, it aggregates the function-level
1927	/// similarity into a profile-level similarity. It also dump function-level
1928	/// similarity information of functions specified by --function,
1929	/// --value-cutoff, and --similarity-cutoff options. The program-level
1930	/// similarity PS is computed as follows:
1931	/// Given function-level similarity FS(A) for all function A, the
1932	/// weight of function A in base profile WB(A), and the weight of function
1933	/// A in test profile WT(A), compute PS(base_profile, test_profile) =
1934	/// sum_A(FS(A) avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0*
1935	/// meaning no-overlap.
1936	void computeSampleProfileOverlap(raw_fd_ostream &OS);
1937
1938	/// Initialize ProfOverlap with the sum of samples in base and test
1939	/// profiles. This function also computes and keeps the sum of samples and
1940	/// max sample counts of each function in BaseStats and TestStats for later
1941	/// use to avoid re-computations.
1942	void initializeSampleProfileOverlap();
1943
1944	/// Load profiles specified by BaseFilename and TestFilename.
1945	std::error_code loadProfiles();
1946
1947	using FuncSampleStatsMap =
1948	std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
1949
1950	private:
1951	SampleOverlapStats ProfOverlap;
1952	SampleOverlapStats HotFuncOverlap;
1953	SampleOverlapStats HotBlockOverlap;
1954	std::string BaseFilename;
1955	std::string TestFilename;
1956	std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
1957	std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
1958	// BaseStats and TestStats hold FuncSampleStats for each function, with
1959	// function name as the key.
1960	FuncSampleStatsMap BaseStats;
1961	FuncSampleStatsMap TestStats;
1962	// Low similarity threshold in floating point number
1963	double LowSimilarityThreshold;
1964	// Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1965	// for tracking hot blocks.
1966	uint64_t BaseHotThreshold;
1967	uint64_t TestHotThreshold;
1968	// A small threshold used to round the results of floating point accumulations
1969	// to resolve imprecision.
1970	const double Epsilon;
1971	std::multimap<double, SampleOverlapStats, std::greater<double>>
1972	FuncSimilarityDump;
1973	// FuncFilter carries specifications in options --value-cutoff and
1974	// --function.
1975	OverlapFuncFilters FuncFilter;
1976	// Column offsets for printing the function-level details table.
1977	static const unsigned int TestWeightCol = `15`;
1978	static const unsigned int SimilarityCol = `30`;
1979	static const unsigned int OverlapCol = `43`;
1980	static const unsigned int BaseUniqueCol = `53`;
1981	static const unsigned int TestUniqueCol = `67`;
1982	static const unsigned int BaseSampleCol = `81`;
1983	static const unsigned int TestSampleCol = `96`;
1984	static const unsigned int FuncNameCol = `111`;
1985
1986	/// Return a similarity of two line/block sample counters in the same
1987	/// function in base and test profiles. The line/block-similarity BS(i) is
1988	/// computed as follows:
1989	/// For an offsets i, given the sample count at i in base profile BB(i),
1990	/// the sample count at i in test profile BT(i), the sum of sample counts
1991	/// in this function in base profile SB, and the sum of sample counts in
1992	/// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1993	/// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
1994	double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
1995	const SampleOverlapStats &FuncOverlap) const;
1996
1997	void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
1998	uint64_t HotBlockCount);
1999
2000	void getHotFunctions(const FuncSampleStatsMap &ProfStats,
2001	FuncSampleStatsMap &HotFunc,
2002	uint64_t HotThreshold) const;
2003
2004	void computeHotFuncOverlap();
2005
2006	/// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2007	/// Difference for two sample units in a matched function according to the
2008	/// given match status.
2009	void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
2010	uint64_t HotBlockCount,
2011	SampleOverlapStats &FuncOverlap,
2012	double &Difference, MatchStatus Status);
2013
2014	/// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2015	/// Difference for unmatched callees that only present in one profile in a
2016	/// matched caller function.
2017	void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
2018	SampleOverlapStats &FuncOverlap,
2019	double &Difference, MatchStatus Status);
2020
2021	/// This function updates sample overlap statistics of an overlap function in
2022	/// base and test profile. It also calculates a function-internal similarity
2023	/// FIS as follows:
2024	/// For offsets i that have samples in at least one profile in this
2025	/// function A, given BS(i) returned by computeBlockSimilarity(), compute
2026	/// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2027	/// 0.0 meaning no overlap.
2028	double computeSampleFunctionInternalOverlap(
2029	const sampleprof::FunctionSamples &BaseFunc,
2030	const sampleprof::FunctionSamples &TestFunc,
2031	SampleOverlapStats &FuncOverlap);
2032
2033	/// Function-level similarity (FS) is a weighted value over function internal
2034	/// similarity (FIS). This function computes a function's FS from its FIS by
2035	/// applying the weight.
2036	double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
2037	uint64_t TestFuncSample) const;
2038
2039	/// The function-level similarity FS(A) for a function A is computed as
2040	/// follows:
2041	/// Compute a function-internal similarity FIS(A) by
2042	/// computeSampleFunctionInternalOverlap(). Then, with the weight of
2043	/// function A in base profile WB(A), and the weight of function A in test
2044	/// profile WT(A), compute FS(A) = FIS(A) (1.0 - fabs(WB(A) - WT(A)))*
2045	/// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2046	double
2047	computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
2048	const sampleprof::FunctionSamples *TestFunc,
2049	SampleOverlapStats *FuncOverlap,
2050	uint64_t BaseFuncSample,
2051	uint64_t TestFuncSample);
2052
2053	/// Profile-level similarity (PS) is a weighted aggregate over function-level
2054	/// similarities (FS). This method weights the FS value by the function
2055	/// weights in the base and test profiles for the aggregation.
2056	double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
2057	uint64_t TestFuncSample) const;
2058	};
2059	} // end anonymous namespace
2060
2061	bool SampleOverlapAggregator::detectZeroSampleProfile(
2062	raw_fd_ostream &OS) const {
2063	bool HaveZeroSample = false;
2064	if (ProfOverlap.BaseSample == `0`) {
2065	OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2066	HaveZeroSample = true;
2067	}
2068	if (ProfOverlap.TestSample == `0`) {
2069	OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2070	HaveZeroSample = true;
2071	}
2072	return HaveZeroSample;
2073	}
2074
2075	double SampleOverlapAggregator::computeBlockSimilarity(
2076	uint64_t BaseSample, uint64_t TestSample,
2077	const SampleOverlapStats &FuncOverlap) const {
2078	double BaseFrac = `0.0`;
2079	double TestFrac = `0.0`;
2080	if (FuncOverlap.BaseSample > `0`)
2081	BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2082	if (FuncOverlap.TestSample > `0`)
2083	TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2084	return `1.0` - std::fabs(x: BaseFrac - TestFrac);
2085	}
2086
2087	void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2088	uint64_t TestSample,
2089	uint64_t HotBlockCount) {
2090	bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2091	bool IsTestHot = (TestSample >= TestHotThreshold);
2092	if (!IsBaseHot && !IsTestHot)
2093	return;
2094
2095	HotBlockOverlap.UnionCount += HotBlockCount;
2096	if (IsBaseHot)
2097	HotBlockOverlap.BaseCount += HotBlockCount;
2098	if (IsTestHot)
2099	HotBlockOverlap.TestCount += HotBlockCount;
2100	if (IsBaseHot && IsTestHot)
2101	HotBlockOverlap.OverlapCount += HotBlockCount;
2102	}
2103
2104	void SampleOverlapAggregator::getHotFunctions(
2105	const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2106	uint64_t HotThreshold) const {
2107	for (const auto &F : ProfStats) {
2108	if (isFunctionHot(FuncStats: F.second, HotThreshold))
2109	HotFunc.emplace(args: F.first, args: F.second);
2110	}
2111	}
2112
2113	void SampleOverlapAggregator::computeHotFuncOverlap() {
2114	FuncSampleStatsMap BaseHotFunc;
2115	getHotFunctions(ProfStats: BaseStats, HotFunc&: BaseHotFunc, HotThreshold: BaseHotThreshold);
2116	HotFuncOverlap.BaseCount = BaseHotFunc.size();
2117
2118	FuncSampleStatsMap TestHotFunc;
2119	getHotFunctions(ProfStats: TestStats, HotFunc&: TestHotFunc, HotThreshold: TestHotThreshold);
2120	HotFuncOverlap.TestCount = TestHotFunc.size();
2121	HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2122
2123	for (const auto &F : BaseHotFunc) {
2124	if (TestHotFunc.count(x: F.first))
2125	++HotFuncOverlap.OverlapCount;
2126	else
2127	++HotFuncOverlap.UnionCount;
2128	}
2129	}
2130
2131	void SampleOverlapAggregator::updateOverlapStatsForFunction(
2132	uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2133	SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2134	assert(Status != MS_None &&
2135	"Match status should be updated before updating overlap statistics");
2136	if (Status == MS_FirstUnique) {
2137	TestSample = `0`;
2138	FuncOverlap.BaseUniqueSample += BaseSample;
2139	} else if (Status == MS_SecondUnique) {
2140	BaseSample = `0`;
2141	FuncOverlap.TestUniqueSample += TestSample;
2142	} else {
2143	++FuncOverlap.OverlapCount;
2144	}
2145
2146	FuncOverlap.UnionSample += std::max(a: BaseSample, b: TestSample);
2147	FuncOverlap.OverlapSample += std::min(a: BaseSample, b: TestSample);
2148	Difference +=
2149	`1.0` - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2150	updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2151	}
2152
2153	void SampleOverlapAggregator::updateForUnmatchedCallee(
2154	const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2155	double &Difference, MatchStatus Status) {
2156	assert((Status == MS_FirstUnique \|\| Status == MS_SecondUnique) &&
2157	"Status must be either of the two unmatched cases");
2158	FuncSampleStats FuncStats;
2159	if (Status == MS_FirstUnique) {
2160	getFuncSampleStats(Func, FuncStats, HotThreshold: BaseHotThreshold);
2161	updateOverlapStatsForFunction(BaseSample: FuncStats.SampleSum, TestSample: `0`,
2162	HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2163	Difference, Status);
2164	} else {
2165	getFuncSampleStats(Func, FuncStats, HotThreshold: TestHotThreshold);
2166	updateOverlapStatsForFunction(BaseSample: `0`, TestSample: FuncStats.SampleSum,
2167	HotBlockCount: FuncStats.HotBlockCount, FuncOverlap,
2168	Difference, Status);
2169	}
2170	}
2171
2172	double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2173	const sampleprof::FunctionSamples &BaseFunc,
2174	const sampleprof::FunctionSamples &TestFunc,
2175	SampleOverlapStats &FuncOverlap) {
2176
2177	using namespace sampleprof;
2178
2179	double Difference = `0`;
2180
2181	// Accumulate Difference for regular line/block samples in the function.
2182	// We match them through sort-merge join algorithm because
2183	// FunctionSamples::getBodySamples() returns a map of sample counters ordered
2184	// by their offsets.
2185	MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2186	BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2187	TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2188	BlockIterStep.updateOneStep();
2189	while (!BlockIterStep.areBothFinished()) {
2190	uint64_t BaseSample =
2191	BlockIterStep.isFirstFinished()
2192	? `0`
2193	: BlockIterStep.getFirstIter()->second.getSamples();
2194	uint64_t TestSample =
2195	BlockIterStep.isSecondFinished()
2196	? `0`
2197	: BlockIterStep.getSecondIter()->second.getSamples();
2198	updateOverlapStatsForFunction(BaseSample, TestSample, HotBlockCount: `1`, FuncOverlap,
2199	Difference, Status: BlockIterStep.getMatchStatus());
2200
2201	BlockIterStep.updateOneStep();
2202	}
2203
2204	// Accumulate Difference for callsite lines in the function. We match
2205	// them through sort-merge algorithm because
2206	// FunctionSamples::getCallsiteSamples() returns a map of callsite records
2207	// ordered by their offsets.
2208	MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2209	BaseFunc.getCallsiteSamples().cbegin(),
2210	BaseFunc.getCallsiteSamples().cend(),
2211	TestFunc.getCallsiteSamples().cbegin(),
2212	TestFunc.getCallsiteSamples().cend());
2213	CallsiteIterStep.updateOneStep();
2214	while (!CallsiteIterStep.areBothFinished()) {
2215	MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2216	assert(CallsiteStepStatus != MS_None &&
2217	"Match status should be updated before entering loop body");
2218
2219	if (CallsiteStepStatus != MS_Match) {
2220	auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2221	? CallsiteIterStep.getFirstIter()
2222	: CallsiteIterStep.getSecondIter();
2223	for (const auto &F : Callsite ->second)
2224	updateForUnmatchedCallee(Func: F.second, FuncOverlap, Difference,
2225	Status: CallsiteStepStatus);
2226	} else {
2227	// There may be multiple inlinees at the same offset, so we need to try
2228	// matching all of them. This match is implemented through sort-merge
2229	// algorithm because callsite records at the same offset are ordered by
2230	// function names.
2231	MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2232	CallsiteIterStep.getFirstIter()->second.cbegin(),
2233	CallsiteIterStep.getFirstIter()->second.cend(),
2234	CallsiteIterStep.getSecondIter()->second.cbegin(),
2235	CallsiteIterStep.getSecondIter()->second.cend());
2236	CalleeIterStep.updateOneStep();
2237	while (!CalleeIterStep.areBothFinished()) {
2238	MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2239	if (CalleeStepStatus != MS_Match) {
2240	auto Callee = (CalleeStepStatus == MS_FirstUnique)
2241	? CalleeIterStep.getFirstIter()
2242	: CalleeIterStep.getSecondIter();
2243	updateForUnmatchedCallee(Func: Callee ->second, FuncOverlap, Difference,
2244	Status: CalleeStepStatus);
2245	} else {
2246	// An inlined function can contain other inlinees inside, so compute
2247	// the Difference recursively.
2248	Difference += `2.0` - `2` * computeSampleFunctionInternalOverlap(
2249	BaseFunc: CalleeIterStep.getFirstIter()->second,
2250	TestFunc: CalleeIterStep.getSecondIter()->second,
2251	FuncOverlap);
2252	}
2253	CalleeIterStep.updateOneStep();
2254	}
2255	}
2256	CallsiteIterStep.updateOneStep();
2257	}
2258
2259	// Difference reflects the total differences of line/block samples in this
2260	// function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2261	// reflect the similarity between function profiles in [0.0f to 1.0f].
2262	return (`2.0` - Difference) / `2`;
2263	}
2264
2265	double SampleOverlapAggregator::weightForFuncSimilarity(
2266	double FuncInternalSimilarity, uint64_t BaseFuncSample,
2267	uint64_t TestFuncSample) const {
2268	// Compute the weight as the distance between the function weights in two
2269	// profiles.
2270	double BaseFrac = `0.0`;
2271	double TestFrac = `0.0`;
2272	assert(ProfOverlap.BaseSample > `0` &&
2273	"Total samples in base profile should be greater than 0");
2274	BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2275	assert(ProfOverlap.TestSample > `0` &&
2276	"Total samples in test profile should be greater than 0");
2277	TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2278	double WeightDistance = std::fabs(x: BaseFrac - TestFrac);
2279
2280	// Take WeightDistance into the similarity.
2281	return FuncInternalSimilarity * (`1` - WeightDistance);
2282	}
2283
2284	double
2285	SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2286	uint64_t BaseFuncSample,
2287	uint64_t TestFuncSample) const {
2288
2289	double BaseFrac = `0.0`;
2290	double TestFrac = `0.0`;
2291	assert(ProfOverlap.BaseSample > `0` &&
2292	"Total samples in base profile should be greater than 0");
2293	BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / `2.0`;
2294	assert(ProfOverlap.TestSample > `0` &&
2295	"Total samples in test profile should be greater than 0");
2296	TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / `2.0`;
2297	return FuncSimilarity * (BaseFrac + TestFrac);
2298	}
2299
2300	double SampleOverlapAggregator::computeSampleFunctionOverlap(
2301	const sampleprof::FunctionSamples *BaseFunc,
2302	const sampleprof::FunctionSamples *TestFunc,
2303	SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2304	uint64_t TestFuncSample) {
2305	// Default function internal similarity before weighted, meaning two functions
2306	// has no overlap.
2307	const double DefaultFuncInternalSimilarity = `0`;
2308	double FuncSimilarity;
2309	double FuncInternalSimilarity;
2310
2311	// If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2312	// In this case, we use DefaultFuncInternalSimilarity as the function internal
2313	// similarity.
2314	if (!BaseFunc \|\| !TestFunc) {
2315	FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2316	} else {
2317	assert(FuncOverlap != nullptr &&
2318	"FuncOverlap should be provided in this case");
2319	FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2320	BaseFunc: BaseFunc, TestFunc: TestFunc, FuncOverlap&: *FuncOverlap);
2321	// Now, FuncInternalSimilarity may be a little less than 0 due to
2322	// imprecision of floating point accumulations. Make it zero if the
2323	// difference is below Epsilon.
2324	FuncInternalSimilarity = (std::fabs(x: FuncInternalSimilarity - `0`) < Epsilon)
2325	? `0`
2326	: FuncInternalSimilarity;
2327	}
2328	FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2329	BaseFuncSample, TestFuncSample);
2330	return FuncSimilarity;
2331	}
2332
2333	void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2334	using namespace sampleprof;
2335
2336	std::unordered_map<SampleContext, const FunctionSamples *,
2337	SampleContext::Hash>
2338	BaseFuncProf;
2339	const auto &BaseProfiles = BaseReader ->getProfiles();
2340	for (const auto &BaseFunc : BaseProfiles) {
2341	BaseFuncProf.emplace(args&: BaseFunc.second.getContext(), args: &(BaseFunc.second));
2342	}
2343	ProfOverlap.UnionCount = BaseFuncProf.size();
2344
2345	const auto &TestProfiles = TestReader ->getProfiles();
2346	for (const auto &TestFunc : TestProfiles) {
2347	SampleOverlapStats FuncOverlap;
2348	FuncOverlap.TestName = TestFunc.second.getContext();
2349	assert(TestStats.count(FuncOverlap.TestName) &&
2350	"TestStats should have records for all functions in test profile "
2351	"except inlinees");
2352	FuncOverlap.TestSample = TestStats [FuncOverlap.TestName].SampleSum;
2353
2354	bool Matched = false;
2355	const auto Match = BaseFuncProf.find(x: FuncOverlap.TestName);
2356	if (Match == BaseFuncProf.end()) {
2357	const FuncSampleStats &FuncStats = TestStats [FuncOverlap.TestName];
2358	++ProfOverlap.TestUniqueCount;
2359	ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2360	FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2361
2362	updateHotBlockOverlap(BaseSample: `0`, TestSample: FuncStats.SampleSum, HotBlockCount: FuncStats.HotBlockCount);
2363
2364	double FuncSimilarity = computeSampleFunctionOverlap(
2365	BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: `0`, TestFuncSample: FuncStats.SampleSum);
2366	ProfOverlap.Similarity +=
2367	weightByImportance(FuncSimilarity, BaseFuncSample: `0`, TestFuncSample: FuncStats.SampleSum);
2368
2369	++ProfOverlap.UnionCount;
2370	ProfOverlap.UnionSample += FuncStats.SampleSum;
2371	} else {
2372	++ProfOverlap.OverlapCount;
2373
2374	// Two functions match with each other. Compute function-level overlap and
2375	// aggregate them into profile-level overlap.
2376	FuncOverlap.BaseName = Match ->second->getContext();
2377	assert(BaseStats.count(FuncOverlap.BaseName) &&
2378	"BaseStats should have records for all functions in base profile "
2379	"except inlinees");
2380	FuncOverlap.BaseSample = BaseStats [FuncOverlap.BaseName].SampleSum;
2381
2382	FuncOverlap.Similarity = computeSampleFunctionOverlap(
2383	BaseFunc: Match ->second, TestFunc: &TestFunc.second, FuncOverlap: &FuncOverlap, BaseFuncSample: FuncOverlap.BaseSample,
2384	TestFuncSample: FuncOverlap.TestSample);
2385	ProfOverlap.Similarity +=
2386	weightByImportance(FuncSimilarity: FuncOverlap.Similarity, BaseFuncSample: FuncOverlap.BaseSample,
2387	TestFuncSample: FuncOverlap.TestSample);
2388	ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2389	ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2390
2391	// Accumulate the percentage of base unique and test unique samples into
2392	// ProfOverlap.
2393	ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2394	ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2395
2396	// Remove matched base functions for later reporting functions not found
2397	// in test profile.
2398	BaseFuncProf.erase(position: Match);
2399	Matched = true;
2400	}
2401
2402	// Print function-level similarity information if specified by options.
2403	assert(TestStats.count(FuncOverlap.TestName) &&
2404	"TestStats should have records for all functions in test profile "
2405	"except inlinees");
2406	if (TestStats [FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff \|\|
2407	(Matched && FuncOverlap.Similarity < LowSimilarityThreshold) \|\|
2408	(Matched && !FuncFilter.NameFilter.empty() &&
2409	FuncOverlap.BaseName.toString().find(str: FuncFilter.NameFilter) !=
2410	std::string::npos)) {
2411	assert(ProfOverlap.BaseSample > `0` &&
2412	"Total samples in base profile should be greater than 0");
2413	FuncOverlap.BaseWeight =
2414	static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2415	assert(ProfOverlap.TestSample > `0` &&
2416	"Total samples in test profile should be greater than 0");
2417	FuncOverlap.TestWeight =
2418	static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2419	FuncSimilarityDump.emplace(args&: FuncOverlap.BaseWeight, args&: FuncOverlap);
2420	}
2421	}
2422
2423	// Traverse through functions in base profile but not in test profile.
2424	for (const auto &F : BaseFuncProf) {
2425	assert(BaseStats.count(F.second->getContext()) &&
2426	"BaseStats should have records for all functions in base profile "
2427	"except inlinees");
2428	const FuncSampleStats &FuncStats = BaseStats [F.second->getContext()];
2429	++ProfOverlap.BaseUniqueCount;
2430	ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2431
2432	updateHotBlockOverlap(BaseSample: FuncStats.SampleSum, TestSample: `0`, HotBlockCount: FuncStats.HotBlockCount);
2433
2434	double FuncSimilarity = computeSampleFunctionOverlap(
2435	BaseFunc: nullptr, TestFunc: nullptr, FuncOverlap: nullptr, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: `0`);
2436	ProfOverlap.Similarity +=
2437	weightByImportance(FuncSimilarity, BaseFuncSample: FuncStats.SampleSum, TestFuncSample: `0`);
2438
2439	ProfOverlap.UnionSample += FuncStats.SampleSum;
2440	}
2441
2442	// Now, ProfSimilarity may be a little greater than 1 due to imprecision
2443	// of floating point accumulations. Make it 1.0 if the difference is below
2444	// Epsilon.
2445	ProfOverlap.Similarity = (std::fabs(x: ProfOverlap.Similarity - `1`) < Epsilon)
2446	? `1`
2447	: ProfOverlap.Similarity;
2448
2449	computeHotFuncOverlap();
2450	}
2451
2452	void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2453	const auto &BaseProf = BaseReader ->getProfiles();
2454	for (const auto &I : BaseProf) {
2455	++ProfOverlap.BaseCount;
2456	FuncSampleStats FuncStats;
2457	getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: BaseHotThreshold);
2458	ProfOverlap.BaseSample += FuncStats.SampleSum;
2459	BaseStats.emplace(args&: I.second.getContext(), args&: FuncStats);
2460	}
2461
2462	const auto &TestProf = TestReader ->getProfiles();
2463	for (const auto &I : TestProf) {
2464	++ProfOverlap.TestCount;
2465	FuncSampleStats FuncStats;
2466	getFuncSampleStats(Func: I.second, FuncStats, HotThreshold: TestHotThreshold);
2467	ProfOverlap.TestSample += FuncStats.SampleSum;
2468	TestStats.emplace(args&: I.second.getContext(), args&: FuncStats);
2469	}
2470
2471	ProfOverlap.BaseName = StringRef (BaseFilename);
2472	ProfOverlap.TestName = StringRef (TestFilename);
2473	}
2474
2475	void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2476	using namespace sampleprof;
2477
2478	if (FuncSimilarityDump.empty())
2479	return;
2480
2481	formatted_raw_ostream FOS(OS);
2482	FOS << "Function-level details:\n";
2483	FOS << "Base weight";
2484	FOS.PadToColumn(NewCol: TestWeightCol);
2485	FOS << "Test weight";
2486	FOS.PadToColumn(NewCol: SimilarityCol);
2487	FOS << "Similarity";
2488	FOS.PadToColumn(NewCol: OverlapCol);
2489	FOS << "Overlap";
2490	FOS.PadToColumn(NewCol: BaseUniqueCol);
2491	FOS << "Base unique";
2492	FOS.PadToColumn(NewCol: TestUniqueCol);
2493	FOS << "Test unique";
2494	FOS.PadToColumn(NewCol: BaseSampleCol);
2495	FOS << "Base samples";
2496	FOS.PadToColumn(NewCol: TestSampleCol);
2497	FOS << "Test samples";
2498	FOS.PadToColumn(NewCol: FuncNameCol);
2499	FOS << "Function name\n";
2500	for (const auto &F : FuncSimilarityDump) {
2501	double OverlapPercent =
2502	F.second.UnionSample > `0`
2503	? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2504	: `0`;
2505	double BaseUniquePercent =
2506	F.second.BaseSample > `0`
2507	? static_cast<double>(F.second.BaseUniqueSample) /
2508	F.second.BaseSample
2509	: `0`;
2510	double TestUniquePercent =
2511	F.second.TestSample > `0`
2512	? static_cast<double>(F.second.TestUniqueSample) /
2513	F.second.TestSample
2514	: `0`;
2515
2516	FOS << format(Fmt: "%.2f%%", Vals: F.second.BaseWeight * `100`);
2517	FOS.PadToColumn(NewCol: TestWeightCol);
2518	FOS << format(Fmt: "%.2f%%", Vals: F.second.TestWeight * `100`);
2519	FOS.PadToColumn(NewCol: SimilarityCol);
2520	FOS << format(Fmt: "%.2f%%", Vals: F.second.Similarity * `100`);
2521	FOS.PadToColumn(NewCol: OverlapCol);
2522	FOS << format(Fmt: "%.2f%%", Vals: OverlapPercent * `100`);
2523	FOS.PadToColumn(NewCol: BaseUniqueCol);
2524	FOS << format(Fmt: "%.2f%%", Vals: BaseUniquePercent * `100`);
2525	FOS.PadToColumn(NewCol: TestUniqueCol);
2526	FOS << format(Fmt: "%.2f%%", Vals: TestUniquePercent * `100`);
2527	FOS.PadToColumn(NewCol: BaseSampleCol);
2528	FOS << F.second.BaseSample;
2529	FOS.PadToColumn(NewCol: TestSampleCol);
2530	FOS << F.second.TestSample;
2531	FOS.PadToColumn(NewCol: FuncNameCol);
2532	FOS << F.second.TestName.toString() << "\n";
2533	}
2534	}
2535
2536	void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2537	OS << "Profile overlap infomation for base_profile: "
2538	<< ProfOverlap.BaseName.toString()
2539	<< " and test_profile: " << ProfOverlap.TestName.toString()
2540	<< "\nProgram level:\n";
2541
2542	OS << " Whole program profile similarity: "
2543	<< format(Fmt: "%.3f%%", Vals: ProfOverlap.Similarity * `100`) << "\n";
2544
2545	assert(ProfOverlap.UnionSample > `0` &&
2546	"Total samples in two profile should be greater than 0");
2547	double OverlapPercent =
2548	static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2549	assert(ProfOverlap.BaseSample > `0` &&
2550	"Total samples in base profile should be greater than 0");
2551	double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2552	ProfOverlap.BaseSample;
2553	assert(ProfOverlap.TestSample > `0` &&
2554	"Total samples in test profile should be greater than 0");
2555	double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2556	ProfOverlap.TestSample;
2557
2558	OS << " Whole program sample overlap: "
2559	<< format(Fmt: "%.3f%%", Vals: OverlapPercent * `100`) << "\n";
2560	OS << " percentage of samples unique in base profile: "
2561	<< format(Fmt: "%.3f%%", Vals: BaseUniquePercent * `100`) << "\n";
2562	OS << " percentage of samples unique in test profile: "
2563	<< format(Fmt: "%.3f%%", Vals: TestUniquePercent * `100`) << "\n";
2564	OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2565	<< " total samples in test profile: " << ProfOverlap.TestSample << "\n";
2566
2567	assert(ProfOverlap.UnionCount > `0` &&
2568	"There should be at least one function in two input profiles");
2569	double FuncOverlapPercent =
2570	static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2571	OS << " Function overlap: " << format(Fmt: "%.3f%%", Vals: FuncOverlapPercent * `100`)
2572	<< "\n";
2573	OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
2574	OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2575	<< "\n";
2576	OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
2577	<< "\n";
2578	}
2579
2580	void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2581	raw_fd_ostream &OS) const {
2582	assert(HotFuncOverlap.UnionCount > `0` &&
2583	"There should be at least one hot function in two input profiles");
2584	OS << " Hot-function overlap: "
2585	<< format(Fmt: "%.3f%%", Vals: static_cast<double>(HotFuncOverlap.OverlapCount) /
2586	HotFuncOverlap.UnionCount * `100`)
2587	<< "\n";
2588	OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2589	OS << " hot functions unique in base profile: "
2590	<< HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2591	OS << " hot functions unique in test profile: "
2592	<< HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2593
2594	assert(HotBlockOverlap.UnionCount > `0` &&
2595	"There should be at least one hot block in two input profiles");
2596	OS << " Hot-block overlap: "
2597	<< format(Fmt: "%.3f%%", Vals: static_cast<double>(HotBlockOverlap.OverlapCount) /
2598	HotBlockOverlap.UnionCount * `100`)
2599	<< "\n";
2600	OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2601	OS << " hot blocks unique in base profile: "
2602	<< HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2603	OS << " hot blocks unique in test profile: "
2604	<< HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2605	}
2606
2607	std::error_code SampleOverlapAggregator::loadProfiles() {
2608	using namespace sampleprof;
2609
2610	LLVMContext Context;
2611	auto FS = vfs::getRealFileSystem();
2612	auto BaseReaderOrErr = SampleProfileReader::create(Filename: BaseFilename, C&: Context, FS&: *FS,
2613	P: FSDiscriminatorPassOption);
2614	if (std::error_code EC = BaseReaderOrErr.getError())
2615	exitWithErrorCode(EC, Whence: BaseFilename);
2616
2617	auto TestReaderOrErr = SampleProfileReader::create(Filename: TestFilename, C&: Context, FS&: *FS,
2618	P: FSDiscriminatorPassOption);
2619	if (std::error_code EC = TestReaderOrErr.getError())
2620	exitWithErrorCode(EC, Whence: TestFilename);
2621
2622	BaseReader = std::move(BaseReaderOrErr.get());
2623	TestReader = std::move(TestReaderOrErr.get());
2624
2625	if (std::error_code EC = BaseReader ->read())
2626	exitWithErrorCode(EC, Whence: BaseFilename);
2627	if (std::error_code EC = TestReader ->read())
2628	exitWithErrorCode(EC, Whence: TestFilename);
2629	if (BaseReader ->profileIsProbeBased() != TestReader ->profileIsProbeBased())
2630	exitWithError(
2631	Message: "cannot compare probe-based profile with non-probe-based profile");
2632	if (BaseReader ->profileIsCS() != TestReader ->profileIsCS())
2633	exitWithError(Message: "cannot compare CS profile with non-CS profile");
2634
2635	// Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2636	// profile summary.
2637	ProfileSummary &BasePS = BaseReader ->getSummary();
2638	ProfileSummary &TestPS = TestReader ->getSummary();
2639	BaseHotThreshold =
2640	ProfileSummaryBuilder::getHotCountThreshold(DS: BasePS.getDetailedSummary());
2641	TestHotThreshold =
2642	ProfileSummaryBuilder::getHotCountThreshold(DS: TestPS.getDetailedSummary());
2643
2644	return std::error_code ();
2645	}
2646
2647	void overlapSampleProfile(const std::string &BaseFilename,
2648	const std::string &TestFilename,
2649	const OverlapFuncFilters &FuncFilter,
2650	uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2651	using namespace sampleprof;
2652
2653	// We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2654	// report 2--3 places after decimal point in percentage numbers.
2655	SampleOverlapAggregator OverlapAggr(
2656	BaseFilename, TestFilename,
2657	static_cast<double>(SimilarityCutoff) / `1000000`, `0.000005`, FuncFilter);
2658	if (std::error_code EC = OverlapAggr.loadProfiles())
2659	exitWithErrorCode(EC);
2660
2661	OverlapAggr.initializeSampleProfileOverlap();
2662	if (OverlapAggr.detectZeroSampleProfile(OS))
2663	return;
2664
2665	OverlapAggr.computeSampleProfileOverlap(OS);
2666
2667	OverlapAggr.dumpProgramSummary(OS);
2668	OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2669	OverlapAggr.dumpFuncSimilarity(OS);
2670	}
2671
2672	static int overlap_main() {
2673	std::error_code EC;
2674	raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2675	if (EC)
2676	exitWithErrorCode(EC, Whence: OutputFilename);
2677
2678	if (ProfileKind == instr)
2679	overlapInstrProfile(BaseFilename, TestFilename,
2680	FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2681	OS, IsCS);
2682	else
2683	overlapSampleProfile(BaseFilename, TestFilename,
2684	FuncFilter: OverlapFuncFilters{.ValueCutoff: OverlapValueCutoff, .NameFilter: FuncNameFilter},
2685	SimilarityCutoff, OS);
2686
2687	return `0`;
2688	}
2689
namespace {
/// Running totals gathered while walking value-profile sites.
struct ValueSitesStats {
  ValueSitesStats() = default;

  /// Number of value sites seen.
  uint64_t TotalNumValueSites{0};
  /// Number of value sites that recorded at least one value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  /// Number of recorded values, summed over all sites.
  uint64_t TotalNumValues{0};
  /// Entry I counts the sites that recorded exactly I + 1 values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2699
2700	static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2701	ValueSitesStats &Stats, raw_fd_ostream &OS,
2702	InstrProfSymtab *Symtab) {
2703	uint32_t NS = Func.getNumValueSites(ValueKind: VK);
2704	Stats.TotalNumValueSites += NS;
2705	for (size_t I = `0`; I < NS; ++I) {
2706	auto VD = Func.getValueArrayForSite(ValueKind: VK, Site: I);
2707	uint32_t NV = VD.size();
2708	if (NV == `0`)
2709	continue;
2710	Stats.TotalNumValues += NV;
2711	Stats.TotalNumValueSitesWithValueProfile++;
2712	if (NV > Stats.ValueSitesHistogram.size())
2713	Stats.ValueSitesHistogram.resize(new_size: NV, x: `0`);
2714	Stats.ValueSitesHistogram [NV - `1`]++;
2715
2716	uint64_t SiteSum = `0`;
2717	for (const auto &V : VD)
2718	SiteSum += V.Count;
2719	if (SiteSum == `0`)
2720	SiteSum = `1`;
2721
2722	for (const auto &V : VD) {
2723	OS << "\t[ " << format(Fmt: "%2u", Vals: I) << ", ";
2724	if (Symtab == nullptr)
2725	OS << format(Fmt: "%4" PRIu64, Vals: V.Value);
2726	else
2727	OS << Symtab->getFuncOrVarName(MD5Hash: V.Value);
2728	OS << ", " << format(Fmt: "%10" PRId64, Vals: V.Count) << " ] ("
2729	<< format(Fmt: "%.2f%%", Vals: (V.Count * `100.0` / SiteSum)) << ")\n";
2730	}
2731	}
2732	}
2733
2734	static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2735	ValueSitesStats &Stats) {
2736	OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n";
2737	OS << " Total number of sites with values: "
2738	<< Stats.TotalNumValueSitesWithValueProfile << "\n";
2739	OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n";
2740
2741	OS << " Value sites histogram:\n\tNumTargets, SiteCount\n";
2742	for (unsigned I = `0`; I < Stats.ValueSitesHistogram.size(); I++) {
2743	if (Stats.ValueSitesHistogram [I] > `0`)
2744	OS << "\t" << I + `1` << ", " << Stats.ValueSitesHistogram [I] << "\n";
2745	}
2746	}
2747
2748	static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
2749	if (SFormat == ShowFormat::Json)
2750	exitWithError(Message: "JSON output is not supported for instr profiles");
2751	if (SFormat == ShowFormat::Yaml)
2752	exitWithError(Message: "YAML output is not supported for instr profiles");
2753	auto FS = vfs::getRealFileSystem();
2754	auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
2755	std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2756	if (ShowDetailedSummary && Cutoffs.empty()) {
2757	Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2758	}
2759	InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2760	if (Error E = ReaderOrErr.takeError())
2761	exitWithError(E: std::move(E), Whence: Filename);
2762
2763	auto Reader = std::move(ReaderOrErr.get());
2764	bool IsIRInstr = Reader ->isIRLevelProfile();
2765	size_t ShownFunctions = `0`;
2766	size_t BelowCutoffFunctions = `0`;
2767	int NumVPKind = IPVK_Last - IPVK_First + `1`;
2768	std::vector<ValueSitesStats> VPStats(NumVPKind);
2769
2770	auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
2771	const std::pair<std::string, uint64_t> &v2) {
2772	return v1.second > v2.second;
2773	};
2774
2775	std::priority_queue<std::pair<std::string, uint64_t>,
2776	std::vector<std::pair<std::string, uint64_t>>,
2777	decltype(MinCmp)>
2778	HottestFuncs(MinCmp);
2779
2780	if (!TextFormat && OnlyListBelow) {
2781	OS << "The list of functions with the maximum counter less than "
2782	<< ShowValueCutoff << ":\n";
2783	}
2784
2785	// Add marker so that IR-level instrumentation round-trips properly.
2786	if (TextFormat && IsIRInstr)
2787	OS << ":ir\n";
2788
2789	for (const auto &Func : *Reader) {
2790	if (Reader ->isIRLevelProfile()) {
2791	bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(FuncHash: Func.Hash);
2792	if (FuncIsCS != ShowCS)
2793	continue;
2794	}
2795	bool Show = ShowAllFunctions \|\|
2796	(!FuncNameFilter.empty() && Func.Name.contains(Other: FuncNameFilter));
2797
2798	bool doTextFormatDump = (Show && TextFormat);
2799
2800	if (doTextFormatDump) {
2801	InstrProfSymtab &Symtab = Reader ->getSymtab();
2802	InstrProfWriter::writeRecordInText(Name: Func.Name, Hash: Func.Hash, Counters: Func, Symtab,
2803	OS);
2804	continue;
2805	}
2806
2807	assert(Func.Counts.size() > `0` && "function missing entry counter");
2808	Builder.addRecord(Func);
2809
2810	if (ShowCovered) {
2811	if (llvm::any_of(Range: Func.Counts, P: [](uint64_t C) { return C; }))
2812	OS << Func.Name << "\n";
2813	continue;
2814	}
2815
2816	uint64_t FuncMax = `0`;
2817	uint64_t FuncSum = `0`;
2818
2819	auto PseudoKind = Func.getCountPseudoKind();
2820	if (PseudoKind != InstrProfRecord::NotPseudo) {
2821	if (Show) {
2822	if (!ShownFunctions)
2823	OS << "Counters:\n";
2824	++ShownFunctions;
2825	OS << " " << Func.Name << ":\n"
2826	<< " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
2827	<< " Counters: " << Func.Counts.size();
2828	if (PseudoKind == InstrProfRecord::PseudoHot)
2829	OS << " <PseudoHot>\n";
2830	else if (PseudoKind == InstrProfRecord::PseudoWarm)
2831	OS << " <PseudoWarm>\n";
2832	else
2833	llvm_unreachable("Unknown PseudoKind");
2834	}
2835	continue;
2836	}
2837
2838	for (size_t I = `0`, E = Func.Counts.size(); I < E; ++I) {
2839	FuncMax = std::max(a: FuncMax, b: Func.Counts [I]);
2840	FuncSum += Func.Counts [I];
2841	}
2842
2843	if (FuncMax < ShowValueCutoff) {
2844	++BelowCutoffFunctions;
2845	if (OnlyListBelow) {
2846	OS << " " << Func.Name << ": (Max = " << FuncMax
2847	<< " Sum = " << FuncSum << ")\n";
2848	}
2849	continue;
2850	} else if (OnlyListBelow)
2851	continue;
2852
2853	if (TopNFunctions) {
2854	if (HottestFuncs.size() == TopNFunctions) {
2855	if (HottestFuncs.top().second < FuncMax) {
2856	HottestFuncs.pop();
2857	HottestFuncs.emplace(args: std::make_pair(x: std::string (Func.Name), y&: FuncMax));
2858	}
2859	} else
2860	HottestFuncs.emplace(args: std::make_pair(x: std::string (Func.Name), y&: FuncMax));
2861	}
2862
2863	if (Show) {
2864	if (!ShownFunctions)
2865	OS << "Counters:\n";
2866
2867	++ShownFunctions;
2868
2869	OS << " " << Func.Name << ":\n"
2870	<< " Hash: " << format(Fmt: "0x%016" PRIx64, Vals: Func.Hash) << "\n"
2871	<< " Counters: " << Func.Counts.size() << "\n";
2872	if (!IsIRInstr)
2873	OS << " Function count: " << Func.Counts [`0`] << "\n";
2874
2875	if (ShowIndirectCallTargets)
2876	OS << " Indirect Call Site Count: "
2877	<< Func.getNumValueSites(ValueKind: IPVK_IndirectCallTarget) << "\n";
2878
2879	if (ShowVTables)
2880	OS << " Number of instrumented vtables: "
2881	<< Func.getNumValueSites(ValueKind: IPVK_VTableTarget) << "\n";
2882
2883	uint32_t NumMemOPCalls = Func.getNumValueSites(ValueKind: IPVK_MemOPSize);
2884	if (ShowMemOPSizes && NumMemOPCalls > `0`)
2885	OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2886	<< "\n";
2887
2888	if (ShowCounts) {
2889	OS << " Block counts: [";
2890	size_t Start = (IsIRInstr ? `0` : `1`);
2891	for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2892	OS << (I == Start ? "" : ", ") << Func.Counts [I];
2893	}
2894	OS << "]\n";
2895	}
2896
2897	if (ShowIndirectCallTargets) {
2898	OS << " Indirect Target Results:\n";
2899	traverseAllValueSites(Func, VK: IPVK_IndirectCallTarget,
2900	Stats&: VPStats [IPVK_IndirectCallTarget], OS,
2901	Symtab: &(Reader ->getSymtab()));
2902	}
2903
2904	if (ShowVTables) {
2905	OS << " VTable Results:\n";
2906	traverseAllValueSites(Func, VK: IPVK_VTableTarget,
2907	Stats&: VPStats [IPVK_VTableTarget], OS,
2908	Symtab: &(Reader ->getSymtab()));
2909	}
2910
2911	if (ShowMemOPSizes && NumMemOPCalls > `0`) {
2912	OS << " Memory Intrinsic Size Results:\n";
2913	traverseAllValueSites(Func, VK: IPVK_MemOPSize, Stats&: VPStats [IPVK_MemOPSize], OS,
2914	Symtab: nullptr);
2915	}
2916	}
2917	}
2918	if (Reader ->hasError())
2919	exitWithError(E: Reader ->getError(), Whence: Filename);
2920
2921	if (TextFormat \|\| ShowCovered)
2922	return `0`;
2923	std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
2924	bool IsIR = Reader ->isIRLevelProfile();
2925	OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
2926	if (IsIR)
2927	OS << " entry_first = " << Reader ->instrEntryBBEnabled();
2928	OS << "\n";
2929	if (ShowAllFunctions \|\| !FuncNameFilter.empty())
2930	OS << "Functions shown: " << ShownFunctions << "\n";
2931	OS << "Total functions: " << PS ->getNumFunctions() << "\n";
2932	if (ShowValueCutoff > `0`) {
2933	OS << "Number of functions with maximum count (< " << ShowValueCutoff
2934	<< "): " << BelowCutoffFunctions << "\n";
2935	OS << "Number of functions with maximum count (>= " << ShowValueCutoff
2936	<< "): " << PS ->getNumFunctions() - BelowCutoffFunctions << "\n";
2937	}
2938	OS << "Maximum function count: " << PS ->getMaxFunctionCount() << "\n";
2939	OS << "Maximum internal block count: " << PS ->getMaxInternalCount() << "\n";
2940
2941	if (TopNFunctions) {
2942	std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
2943	while (!HottestFuncs.empty()) {
2944	SortedHottestFuncs.emplace_back(args: HottestFuncs.top());
2945	HottestFuncs.pop();
2946	}
2947	OS << "Top " << TopNFunctions
2948	<< " functions with the largest internal block counts: \n";
2949	for (auto &hotfunc : llvm::reverse(C&: SortedHottestFuncs))
2950	OS << " " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
2951	}
2952
2953	if (ShownFunctions && ShowIndirectCallTargets) {
2954	OS << "Statistics for indirect call sites profile:\n";
2955	showValueSitesStats(OS, VK: IPVK_IndirectCallTarget,
2956	Stats&: VPStats [IPVK_IndirectCallTarget]);
2957	}
2958
2959	if (ShownFunctions && ShowVTables) {
2960	OS << "Statistics for vtable profile:\n";
2961	showValueSitesStats(OS, VK: IPVK_VTableTarget, Stats&: VPStats [IPVK_VTableTarget]);
2962	}
2963
2964	if (ShownFunctions && ShowMemOPSizes) {
2965	OS << "Statistics for memory intrinsic calls sizes profile:\n";
2966	showValueSitesStats(OS, VK: IPVK_MemOPSize, Stats&: VPStats [IPVK_MemOPSize]);
2967	}
2968
2969	if (ShowDetailedSummary) {
2970	OS << "Total number of blocks: " << PS ->getNumCounts() << "\n";
2971	OS << "Total count: " << PS ->getTotalCount() << "\n";
2972	PS ->printDetailedSummary(OS);
2973	}
2974
2975	if (ShowBinaryIds)
2976	if (Error E = Reader ->printBinaryIds(OS))
2977	exitWithError(E: std::move(E), Whence: Filename);
2978
2979	if (ShowProfileVersion)
2980	OS << "Profile version: " << Reader ->getVersion() << "\n";
2981
2982	if (ShowTemporalProfTraces) {
2983	auto &Traces = Reader ->getTemporalProfTraces();
2984	OS << "Temporal Profile Traces (samples=" << Traces.size()
2985	<< " seen=" << Reader ->getTemporalProfTraceStreamSize() << "):\n";
2986	for (unsigned i = `0`; i < Traces.size(); i++) {
2987	OS << " Temporal Profile Trace " << i << " (weight=" << Traces [i].Weight
2988	<< " count=" << Traces [i].FunctionNameRefs.size() << "):\n";
2989	for (auto &NameRef : Traces [i].FunctionNameRefs)
2990	OS << " " << Reader ->getSymtab().getFuncOrVarName(MD5Hash: NameRef) << "\n";
2991	}
2992	}
2993
2994	return `0`;
2995	}
2996
2997	static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
2998	raw_fd_ostream &OS) {
2999	if (!Reader->dumpSectionInfo(OS)) {
3000	WithColor::warning() << "-show-sec-info-only is only supported for "
3001	<< "sample profile in extbinary format and is "
3002	<< "ignored for other formats.\n";
3003	return;
3004	}
3005	}
3006
3007	namespace {
3008	struct HotFuncInfo {
3009	std::string FuncName;
3010	uint64_t TotalCount = `0`;
3011	double TotalCountPercent = `0.0f`;
3012	uint64_t MaxCount = `0`;
3013	uint64_t EntryCount = `0`;
3014
3015	HotFuncInfo() = default;
3016
3017	HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
3018	: FuncName (FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
3019	MaxCount(MS), EntryCount(ES) {}
3020	};
3021	} // namespace
3022
3023	// Print out detailed information about hot functions in PrintValues vector.
3024	// Users specify titles and offset of every columns through ColumnTitle and
3025	// ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3026	// and at least 4. Besides, users can optionally give a HotFuncMetric string to
3027	// print out or let it be an empty string.
3028	static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
3029	const std::vector<int> &ColumnOffset,
3030	const std::vector<HotFuncInfo> &PrintValues,
3031	uint64_t HotFuncCount, uint64_t TotalFuncCount,
3032	uint64_t HotProfCount, uint64_t TotalProfCount,
3033	const std::string &HotFuncMetric,
3034	uint32_t TopNFunctions, raw_fd_ostream &OS) {
3035	assert(ColumnOffset.size() == ColumnTitle.size() &&
3036	"ColumnOffset and ColumnTitle should have the same size");
3037	assert(ColumnTitle.size() >= `4` &&
3038	"ColumnTitle should have at least 4 elements");
3039	assert(TotalFuncCount > `0` &&
3040	"There should be at least one function in the profile");
3041	double TotalProfPercent = `0`;
3042	if (TotalProfCount > `0`)
3043	TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * `100`;
3044
3045	formatted_raw_ostream FOS(OS);
3046	FOS << HotFuncCount << " out of " << TotalFuncCount
3047	<< " functions with profile ("
3048	<< format(Fmt: "%.2f%%",
3049	Vals: (static_cast<double>(HotFuncCount) / TotalFuncCount * `100`))
3050	<< ") are considered hot functions";
3051	if (!HotFuncMetric.empty())
3052	FOS << " (" << HotFuncMetric << ")";
3053	FOS << ".\n";
3054	FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
3055	<< format(Fmt: "%.2f%%", Vals: TotalProfPercent) << ") are from hot functions.\n";
3056
3057	for (size_t I = `0`; I < ColumnTitle.size(); ++I) {
3058	FOS.PadToColumn(NewCol: ColumnOffset [I]);
3059	FOS << ColumnTitle [I];
3060	}
3061	FOS << "\n";
3062
3063	uint32_t Count = `0`;
3064	for (const auto &R : PrintValues) {
3065	if (TopNFunctions && (Count++ == TopNFunctions))
3066	break;
3067	FOS.PadToColumn(NewCol: ColumnOffset [`0`]);
3068	FOS << R.TotalCount << " (" << format(Fmt: "%.2f%%", Vals: R.TotalCountPercent) << ")";
3069	FOS.PadToColumn(NewCol: ColumnOffset [`1`]);
3070	FOS << R.MaxCount;
3071	FOS.PadToColumn(NewCol: ColumnOffset [`2`]);
3072	FOS << R.EntryCount;
3073	FOS.PadToColumn(NewCol: ColumnOffset [`3`]);
3074	FOS << R.FuncName << "\n";
3075	}
3076	}
3077
3078	static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
3079	ProfileSummary &PS, uint32_t TopN,
3080	raw_fd_ostream &OS) {
3081	using namespace sampleprof;
3082
3083	const uint32_t HotFuncCutoff = `990000`;
3084	auto &SummaryVector = PS.getDetailedSummary();
3085	uint64_t MinCountThreshold = `0`;
3086	for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
3087	if (SummaryEntry.Cutoff == HotFuncCutoff) {
3088	MinCountThreshold = SummaryEntry.MinCount;
3089	break;
3090	}
3091	}
3092
3093	// Traverse all functions in the profile and keep only hot functions.
3094	// The following loop also calculates the sum of total samples of all
3095	// functions.
3096	std::multimap<uint64_t, std::pair<const FunctionSamples , const* uint64_t>,
3097	std::greater<uint64_t>>
3098	HotFunc;
3099	uint64_t ProfileTotalSample = `0`;
3100	uint64_t HotFuncSample = `0`;
3101	uint64_t HotFuncCount = `0`;
3102
3103	for (const auto &I : Profiles) {
3104	FuncSampleStats FuncStats;
3105	const FunctionSamples &FuncProf = I.second;
3106	ProfileTotalSample += FuncProf.getTotalSamples();
3107	getFuncSampleStats(Func: FuncProf, FuncStats, HotThreshold: MinCountThreshold);
3108
3109	if (isFunctionHot(FuncStats, HotThreshold: MinCountThreshold)) {
3110	HotFunc.emplace(args: FuncProf.getTotalSamples(),
3111	args: std::make_pair(x: &(I.second), y&: FuncStats.MaxSample));
3112	HotFuncSample += FuncProf.getTotalSamples();
3113	++HotFuncCount;
3114	}
3115	}
3116
3117	std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
3118	"Entry sample", "Function name"};
3119	std::vector<int> ColumnOffset{`0`, `24`, `42`, `58`};
3120	std::string Metric =
3121	std::string ("max sample >= ") + std::to_string(val: MinCountThreshold);
3122	std::vector<HotFuncInfo> PrintValues;
3123	for (const auto &FuncPair : HotFunc) {
3124	const FunctionSamples &Func = *FuncPair.second.first;
3125	double TotalSamplePercent =
3126	(ProfileTotalSample > `0`)
3127	? (Func.getTotalSamples() * `100.0`) / ProfileTotalSample
3128	: `0`;
3129	PrintValues.emplace_back(
3130	args: HotFuncInfo (Func.getContext().toString(), Func.getTotalSamples(),
3131	TotalSamplePercent, FuncPair.second.second,
3132	Func.getHeadSamplesEstimate()));
3133	}
3134	dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
3135	TotalFuncCount: Profiles.size(), HotProfCount: HotFuncSample, TotalProfCount: ProfileTotalSample,
3136	HotFuncMetric: Metric, TopNFunctions: TopN, OS);
3137
3138	return `0`;
3139	}
3140
3141	static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3142	if (SFormat == ShowFormat::Yaml)
3143	exitWithError(Message: "YAML output is not supported for sample profiles");
3144	using namespace sampleprof;
3145	LLVMContext Context;
3146	auto FS = vfs::getRealFileSystem();
3147	auto ReaderOrErr = SampleProfileReader::create(Filename, C&: Context, FS&: *FS,
3148	P: FSDiscriminatorPassOption);
3149	if (std::error_code EC = ReaderOrErr.getError())
3150	exitWithErrorCode(EC, Whence: Filename);
3151
3152	auto Reader = std::move(ReaderOrErr.get());
3153	if (ShowSectionInfoOnly) {
3154	showSectionInfo(Reader: Reader.get(), OS);
3155	return `0`;
3156	}
3157
3158	if (std::error_code EC = Reader ->read())
3159	exitWithErrorCode(EC, Whence: Filename);
3160
3161	if (ShowAllFunctions \|\| FuncNameFilter.empty()) {
3162	if (SFormat == ShowFormat::Json)
3163	Reader ->dumpJson(OS);
3164	else
3165	Reader ->dump(OS);
3166	} else {
3167	if (SFormat == ShowFormat::Json)
3168	exitWithError(
3169	Message: "the JSON format is supported only when all functions are to "
3170	"be printed");
3171
3172	// TODO: parse context string to support filtering by contexts.
3173	FunctionSamples *FS = Reader ->getSamplesFor(Fname: StringRef (FuncNameFilter));
3174	Reader ->dumpFunctionProfile(FS: FS ? *FS : FunctionSamples (), OS);
3175	}
3176
3177	if (ShowProfileSymbolList) {
3178	std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
3179	Reader ->getProfileSymbolList();
3180	ReaderList ->dump(OS);
3181	}
3182
3183	if (ShowDetailedSummary) {
3184	auto &PS = Reader ->getSummary();
3185	PS.printSummary(OS);
3186	PS.printDetailedSummary(OS);
3187	}
3188
3189	if (ShowHotFuncList \|\| TopNFunctions)
3190	showHotFunctionList(Profiles: Reader ->getProfiles(), PS&: Reader ->getSummary(),
3191	TopN: TopNFunctions, OS);
3192
3193	return `0`;
3194	}
3195
3196	static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3197	if (SFormat == ShowFormat::Json)
3198	exitWithError(Message: "JSON output is not supported for MemProf");
3199	auto ReaderOr = llvm::memprof::RawMemProfReader::create(
3200	Path: Filename, ProfiledBinary, /KeepNames=/KeepName: true);
3201	if (Error E = ReaderOr.takeError())
3202	// Since the error can be related to the profile or the binary we do not
3203	// pass whence. Instead additional context is provided where necessary in
3204	// the error message.
3205	exitWithError(E: std::move(E), /Whence/ "");
3206
3207	std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
3208	ReaderOr.get().release());
3209
3210	Reader ->printYAML(OS);
3211	return `0`;
3212	}
3213
3214	static int showDebugInfoCorrelation(const std::string &Filename,
3215	ShowFormat SFormat, raw_fd_ostream &OS) {
3216	if (SFormat == ShowFormat::Json)
3217	exitWithError(Message: "JSON output is not supported for debug info correlation");
3218	std::unique_ptr<InstrProfCorrelator> Correlator;
3219	if (auto Err =
3220	InstrProfCorrelator::get(Filename, FileKind: InstrProfCorrelator::DEBUG_INFO)
3221	.moveInto(Value&: Correlator))
3222	exitWithError(E: std::move(Err), Whence: Filename);
3223	if (SFormat == ShowFormat::Yaml) {
3224	if (auto Err = Correlator ->dumpYaml(MaxWarnings: MaxDbgCorrelationWarnings, OS))
3225	exitWithError(E: std::move(Err), Whence: Filename);
3226	return `0`;
3227	}
3228
3229	if (auto Err = Correlator ->correlateProfileData(MaxWarnings: MaxDbgCorrelationWarnings))
3230	exitWithError(E: std::move(Err), Whence: Filename);
3231
3232	InstrProfSymtab Symtab;
3233	if (auto Err = Symtab.create(
3234	NameStrings: StringRef (Correlator ->getNamesPointer(), Correlator ->getNamesSize())))
3235	exitWithError(E: std::move(Err), Whence: Filename);
3236
3237	if (ShowProfileSymbolList)
3238	Symtab.dumpNames(OS);
3239	// TODO: Read "Profile Data Type" from debug info to compute and show how many
3240	// counters the section holds.
3241	if (ShowDetailedSummary)
3242	OS << "Counters section size: 0x"
3243	<< Twine::utohexstr(Val: Correlator ->getCountersSectionSize()) << " bytes\n";
3244	OS << "Found " << Correlator ->getDataSize() << " functions\n";
3245
3246	return `0`;
3247	}
3248
3249	static int show_main(StringRef ProgName) {
3250	if (Filename.empty() && DebugInfoFilename.empty())
3251	exitWithError(
3252	Message: "the positional argument '<profdata-file>' is required unless '--" +
3253	DebugInfoFilename.ArgStr + "' is provided");
3254
3255	if (Filename == OutputFilename) {
3256	errs() << ProgName
3257	<< " show: Input file name cannot be the same as the output file "
3258	"name!\n";
3259	return `1`;
3260	}
3261	if (JsonFormat)
3262	SFormat = ShowFormat::Json;
3263
3264	std::error_code EC;
3265	raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3266	if (EC)
3267	exitWithErrorCode(EC, Whence: OutputFilename);
3268
3269	if (ShowAllFunctions && !FuncNameFilter.empty())
3270	WithColor::warning() << "-function argument ignored: showing all functions\n";
3271
3272	if (!DebugInfoFilename.empty())
3273	return showDebugInfoCorrelation(Filename: DebugInfoFilename, SFormat, OS);
3274
3275	if (ShowProfileKind == instr)
3276	return showInstrProfile(SFormat, OS);
3277	if (ShowProfileKind == sample)
3278	return showSampleProfile(SFormat, OS);
3279	return showMemProfProfile(SFormat, OS);
3280	}
3281
3282	static int order_main() {
3283	std::error_code EC;
3284	raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3285	if (EC)
3286	exitWithErrorCode(EC, Whence: OutputFilename);
3287	auto FS = vfs::getRealFileSystem();
3288	auto ReaderOrErr = InstrProfReader::create(Path: Filename, FS&: *FS);
3289	if (Error E = ReaderOrErr.takeError())
3290	exitWithError(E: std::move(E), Whence: Filename);
3291
3292	auto Reader = std::move(ReaderOrErr.get());
3293	for (auto &I : *Reader) {
3294	// Read all entries
3295	(void)I;
3296	}
3297	ArrayRef Traces = Reader ->getTemporalProfTraces();
3298	if (NumTestTraces && NumTestTraces >= Traces.size())
3299	exitWithError(
3300	Message: "--" + NumTestTraces.ArgStr +
3301	" must be smaller than the total number of traces: expected: < " +
3302	Twine (Traces.size()) + ", actual: " + Twine (NumTestTraces));
3303	ArrayRef TestTraces = Traces.take_back(N: NumTestTraces);
3304	Traces = Traces.drop_back(N: NumTestTraces);
3305
3306	std::vector<BPFunctionNode> Nodes;
3307	TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes);
3308	BalancedPartitioningConfig Config;
3309	BalancedPartitioning BP(Config);
3310	BP.run(Nodes);
3311
3312	OS << "# Ordered " << Nodes.size() << " functions\n";
3313	if (!TestTraces.empty()) {
3314	// Since we don't know the symbol sizes, we assume 32 functions per page.
3315	DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber;
3316	for (auto &Node : Nodes)
3317	IdToPageNumber [Node.Id] = IdToPageNumber.size() / `32`;
3318
3319	SmallSet<unsigned, `0`> TouchedPages;
3320	unsigned Area = `0`;
3321	for (auto &Trace : TestTraces) {
3322	for (auto Id : Trace.FunctionNameRefs) {
3323	auto It = IdToPageNumber.find(Val: Id);
3324	if (It == IdToPageNumber.end())
3325	continue;
3326	TouchedPages.insert(V: It ->getSecond());
3327	Area += TouchedPages.size();
3328	}
3329	TouchedPages.clear();
3330	}
3331	OS << "# Total area under the page fault curve: " << (float)Area << "\n";
3332	}
3333	OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3334	"linkage and this output does not take that into account. Some "
3335	"post-processing may be required before passing to the linker via "
3336	"-order_file.\n";
3337	for (auto &N : Nodes) {
3338	auto [Filename, ParsedFuncName] =
3339	getParsedIRPGOName(IRPGOName: Reader ->getSymtab().getFuncOrVarName(MD5Hash: N.Id));
3340	if (!Filename.empty())
3341	OS << "# " << Filename << "\n";
3342	OS << ParsedFuncName << "\n";
3343	}
3344	return `0`;
3345	}
3346
3347	int llvm_profdata_main(int argc, char **argvNonConst,
3348	const llvm::ToolContext &) {
3349	const char argv = const_cast*<const* char **>(argvNonConst);
3350
3351	StringRef ProgName(sys::path::filename(path: argv[`0`]));
3352
3353	if (argc < `2`) {
3354	errs() << ProgName
3355	<< ": No subcommand specified! Run llvm-profata --help for usage.\n";
3356	return `1`;
3357	}
3358
3359	cl::ParseCommandLineOptions(argc, argv, Overview: "LLVM profile data\n");
3360
3361	if (ShowSubcommand)
3362	return show_main(ProgName);
3363
3364	if (OrderSubcommand)
3365	return order_main();
3366
3367	if (OverlapSubcommand)
3368	return overlap_main();
3369
3370	if (MergeSubcommand)
3371	return merge_main(ProgName);
3372
3373	errs() << ProgName
3374	<< ": Unknown command. Run llvm-profdata --help for usage.\n";
3375	return `1`;
3376	}
3377

Browse the source code of llvm_projects/llvm/tools/llvm-profdata/llvm-profdata.cpp