//=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for computing profile summary data.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/IR/ProfileSummary.h"
14#include "llvm/ProfileData/InstrProf.h"
15#include "llvm/ProfileData/ProfileCommon.h"
16#include "llvm/ProfileData/SampleProf.h"
17#include "llvm/Support/CommandLine.h"
18
19using namespace llvm;
20
21namespace llvm {
22cl::opt<bool> UseContextLessSummary(
23 "profile-summary-contextless", cl::Hidden,
24 cl::desc("Merge context profiles before calculating thresholds."));
25
26// The following two parameters determine the threshold for a count to be
27// considered hot/cold. These two parameters are percentile values (multiplied
28// by 10000). If the counts are sorted in descending order, the minimum count to
29// reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
30// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
31// threshold for determining cold count (everything <= this threshold is
32// considered cold).
33cl::opt<int> ProfileSummaryCutoffHot(
34 "profile-summary-cutoff-hot", cl::Hidden, cl::init(Val: 990000),
35 cl::desc("A count is hot if it exceeds the minimum count to"
36 " reach this percentile of total counts."));
37
38cl::opt<int> ProfileSummaryCutoffCold(
39 "profile-summary-cutoff-cold", cl::Hidden, cl::init(Val: 999999),
40 cl::desc("A count is cold if it is below the minimum count"
41 " to reach this percentile of total counts."));
42
43cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
44 "profile-summary-huge-working-set-size-threshold", cl::Hidden,
45 cl::init(Val: 15000),
46 cl::desc("The code working set size is considered huge if the number of"
47 " blocks required to reach the -profile-summary-cutoff-hot"
48 " percentile exceeds this count."));
49
50cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
51 "profile-summary-large-working-set-size-threshold", cl::Hidden,
52 cl::init(Val: 12500),
53 cl::desc("The code working set size is considered large if the number of"
54 " blocks required to reach the -profile-summary-cutoff-hot"
55 " percentile exceeds this count."));
56
57// The next two options override the counts derived from summary computation and
58// are useful for debugging purposes.
59cl::opt<uint64_t> ProfileSummaryHotCount(
60 "profile-summary-hot-count", cl::ReallyHidden,
61 cl::desc("A fixed hot count that overrides the count derived from"
62 " profile-summary-cutoff-hot"));
63
64cl::opt<uint64_t> ProfileSummaryColdCount(
65 "profile-summary-cold-count", cl::ReallyHidden,
66 cl::desc("A fixed cold count that overrides the count derived from"
67 " profile-summary-cutoff-cold"));
68} // namespace llvm
69
70// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
71// (which is 1000000) is a desired percentile of total counts.
72static const uint32_t DefaultCutoffsData[] = {
73 10000, /* 1% */
74 100000, /* 10% */
75 200000, 300000, 400000, 500000, 600000, 700000, 800000,
76 900000, 950000, 990000, 999000, 999900, 999990, 999999};
77const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs =
78 DefaultCutoffsData;
79
80const ProfileSummaryEntry &
81ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS,
82 uint64_t Percentile) {
83 auto It = partition_point(Range: DS, P: [=](const ProfileSummaryEntry &Entry) {
84 return Entry.Cutoff < Percentile;
85 });
86 // The required percentile has to be <= one of the percentiles in the
87 // detailed summary.
88 if (It == DS.end())
89 report_fatal_error(reason: "Desired percentile exceeds the maximum cutoff");
90 return *It;
91}
92
93void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) {
94 // The first counter is not necessarily an entry count for IR
95 // instrumentation profiles.
96 // Eventually MaxFunctionCount will become obsolete and this can be
97 // removed.
98
99 if (R.getCountPseudoKind() != InstrProfRecord::NotPseudo)
100 return;
101
102 addEntryCount(Count: R.Counts[0]);
103 for (size_t I = 1, E = R.Counts.size(); I < E; ++I)
104 addInternalCount(Count: R.Counts[I]);
105}
106
107// To compute the detailed summary, we consider each line containing samples as
108// equivalent to a block with a count in the instrumented profile.
109void SampleProfileSummaryBuilder::addRecord(
110 const sampleprof::FunctionSamples &FS, bool isCallsiteSample) {
111 if (!isCallsiteSample) {
112 NumFunctions++;
113 if (FS.getHeadSamples() > MaxFunctionCount)
114 MaxFunctionCount = FS.getHeadSamples();
115 } else if (FS.getContext().hasAttribute(
116 A: sampleprof::ContextDuplicatedIntoBase)) {
117 // Do not recount callee samples if they are already merged into their base
118 // profiles. This can happen to CS nested profile.
119 return;
120 }
121
122 for (const auto &I : FS.getBodySamples()) {
123 uint64_t Count = I.second.getSamples();
124 addCount(Count);
125 }
126 for (const auto &I : FS.getCallsiteSamples())
127 for (const auto &CS : I.second)
128 addRecord(FS: CS.second, isCallsiteSample: true);
129}
130
131// The argument to this method is a vector of cutoff percentages and the return
132// value is a vector of (Cutoff, MinCount, NumCounts) triplets.
133void ProfileSummaryBuilder::computeDetailedSummary() {
134 if (DetailedSummaryCutoffs.empty())
135 return;
136 llvm::sort(C&: DetailedSummaryCutoffs);
137 auto Iter = CountFrequencies.begin();
138 const auto End = CountFrequencies.end();
139
140 uint32_t CountsSeen = 0;
141 uint64_t CurrSum = 0, Count = 0;
142
143 for (const uint32_t Cutoff : DetailedSummaryCutoffs) {
144 assert(Cutoff <= 999999);
145 APInt Temp(128, TotalCount);
146 APInt N(128, Cutoff);
147 APInt D(128, ProfileSummary::Scale);
148 Temp *= N;
149 Temp = Temp.sdiv(RHS: D);
150 uint64_t DesiredCount = Temp.getZExtValue();
151 assert(DesiredCount <= TotalCount);
152 while (CurrSum < DesiredCount && Iter != End) {
153 Count = Iter->first;
154 uint32_t Freq = Iter->second;
155 CurrSum += (Count * Freq);
156 CountsSeen += Freq;
157 Iter++;
158 }
159 assert(CurrSum >= DesiredCount);
160 ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen};
161 DetailedSummary.push_back(x: PSE);
162 }
163}
164
165uint64_t
166ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) {
167 auto &HotEntry =
168 ProfileSummaryBuilder::getEntryForPercentile(DS, Percentile: ProfileSummaryCutoffHot);
169 uint64_t HotCountThreshold = HotEntry.MinCount;
170 if (ProfileSummaryHotCount.getNumOccurrences() > 0)
171 HotCountThreshold = ProfileSummaryHotCount;
172 return HotCountThreshold;
173}
174
175uint64_t
176ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) {
177 auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
178 DS, Percentile: ProfileSummaryCutoffCold);
179 uint64_t ColdCountThreshold = ColdEntry.MinCount;
180 if (ProfileSummaryColdCount.getNumOccurrences() > 0)
181 ColdCountThreshold = ProfileSummaryColdCount;
182 return ColdCountThreshold;
183}
184
185std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
186 computeDetailedSummary();
187 return std::make_unique<ProfileSummary>(
188 args: ProfileSummary::PSK_Sample, args&: DetailedSummary, args&: TotalCount, args&: MaxCount, args: 0,
189 args&: MaxFunctionCount, args&: NumCounts, args&: NumFunctions);
190}
191
192std::unique_ptr<ProfileSummary>
193SampleProfileSummaryBuilder::computeSummaryForProfiles(
194 const SampleProfileMap &Profiles) {
195 assert(NumFunctions == 0 &&
196 "This can only be called on an empty summary builder");
197 sampleprof::SampleProfileMap ContextLessProfiles;
198 const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles;
199 // For CSSPGO, context-sensitive profile effectively split a function profile
200 // into many copies each representing the CFG profile of a particular calling
201 // context. That makes the count distribution looks more flat as we now have
202 // more function profiles each with lower counts, which in turn leads to lower
203 // hot thresholds. To compensate for that, by default we merge context
204 // profiles before computing profile summary.
205 if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
206 !UseContextLessSummary.getNumOccurrences())) {
207 ProfileConverter::flattenProfile(InputProfiles: Profiles, OutputProfiles&: ContextLessProfiles, ProfileIsCS: true);
208 ProfilesToUse = &ContextLessProfiles;
209 }
210
211 for (const auto &I : *ProfilesToUse) {
212 const sampleprof::FunctionSamples &Profile = I.second;
213 addRecord(FS: Profile);
214 }
215
216 return getSummary();
217}
218
219std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
220 computeDetailedSummary();
221 return std::make_unique<ProfileSummary>(
222 args: ProfileSummary::PSK_Instr, args&: DetailedSummary, args&: TotalCount, args&: MaxCount,
223 args&: MaxInternalBlockCount, args&: MaxFunctionCount, args&: NumCounts, args&: NumFunctions);
224}
225
226void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
227 assert(Count <= getInstrMaxCountValue() &&
228 "Count value should be less than the max count value.");
229 NumFunctions++;
230 addCount(Count);
231 if (Count > MaxFunctionCount)
232 MaxFunctionCount = Count;
233}
234
235void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
236 assert(Count <= getInstrMaxCountValue() &&
237 "Count value should be less than the max count value.");
238 addCount(Count);
239 if (Count > MaxInternalBlockCount)
240 MaxInternalBlockCount = Count;
241}
242