1 | //=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains support for computing profile summary data. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/IR/ProfileSummary.h" |
14 | #include "llvm/ProfileData/InstrProf.h" |
15 | #include "llvm/ProfileData/ProfileCommon.h" |
16 | #include "llvm/ProfileData/SampleProf.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | |
19 | using namespace llvm; |
20 | |
21 | namespace llvm { |
22 | cl::opt<bool> UseContextLessSummary( |
23 | "profile-summary-contextless" , cl::Hidden, |
24 | cl::desc("Merge context profiles before calculating thresholds." )); |
25 | |
26 | // The following two parameters determine the threshold for a count to be |
27 | // considered hot/cold. These two parameters are percentile values (multiplied |
28 | // by 10000). If the counts are sorted in descending order, the minimum count to |
29 | // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count. |
30 | // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the |
31 | // threshold for determining cold count (everything <= this threshold is |
32 | // considered cold). |
33 | cl::opt<int> ProfileSummaryCutoffHot( |
34 | "profile-summary-cutoff-hot" , cl::Hidden, cl::init(Val: 990000), |
35 | cl::desc("A count is hot if it exceeds the minimum count to" |
36 | " reach this percentile of total counts." )); |
37 | |
38 | cl::opt<int> ProfileSummaryCutoffCold( |
39 | "profile-summary-cutoff-cold" , cl::Hidden, cl::init(Val: 999999), |
40 | cl::desc("A count is cold if it is below the minimum count" |
41 | " to reach this percentile of total counts." )); |
42 | |
43 | cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( |
44 | "profile-summary-huge-working-set-size-threshold" , cl::Hidden, |
45 | cl::init(Val: 15000), |
46 | cl::desc("The code working set size is considered huge if the number of" |
47 | " blocks required to reach the -profile-summary-cutoff-hot" |
48 | " percentile exceeds this count." )); |
49 | |
50 | cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold( |
51 | "profile-summary-large-working-set-size-threshold" , cl::Hidden, |
52 | cl::init(Val: 12500), |
53 | cl::desc("The code working set size is considered large if the number of" |
54 | " blocks required to reach the -profile-summary-cutoff-hot" |
55 | " percentile exceeds this count." )); |
56 | |
57 | // The next two options override the counts derived from summary computation and |
58 | // are useful for debugging purposes. |
59 | cl::opt<uint64_t> ProfileSummaryHotCount( |
60 | "profile-summary-hot-count" , cl::ReallyHidden, |
61 | cl::desc("A fixed hot count that overrides the count derived from" |
62 | " profile-summary-cutoff-hot" )); |
63 | |
64 | cl::opt<uint64_t> ProfileSummaryColdCount( |
65 | "profile-summary-cold-count" , cl::ReallyHidden, |
66 | cl::desc("A fixed cold count that overrides the count derived from" |
67 | " profile-summary-cutoff-cold" )); |
68 | } // namespace llvm |
69 | |
70 | // A set of cutoff values. Each value, when divided by ProfileSummary::Scale |
71 | // (which is 1000000) is a desired percentile of total counts. |
72 | static const uint32_t DefaultCutoffsData[] = { |
73 | 10000, /* 1% */ |
74 | 100000, /* 10% */ |
75 | 200000, 300000, 400000, 500000, 600000, 700000, 800000, |
76 | 900000, 950000, 990000, 999000, 999900, 999990, 999999}; |
77 | const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs = |
78 | DefaultCutoffsData; |
79 | |
80 | // An entry for the 0th percentile to correctly calculate hot/cold count |
81 | // thresholds when -profile-summary-cutoff-hot/cold is 0. If the hot cutoff is |
82 | // 0, no sample counts are treated as hot. If the cold cutoff is 0, all sample |
83 | // counts are treated as cold. Assumes there is no UINT64_MAX sample counts. |
84 | static const ProfileSummaryEntry ZeroCutoffEntry = {0, UINT64_MAX, 0}; |
85 | |
86 | const ProfileSummaryEntry & |
87 | ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS, |
88 | uint64_t Percentile) { |
89 | if (Percentile == 0) |
90 | return ZeroCutoffEntry; |
91 | |
92 | auto It = partition_point(Range: DS, P: [=](const ProfileSummaryEntry &Entry) { |
93 | return Entry.Cutoff < Percentile; |
94 | }); |
95 | // The required percentile has to be <= one of the percentiles in the |
96 | // detailed summary. |
97 | if (It == DS.end()) |
98 | report_fatal_error(reason: "Desired percentile exceeds the maximum cutoff" ); |
99 | return *It; |
100 | } |
101 | |
102 | void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) { |
103 | // The first counter is not necessarily an entry count for IR |
104 | // instrumentation profiles. |
105 | // Eventually MaxFunctionCount will become obsolete and this can be |
106 | // removed. |
107 | |
108 | if (R.getCountPseudoKind() != InstrProfRecord::NotPseudo) |
109 | return; |
110 | |
111 | addEntryCount(Count: R.Counts[0]); |
112 | for (size_t I = 1, E = R.Counts.size(); I < E; ++I) |
113 | addInternalCount(Count: R.Counts[I]); |
114 | } |
115 | |
116 | // To compute the detailed summary, we consider each line containing samples as |
117 | // equivalent to a block with a count in the instrumented profile. |
118 | void SampleProfileSummaryBuilder::addRecord( |
119 | const sampleprof::FunctionSamples &FS, bool isCallsiteSample) { |
120 | if (!isCallsiteSample) { |
121 | NumFunctions++; |
122 | if (FS.getHeadSamples() > MaxFunctionCount) |
123 | MaxFunctionCount = FS.getHeadSamples(); |
124 | } else if (FS.getContext().hasAttribute( |
125 | A: sampleprof::ContextDuplicatedIntoBase)) { |
126 | // Do not recount callee samples if they are already merged into their base |
127 | // profiles. This can happen to CS nested profile. |
128 | return; |
129 | } |
130 | |
131 | for (const auto &I : FS.getBodySamples()) { |
132 | uint64_t Count = I.second.getSamples(); |
133 | addCount(Count); |
134 | } |
135 | for (const auto &I : FS.getCallsiteSamples()) |
136 | for (const auto &CS : I.second) |
137 | addRecord(FS: CS.second, isCallsiteSample: true); |
138 | } |
139 | |
140 | // The argument to this method is a vector of cutoff percentages and the return |
141 | // value is a vector of (Cutoff, MinCount, NumCounts) triplets. |
142 | void ProfileSummaryBuilder::computeDetailedSummary() { |
143 | if (DetailedSummaryCutoffs.empty()) |
144 | return; |
145 | llvm::sort(C&: DetailedSummaryCutoffs); |
146 | auto Iter = CountFrequencies.begin(); |
147 | const auto End = CountFrequencies.end(); |
148 | |
149 | uint32_t CountsSeen = 0; |
150 | uint64_t CurrSum = 0, Count = 0; |
151 | |
152 | for (const uint32_t Cutoff : DetailedSummaryCutoffs) { |
153 | assert(Cutoff <= 999999); |
154 | APInt Temp(128, TotalCount); |
155 | APInt N(128, Cutoff); |
156 | APInt D(128, ProfileSummary::Scale); |
157 | Temp *= N; |
158 | Temp = Temp.sdiv(RHS: D); |
159 | uint64_t DesiredCount = Temp.getZExtValue(); |
160 | assert(DesiredCount <= TotalCount); |
161 | while (CurrSum < DesiredCount && Iter != End) { |
162 | Count = Iter->first; |
163 | uint32_t Freq = Iter->second; |
164 | CurrSum += (Count * Freq); |
165 | CountsSeen += Freq; |
166 | Iter++; |
167 | } |
168 | assert(CurrSum >= DesiredCount); |
169 | ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen}; |
170 | DetailedSummary.push_back(x: PSE); |
171 | } |
172 | } |
173 | |
174 | uint64_t |
175 | ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) { |
176 | auto &HotEntry = |
177 | ProfileSummaryBuilder::getEntryForPercentile(DS, Percentile: ProfileSummaryCutoffHot); |
178 | uint64_t HotCountThreshold = HotEntry.MinCount; |
179 | if (ProfileSummaryHotCount.getNumOccurrences() > 0) |
180 | HotCountThreshold = ProfileSummaryHotCount; |
181 | return HotCountThreshold; |
182 | } |
183 | |
184 | uint64_t |
185 | ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) { |
186 | auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( |
187 | DS, Percentile: ProfileSummaryCutoffCold); |
188 | uint64_t ColdCountThreshold = ColdEntry.MinCount; |
189 | if (ProfileSummaryColdCount.getNumOccurrences() > 0) |
190 | ColdCountThreshold = ProfileSummaryColdCount; |
191 | return ColdCountThreshold; |
192 | } |
193 | |
194 | std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() { |
195 | computeDetailedSummary(); |
196 | return std::make_unique<ProfileSummary>( |
197 | args: ProfileSummary::PSK_Sample, args&: DetailedSummary, args&: TotalCount, args&: MaxCount, args: 0, |
198 | args&: MaxFunctionCount, args&: NumCounts, args&: NumFunctions); |
199 | } |
200 | |
201 | std::unique_ptr<ProfileSummary> |
202 | SampleProfileSummaryBuilder::computeSummaryForProfiles( |
203 | const SampleProfileMap &Profiles) { |
204 | assert(NumFunctions == 0 && |
205 | "This can only be called on an empty summary builder" ); |
206 | sampleprof::SampleProfileMap ContextLessProfiles; |
207 | const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles; |
208 | // For CSSPGO, context-sensitive profile effectively split a function profile |
209 | // into many copies each representing the CFG profile of a particular calling |
210 | // context. That makes the count distribution looks more flat as we now have |
211 | // more function profiles each with lower counts, which in turn leads to lower |
212 | // hot thresholds. To compensate for that, by default we merge context |
213 | // profiles before computing profile summary. |
214 | if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && |
215 | !UseContextLessSummary.getNumOccurrences())) { |
216 | ProfileConverter::flattenProfile(InputProfiles: Profiles, OutputProfiles&: ContextLessProfiles, ProfileIsCS: true); |
217 | ProfilesToUse = &ContextLessProfiles; |
218 | } |
219 | |
220 | for (const auto &I : *ProfilesToUse) { |
221 | const sampleprof::FunctionSamples &Profile = I.second; |
222 | addRecord(FS: Profile); |
223 | } |
224 | |
225 | return getSummary(); |
226 | } |
227 | |
228 | std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() { |
229 | computeDetailedSummary(); |
230 | return std::make_unique<ProfileSummary>( |
231 | args: ProfileSummary::PSK_Instr, args&: DetailedSummary, args&: TotalCount, args&: MaxCount, |
232 | args&: MaxInternalBlockCount, args&: MaxFunctionCount, args&: NumCounts, args&: NumFunctions); |
233 | } |
234 | |
235 | void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { |
236 | assert(Count <= getInstrMaxCountValue() && |
237 | "Count value should be less than the max count value." ); |
238 | NumFunctions++; |
239 | addCount(Count); |
240 | if (Count > MaxFunctionCount) |
241 | MaxFunctionCount = Count; |
242 | } |
243 | |
244 | void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { |
245 | assert(Count <= getInstrMaxCountValue() && |
246 | "Count value should be less than the max count value." ); |
247 | addCount(Count); |
248 | if (Count > MaxInternalBlockCount) |
249 | MaxInternalBlockCount = Count; |
250 | } |
251 | |