1 | //=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains support for computing profile summary data. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/IR/ProfileSummary.h" |
14 | #include "llvm/ProfileData/InstrProf.h" |
15 | #include "llvm/ProfileData/ProfileCommon.h" |
16 | #include "llvm/ProfileData/SampleProf.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | |
19 | using namespace llvm; |
20 | |
21 | namespace llvm { |
22 | cl::opt<bool> UseContextLessSummary( |
23 | "profile-summary-contextless" , cl::Hidden, |
24 | cl::desc("Merge context profiles before calculating thresholds." )); |
25 | |
26 | // The following two parameters determine the threshold for a count to be |
27 | // considered hot/cold. These two parameters are percentile values (multiplied |
28 | // by 10000). If the counts are sorted in descending order, the minimum count to |
29 | // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count. |
30 | // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the |
31 | // threshold for determining cold count (everything <= this threshold is |
32 | // considered cold). |
33 | cl::opt<int> ProfileSummaryCutoffHot( |
34 | "profile-summary-cutoff-hot" , cl::Hidden, cl::init(Val: 990000), |
35 | cl::desc("A count is hot if it exceeds the minimum count to" |
36 | " reach this percentile of total counts." )); |
37 | |
38 | cl::opt<int> ProfileSummaryCutoffCold( |
39 | "profile-summary-cutoff-cold" , cl::Hidden, cl::init(Val: 999999), |
40 | cl::desc("A count is cold if it is below the minimum count" |
41 | " to reach this percentile of total counts." )); |
42 | |
43 | cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( |
44 | "profile-summary-huge-working-set-size-threshold" , cl::Hidden, |
45 | cl::init(Val: 15000), |
46 | cl::desc("The code working set size is considered huge if the number of" |
47 | " blocks required to reach the -profile-summary-cutoff-hot" |
48 | " percentile exceeds this count." )); |
49 | |
50 | cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold( |
51 | "profile-summary-large-working-set-size-threshold" , cl::Hidden, |
52 | cl::init(Val: 12500), |
53 | cl::desc("The code working set size is considered large if the number of" |
54 | " blocks required to reach the -profile-summary-cutoff-hot" |
55 | " percentile exceeds this count." )); |
56 | |
57 | // The next two options override the counts derived from summary computation and |
58 | // are useful for debugging purposes. |
59 | cl::opt<uint64_t> ProfileSummaryHotCount( |
60 | "profile-summary-hot-count" , cl::ReallyHidden, |
61 | cl::desc("A fixed hot count that overrides the count derived from" |
62 | " profile-summary-cutoff-hot" )); |
63 | |
64 | cl::opt<uint64_t> ProfileSummaryColdCount( |
65 | "profile-summary-cold-count" , cl::ReallyHidden, |
66 | cl::desc("A fixed cold count that overrides the count derived from" |
67 | " profile-summary-cutoff-cold" )); |
68 | } // namespace llvm |
69 | |
70 | // A set of cutoff values. Each value, when divided by ProfileSummary::Scale |
71 | // (which is 1000000) is a desired percentile of total counts. |
72 | static const uint32_t DefaultCutoffsData[] = { |
73 | 10000, /* 1% */ |
74 | 100000, /* 10% */ |
75 | 200000, 300000, 400000, 500000, 600000, 700000, 800000, |
76 | 900000, 950000, 990000, 999000, 999900, 999990, 999999}; |
77 | const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs = |
78 | DefaultCutoffsData; |
79 | |
80 | const ProfileSummaryEntry & |
81 | ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS, |
82 | uint64_t Percentile) { |
83 | auto It = partition_point(Range: DS, P: [=](const ProfileSummaryEntry &Entry) { |
84 | return Entry.Cutoff < Percentile; |
85 | }); |
86 | // The required percentile has to be <= one of the percentiles in the |
87 | // detailed summary. |
88 | if (It == DS.end()) |
89 | report_fatal_error(reason: "Desired percentile exceeds the maximum cutoff" ); |
90 | return *It; |
91 | } |
92 | |
93 | void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) { |
94 | // The first counter is not necessarily an entry count for IR |
95 | // instrumentation profiles. |
96 | // Eventually MaxFunctionCount will become obsolete and this can be |
97 | // removed. |
98 | |
99 | if (R.getCountPseudoKind() != InstrProfRecord::NotPseudo) |
100 | return; |
101 | |
102 | addEntryCount(Count: R.Counts[0]); |
103 | for (size_t I = 1, E = R.Counts.size(); I < E; ++I) |
104 | addInternalCount(Count: R.Counts[I]); |
105 | } |
106 | |
107 | // To compute the detailed summary, we consider each line containing samples as |
108 | // equivalent to a block with a count in the instrumented profile. |
109 | void SampleProfileSummaryBuilder::addRecord( |
110 | const sampleprof::FunctionSamples &FS, bool isCallsiteSample) { |
111 | if (!isCallsiteSample) { |
112 | NumFunctions++; |
113 | if (FS.getHeadSamples() > MaxFunctionCount) |
114 | MaxFunctionCount = FS.getHeadSamples(); |
115 | } else if (FS.getContext().hasAttribute( |
116 | A: sampleprof::ContextDuplicatedIntoBase)) { |
117 | // Do not recount callee samples if they are already merged into their base |
118 | // profiles. This can happen to CS nested profile. |
119 | return; |
120 | } |
121 | |
122 | for (const auto &I : FS.getBodySamples()) { |
123 | uint64_t Count = I.second.getSamples(); |
124 | addCount(Count); |
125 | } |
126 | for (const auto &I : FS.getCallsiteSamples()) |
127 | for (const auto &CS : I.second) |
128 | addRecord(FS: CS.second, isCallsiteSample: true); |
129 | } |
130 | |
131 | // The argument to this method is a vector of cutoff percentages and the return |
132 | // value is a vector of (Cutoff, MinCount, NumCounts) triplets. |
133 | void ProfileSummaryBuilder::computeDetailedSummary() { |
134 | if (DetailedSummaryCutoffs.empty()) |
135 | return; |
136 | llvm::sort(C&: DetailedSummaryCutoffs); |
137 | auto Iter = CountFrequencies.begin(); |
138 | const auto End = CountFrequencies.end(); |
139 | |
140 | uint32_t CountsSeen = 0; |
141 | uint64_t CurrSum = 0, Count = 0; |
142 | |
143 | for (const uint32_t Cutoff : DetailedSummaryCutoffs) { |
144 | assert(Cutoff <= 999999); |
145 | APInt Temp(128, TotalCount); |
146 | APInt N(128, Cutoff); |
147 | APInt D(128, ProfileSummary::Scale); |
148 | Temp *= N; |
149 | Temp = Temp.sdiv(RHS: D); |
150 | uint64_t DesiredCount = Temp.getZExtValue(); |
151 | assert(DesiredCount <= TotalCount); |
152 | while (CurrSum < DesiredCount && Iter != End) { |
153 | Count = Iter->first; |
154 | uint32_t Freq = Iter->second; |
155 | CurrSum += (Count * Freq); |
156 | CountsSeen += Freq; |
157 | Iter++; |
158 | } |
159 | assert(CurrSum >= DesiredCount); |
160 | ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen}; |
161 | DetailedSummary.push_back(x: PSE); |
162 | } |
163 | } |
164 | |
165 | uint64_t |
166 | ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) { |
167 | auto &HotEntry = |
168 | ProfileSummaryBuilder::getEntryForPercentile(DS, Percentile: ProfileSummaryCutoffHot); |
169 | uint64_t HotCountThreshold = HotEntry.MinCount; |
170 | if (ProfileSummaryHotCount.getNumOccurrences() > 0) |
171 | HotCountThreshold = ProfileSummaryHotCount; |
172 | return HotCountThreshold; |
173 | } |
174 | |
175 | uint64_t |
176 | ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) { |
177 | auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( |
178 | DS, Percentile: ProfileSummaryCutoffCold); |
179 | uint64_t ColdCountThreshold = ColdEntry.MinCount; |
180 | if (ProfileSummaryColdCount.getNumOccurrences() > 0) |
181 | ColdCountThreshold = ProfileSummaryColdCount; |
182 | return ColdCountThreshold; |
183 | } |
184 | |
185 | std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() { |
186 | computeDetailedSummary(); |
187 | return std::make_unique<ProfileSummary>( |
188 | args: ProfileSummary::PSK_Sample, args&: DetailedSummary, args&: TotalCount, args&: MaxCount, args: 0, |
189 | args&: MaxFunctionCount, args&: NumCounts, args&: NumFunctions); |
190 | } |
191 | |
192 | std::unique_ptr<ProfileSummary> |
193 | SampleProfileSummaryBuilder::computeSummaryForProfiles( |
194 | const SampleProfileMap &Profiles) { |
195 | assert(NumFunctions == 0 && |
196 | "This can only be called on an empty summary builder" ); |
197 | sampleprof::SampleProfileMap ContextLessProfiles; |
198 | const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles; |
199 | // For CSSPGO, context-sensitive profile effectively split a function profile |
200 | // into many copies each representing the CFG profile of a particular calling |
201 | // context. That makes the count distribution looks more flat as we now have |
202 | // more function profiles each with lower counts, which in turn leads to lower |
203 | // hot thresholds. To compensate for that, by default we merge context |
204 | // profiles before computing profile summary. |
205 | if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && |
206 | !UseContextLessSummary.getNumOccurrences())) { |
207 | ProfileConverter::flattenProfile(InputProfiles: Profiles, OutputProfiles&: ContextLessProfiles, ProfileIsCS: true); |
208 | ProfilesToUse = &ContextLessProfiles; |
209 | } |
210 | |
211 | for (const auto &I : *ProfilesToUse) { |
212 | const sampleprof::FunctionSamples &Profile = I.second; |
213 | addRecord(FS: Profile); |
214 | } |
215 | |
216 | return getSummary(); |
217 | } |
218 | |
219 | std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() { |
220 | computeDetailedSummary(); |
221 | return std::make_unique<ProfileSummary>( |
222 | args: ProfileSummary::PSK_Instr, args&: DetailedSummary, args&: TotalCount, args&: MaxCount, |
223 | args&: MaxInternalBlockCount, args&: MaxFunctionCount, args&: NumCounts, args&: NumFunctions); |
224 | } |
225 | |
226 | void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { |
227 | assert(Count <= getInstrMaxCountValue() && |
228 | "Count value should be less than the max count value." ); |
229 | NumFunctions++; |
230 | addCount(Count); |
231 | if (Count > MaxFunctionCount) |
232 | MaxFunctionCount = Count; |
233 | } |
234 | |
235 | void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { |
236 | assert(Count <= getInstrMaxCountValue() && |
237 | "Count value should be less than the max count value." ); |
238 | addCount(Count); |
239 | if (Count > MaxInternalBlockCount) |
240 | MaxInternalBlockCount = Count; |
241 | } |
242 | |