1 | //===- ProfileSummaryInfo.cpp - Global profile summary information --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that provides access to the global profile summary |
10 | // information. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
15 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
16 | #include "llvm/IR/BasicBlock.h" |
17 | #include "llvm/IR/Instructions.h" |
18 | #include "llvm/IR/Module.h" |
19 | #include "llvm/IR/ProfileSummary.h" |
20 | #include "llvm/InitializePasses.h" |
21 | #include "llvm/ProfileData/ProfileCommon.h" |
22 | #include "llvm/Support/CommandLine.h" |
23 | #include "llvm/Support/Compiler.h" |
24 | #include <optional> |
25 | using namespace llvm; |
26 | |
27 | static cl::opt<bool> PartialProfile( |
28 | "partial-profile" , cl::Hidden, cl::init(Val: false), |
29 | cl::desc("Specify the current profile is used as a partial profile." )); |
30 | |
31 | LLVM_ABI cl::opt<bool> ScalePartialSampleProfileWorkingSetSize( |
32 | "scale-partial-sample-profile-working-set-size" , cl::Hidden, cl::init(Val: true), |
33 | cl::desc( |
34 | "If true, scale the working set size of the partial sample profile " |
35 | "by the partial profile ratio to reflect the size of the program " |
36 | "being compiled." )); |
37 | |
38 | static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor( |
39 | "partial-sample-profile-working-set-size-scale-factor" , cl::Hidden, |
40 | cl::init(Val: 0.008), |
41 | cl::desc("The scale factor used to scale the working set size of the " |
42 | "partial sample profile along with the partial profile ratio. " |
43 | "This includes the factor of the profile counter per block " |
44 | "and the factor to scale the working set size to use the same " |
45 | "shared thresholds as PGO." )); |
46 | |
47 | // The profile summary metadata may be attached either by the frontend or by |
48 | // any backend passes (IR level instrumentation, for example). This method |
49 | // checks if the Summary is null and if so checks if the summary metadata is now |
50 | // available in the module and parses it to get the Summary object. |
51 | void ProfileSummaryInfo::refresh(std::unique_ptr<ProfileSummary> &&Other) { |
52 | if (Other) { |
53 | Summary.swap(u&: Other); |
54 | return; |
55 | } |
56 | if (hasProfileSummary()) |
57 | return; |
58 | // First try to get context sensitive ProfileSummary. |
59 | auto *SummaryMD = M->getProfileSummary(/* IsCS */ true); |
60 | if (SummaryMD) |
61 | Summary.reset(p: ProfileSummary::getFromMD(MD: SummaryMD)); |
62 | |
63 | if (!hasProfileSummary()) { |
64 | // This will actually return PSK_Instr or PSK_Sample summary. |
65 | SummaryMD = M->getProfileSummary(/* IsCS */ false); |
66 | if (SummaryMD) |
67 | Summary.reset(p: ProfileSummary::getFromMD(MD: SummaryMD)); |
68 | } |
69 | if (!hasProfileSummary()) |
70 | return; |
71 | computeThresholds(); |
72 | } |
73 | |
74 | std::optional<uint64_t> ProfileSummaryInfo::getProfileCount( |
75 | const CallBase &Call, BlockFrequencyInfo *BFI, bool AllowSynthetic) const { |
76 | assert((isa<CallInst>(Call) || isa<InvokeInst>(Call)) && |
77 | "We can only get profile count for call/invoke instruction." ); |
78 | if (hasSampleProfile()) { |
79 | // In sample PGO mode, check if there is a profile metadata on the |
80 | // instruction. If it is present, determine hotness solely based on that, |
81 | // since the sampled entry count may not be accurate. If there is no |
82 | // annotated on the instruction, return std::nullopt. |
83 | uint64_t TotalCount; |
84 | if (Call.extractProfTotalWeight(TotalVal&: TotalCount)) |
85 | return TotalCount; |
86 | return std::nullopt; |
87 | } |
88 | if (BFI) |
89 | return BFI->getBlockProfileCount(BB: Call.getParent(), AllowSynthetic); |
90 | return std::nullopt; |
91 | } |
92 | |
93 | bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const { |
94 | assert(hasPartialSampleProfile() && "Expect partial sample profile" ); |
95 | return !F.getEntryCount(); |
96 | } |
97 | |
98 | /// Returns true if the function's entry is a cold. If it returns false, it |
99 | /// either means it is not cold or it is unknown whether it is cold or not (for |
100 | /// example, no profile data is available). |
101 | bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) const { |
102 | if (!F) |
103 | return false; |
104 | if (F->hasFnAttribute(Kind: Attribute::Cold)) |
105 | return true; |
106 | if (!hasProfileSummary()) |
107 | return false; |
108 | auto FunctionCount = F->getEntryCount(); |
109 | // FIXME: The heuristic used below for determining coldness is based on |
110 | // preliminary SPEC tuning for inliner. This will eventually be a |
111 | // convenience method that calls isHotCount. |
112 | return FunctionCount && isColdCount(C: FunctionCount->getCount()); |
113 | } |
114 | |
115 | /// Compute the hot and cold thresholds. |
116 | void ProfileSummaryInfo::computeThresholds() { |
117 | auto &DetailedSummary = Summary->getDetailedSummary(); |
118 | auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile( |
119 | DS: DetailedSummary, Percentile: ProfileSummaryCutoffHot); |
120 | HotCountThreshold = |
121 | ProfileSummaryBuilder::getHotCountThreshold(DS: DetailedSummary); |
122 | ColdCountThreshold = |
123 | ProfileSummaryBuilder::getColdCountThreshold(DS: DetailedSummary); |
124 | assert(ColdCountThreshold <= HotCountThreshold && |
125 | "Cold count threshold cannot exceed hot count threshold!" ); |
126 | if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) { |
127 | HasHugeWorkingSetSize = |
128 | HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; |
129 | HasLargeWorkingSetSize = |
130 | HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; |
131 | } else { |
132 | // Scale the working set size of the partial sample profile to reflect the |
133 | // size of the program being compiled. |
134 | double PartialProfileRatio = Summary->getPartialProfileRatio(); |
135 | uint64_t ScaledHotEntryNumCounts = |
136 | static_cast<uint64_t>(HotEntry.NumCounts * PartialProfileRatio * |
137 | PartialSampleProfileWorkingSetSizeScaleFactor); |
138 | HasHugeWorkingSetSize = |
139 | ScaledHotEntryNumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; |
140 | HasLargeWorkingSetSize = |
141 | ScaledHotEntryNumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; |
142 | } |
143 | } |
144 | |
145 | std::optional<uint64_t> |
146 | ProfileSummaryInfo::computeThreshold(int PercentileCutoff) const { |
147 | if (!hasProfileSummary()) |
148 | return std::nullopt; |
149 | auto [Iter, Inserted] = ThresholdCache.try_emplace(Key: PercentileCutoff); |
150 | if (!Inserted) |
151 | return Iter->second; |
152 | auto &DetailedSummary = Summary->getDetailedSummary(); |
153 | auto &Entry = ProfileSummaryBuilder::getEntryForPercentile(DS: DetailedSummary, |
154 | Percentile: PercentileCutoff); |
155 | uint64_t CountThreshold = Entry.MinCount; |
156 | Iter->second = CountThreshold; |
157 | return CountThreshold; |
158 | } |
159 | |
160 | bool ProfileSummaryInfo::hasHugeWorkingSetSize() const { |
161 | return HasHugeWorkingSetSize && *HasHugeWorkingSetSize; |
162 | } |
163 | |
164 | bool ProfileSummaryInfo::hasLargeWorkingSetSize() const { |
165 | return HasLargeWorkingSetSize && *HasLargeWorkingSetSize; |
166 | } |
167 | |
168 | bool ProfileSummaryInfo::isHotCount(uint64_t C) const { |
169 | return HotCountThreshold && C >= *HotCountThreshold; |
170 | } |
171 | |
172 | bool ProfileSummaryInfo::isColdCount(uint64_t C) const { |
173 | return ColdCountThreshold && C <= *ColdCountThreshold; |
174 | } |
175 | |
176 | template <bool isHot> |
177 | bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff, |
178 | uint64_t C) const { |
179 | auto CountThreshold = computeThreshold(PercentileCutoff); |
180 | if (isHot) |
181 | return CountThreshold && C >= *CountThreshold; |
182 | else |
183 | return CountThreshold && C <= *CountThreshold; |
184 | } |
185 | |
186 | bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, |
187 | uint64_t C) const { |
188 | return isHotOrColdCountNthPercentile<true>(PercentileCutoff, C); |
189 | } |
190 | |
191 | bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff, |
192 | uint64_t C) const { |
193 | return isHotOrColdCountNthPercentile<false>(PercentileCutoff, C); |
194 | } |
195 | |
196 | uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const { |
197 | return HotCountThreshold.value_or(UINT64_MAX); |
198 | } |
199 | |
200 | uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const { |
201 | return ColdCountThreshold.value_or(u: 0); |
202 | } |
203 | |
204 | bool ProfileSummaryInfo::isHotCallSite(const CallBase &CB, |
205 | BlockFrequencyInfo *BFI) const { |
206 | auto C = getProfileCount(Call: CB, BFI); |
207 | return C && isHotCount(C: *C); |
208 | } |
209 | |
210 | bool ProfileSummaryInfo::isColdCallSite(const CallBase &CB, |
211 | BlockFrequencyInfo *BFI) const { |
212 | auto C = getProfileCount(Call: CB, BFI); |
213 | if (C) |
214 | return isColdCount(C: *C); |
215 | |
216 | // In SamplePGO, if the caller has been sampled, and there is no profile |
217 | // annotated on the callsite, we consider the callsite as cold. |
218 | return hasSampleProfile() && CB.getCaller()->hasProfileData(); |
219 | } |
220 | |
221 | bool ProfileSummaryInfo::hasPartialSampleProfile() const { |
222 | return hasProfileSummary() && |
223 | Summary->getKind() == ProfileSummary::PSK_Sample && |
224 | (PartialProfile || Summary->isPartialProfile()); |
225 | } |
226 | |
// Pass-initialization boilerplate for ProfileSummaryInfoWrapperPass, using the
// argument string "profile-summary-info" and the description
// "Profile summary info".
// NOTE(review): the trailing `false, true` are presumably the macro's
// "CFG only" and "is analysis" flags -- confirm against the INITIALIZE_PASS
// definition.
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
                "Profile summary info", false, true)
229 | |
/// Constructs the wrapper pass, handing its static ID to the ImmutablePass
/// base class. The wrapped ProfileSummaryInfo is not created here; that
/// happens in doInitialization().
ProfileSummaryInfoWrapperPass::ProfileSummaryInfoWrapperPass()
    : ImmutablePass(ID) {}
232 | |
233 | bool ProfileSummaryInfoWrapperPass::doInitialization(Module &M) { |
234 | PSI.reset(p: new ProfileSummaryInfo(M)); |
235 | return false; |
236 | } |
237 | |
238 | bool ProfileSummaryInfoWrapperPass::doFinalization(Module &M) { |
239 | PSI.reset(); |
240 | return false; |
241 | } |
242 | |
// Definition of the static key member declared by ProfileSummaryAnalysis.
AnalysisKey ProfileSummaryAnalysis::Key;

/// Produces the analysis result for module \p M by constructing a
/// ProfileSummaryInfo from it. The analysis manager argument is unused.
ProfileSummaryInfo ProfileSummaryAnalysis::run(Module &M,
                                               ModuleAnalysisManager &) {
  return ProfileSummaryInfo(M);
}
248 | |
249 | PreservedAnalyses ProfileSummaryPrinterPass::run(Module &M, |
250 | ModuleAnalysisManager &AM) { |
251 | ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(IR&: M); |
252 | |
253 | OS << "Functions in " << M.getName() << " with hot/cold annotations: \n" ; |
254 | for (auto &F : M) { |
255 | OS << F.getName(); |
256 | if (PSI.isFunctionEntryHot(F: &F)) |
257 | OS << " :hot entry " ; |
258 | else if (PSI.isFunctionEntryCold(F: &F)) |
259 | OS << " :cold entry " ; |
260 | OS << "\n" ; |
261 | } |
262 | return PreservedAnalyses::all(); |
263 | } |
264 | |
// Definition of the static pass ID that the constructor hands to the
// ImmutablePass base class.
// NOTE(review): presumably the variable's address, not its value, is what
// identifies the pass -- confirm against the Pass documentation.
char ProfileSummaryInfoWrapperPass::ID = 0;
266 | |