1//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10#define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11#include "CSPreInliner.h"
12#include "ErrorHandling.h"
13#include "PerfReader.h"
14#include "ProfiledBinary.h"
15#include "llvm/IR/DebugInfoMetadata.h"
16#include "llvm/ProfileData/SampleProfWriter.h"
17#include <memory>
18#include <unordered_set>
19
20using namespace llvm;
21using namespace sampleprof;
22
23namespace llvm {
24namespace sampleprof {
25
26using ProbeCounterMap =
27 std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
28
29// This base class for profile generation of sample-based PGO. We reuse all
30// structures relating to function profiles and profile writers as seen in
31// /ProfileData/SampleProf.h.
32class ProfileGeneratorBase {
33
34public:
35 ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){};
36 ProfileGeneratorBase(ProfiledBinary *Binary,
37 const ContextSampleCounterMap *Counters)
38 : Binary(Binary), SampleCounters(Counters){};
39 ProfileGeneratorBase(ProfiledBinary *Binary,
40 const SampleProfileMap &&Profiles)
41 : Binary(Binary), ProfileMap(std::move(Profiles)){};
42
43 virtual ~ProfileGeneratorBase() = default;
44 static std::unique_ptr<ProfileGeneratorBase>
45 create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
46 bool profileIsCS);
47 static std::unique_ptr<ProfileGeneratorBase>
48 create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap,
49 bool profileIsCS);
50 virtual void generateProfile() = 0;
51 void write();
52
53 static uint32_t
54 getDuplicationFactor(unsigned Discriminator,
55 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
56 return UseFSD ? 1
57 : llvm::DILocation::getDuplicationFactorFromDiscriminator(
58 D: Discriminator);
59 }
60
61 static uint32_t
62 getBaseDiscriminator(unsigned Discriminator,
63 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
64 return UseFSD ? Discriminator
65 : DILocation::getBaseDiscriminatorFromDiscriminator(
66 D: Discriminator, /* IsFSDiscriminator */ IsFSDiscriminator: false);
67 }
68
69 static bool UseFSDiscriminator;
70
71protected:
72 // Use SampleProfileWriter to serialize profile map
73 void write(std::unique_ptr<SampleProfileWriter> Writer,
74 SampleProfileMap &ProfileMap);
75 /*
76 For each region boundary point, mark if it is begin or end (or both) of
77 the region. Boundary points are inclusive. Log the sample count as well
78 so we can use it when we compute the sample count of each disjoint region
79 later. Note that there might be multiple ranges with different sample
80 count that share same begin/end point. We need to accumulate the sample
81 count for the boundary point for such case, because for the example
82 below,
83
84 |<--100-->|
85 |<------200------>|
86 A B C
87
88 sample count for disjoint region [A,B] would be 300.
89 */
90 void findDisjointRanges(RangeSample &DisjointRanges,
91 const RangeSample &Ranges);
92
93 // Go through each address from range to extract the top frame probe by
94 // looking up in the Address2ProbeMap
95 void extractProbesFromRange(const RangeSample &RangeCounter,
96 ProbeCounterMap &ProbeCounter,
97 bool FindDisjointRanges = true);
98
99 // Helper function for updating body sample for a leaf location in
100 // FunctionProfile
101 void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
102 const SampleContextFrame &LeafLoc,
103 uint64_t Count);
104
105 void updateFunctionSamples();
106
107 void updateTotalSamples();
108
109 void updateCallsiteSamples();
110
111 void filterAmbiguousProfile(SampleProfileMap &Profiles);
112
113 bool filterAmbiguousProfile(FunctionSamples &FS);
114
115 StringRef getCalleeNameForAddress(uint64_t TargetAddress);
116
117 void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
118
119 void calculateBodySamplesAndSize(const FunctionSamples &FSamples,
120 uint64_t &TotalBodySamples,
121 uint64_t &FuncBodySize);
122
123 double calculateDensity(const SampleProfileMap &Profiles);
124
125 void calculateAndShowDensity(const SampleProfileMap &Profiles);
126
127 void showDensitySuggestion(double Density);
128
129 void collectProfiledFunctions();
130
131 bool collectFunctionsFromRawProfile(
132 std::unordered_set<const BinaryFunction *> &ProfiledFunctions);
133
134 // Collect profiled Functions for llvm sample profile input.
135 virtual bool collectFunctionsFromLLVMProfile(
136 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) = 0;
137
138 // List of function prefix to filter out.
139 static constexpr const char *FuncPrefixsToFilter[] = {"__cxx_global_var_init",
140 "__tls_init"};
141
142 // Thresholds from profile summary to answer isHotCount/isColdCount queries.
143 uint64_t HotCountThreshold;
144
145 uint64_t ColdCountThreshold;
146
147 ProfiledBinary *Binary = nullptr;
148
149 std::unique_ptr<ProfileSummary> Summary;
150
151 // Used by SampleProfileWriter
152 SampleProfileMap ProfileMap;
153
154 const ContextSampleCounterMap *SampleCounters = nullptr;
155};
156
157class ProfileGenerator : public ProfileGeneratorBase {
158
159public:
160 ProfileGenerator(ProfiledBinary *Binary,
161 const ContextSampleCounterMap *Counters)
162 : ProfileGeneratorBase(Binary, Counters){};
163 ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
164 : ProfileGeneratorBase(Binary, std::move(Profiles)){};
165 void generateProfile() override;
166
167private:
168 void generateLineNumBasedProfile();
169 void generateProbeBasedProfile();
170 RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
171 FunctionSamples &getTopLevelFunctionProfile(FunctionId FuncName);
172 // Helper function to get the leaf frame's FunctionProfile by traversing the
173 // inline stack and meanwhile it adds the total samples for each frame's
174 // function profile.
175 FunctionSamples &
176 getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
177 uint64_t Count);
178 void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
179 void
180 populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
181 void
182 populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
183 void populateBoundarySamplesWithProbesForAllFunctions(
184 const BranchSample &BranchCounters);
185 void postProcessProfiles();
186 void trimColdProfiles(const SampleProfileMap &Profiles,
187 uint64_t ColdCntThreshold);
188 bool collectFunctionsFromLLVMProfile(
189 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
190};
191
192class CSProfileGenerator : public ProfileGeneratorBase {
193public:
194 CSProfileGenerator(ProfiledBinary *Binary,
195 const ContextSampleCounterMap *Counters)
196 : ProfileGeneratorBase(Binary, Counters){};
197 CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles)
198 : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){};
199 void generateProfile() override;
200
201 // Trim the context stack at a given depth.
202 template <typename T>
203 static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
204 if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
205 return;
206 std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
207 S.begin());
208 S.resize(Depth);
209 }
210
211 // Remove adjacent repeated context sequences up to a given sequence length,
212 // -1 means no size limit. Note that repeated sequences are identified based
213 // on the exact call site, this is finer granularity than function recursion.
214 template <typename T>
215 static void compressRecursionContext(SmallVectorImpl<T> &Context,
216 int32_t CSize = MaxCompressionSize) {
217 uint32_t I = 1;
218 uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
219 uint32_t MaxDedupSize =
220 CSize == -1 ? HS : std::min(a: static_cast<uint32_t>(CSize), b: HS);
221 auto BeginIter = Context.begin();
222 // Use an in-place algorithm to save memory copy
223 // End indicates the end location of current iteration's data
224 uint32_t End = 0;
225 // Deduplicate from length 1 to the max possible size of a repeated
226 // sequence.
227 while (I <= MaxDedupSize) {
228 // This is a linear algorithm that deduplicates adjacent repeated
229 // sequences of size I. The deduplication detection runs on a sliding
230 // window whose size is 2*I and it keeps sliding the window to deduplicate
231 // the data inside. Once duplication is detected, deduplicate it by
232 // skipping the right half part of the window, otherwise just copy back
233 // the new one by appending them at the back of End pointer(for the next
234 // iteration).
235 //
236 // For example:
237 // Input: [a1, a2, b1, b2]
238 // (Added index to distinguish the same char, the origin is [a, a, b,
239 // b], the size of the dedup window is 2(I = 1) at the beginning)
240 //
241 // 1) The initial status is a dummy window[null, a1], then just copy the
242 // right half of the window(End = 0), then slide the window.
243 // Result: [a1], a2, b1, b2 (End points to the element right before ],
244 // after ] is the data of the previous iteration)
245 //
246 // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
247 // the window i.e the duplication happen. Only slide the window.
248 // Result: [a1], a2, b1, b2
249 //
250 // 3) Next window is [a2, b1], copy the right half of the window(b1 is
251 // new) to the End and slide the window.
252 // Result: [a1, b1], b1, b2
253 //
254 // 4) Next window is [b1, b2], same to 2), skip b2.
255 // Result: [a1, b1], b1, b2
256 // After resize, it will be [a, b]
257
258 // Use pointers like below to do comparison inside the window
259 // [a b c a b c]
260 // | | | | |
261 // LeftBoundary Left Right Left+I Right+I
262 // A duplication found if Left < LeftBoundry.
263
264 int32_t Right = I - 1;
265 End = I;
266 int32_t LeftBoundary = 0;
267 while (Right + I < Context.size()) {
268 // To avoids scanning a part of a sequence repeatedly, it finds out
269 // the common suffix of two hald in the window. The common suffix will
270 // serve as the common prefix of next possible pair of duplicate
271 // sequences. The non-common part will be ignored and never scanned
272 // again.
273
274 // For example.
275 // Input: [a, b1], c1, b2, c2
276 // I = 2
277 //
278 // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
279 // part is 'c1', copy it and only slide the window 1 step.
280 // Result: [a, b1, c1], b2, c2
281 //
282 // 2) Next window is [b1, c1, b2, c2], so duplication happen.
283 // Result after resize: [a, b, c]
284
285 int32_t Left = Right;
286 while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
287 // Find the longest suffix inside the window. When stops, Left points
288 // at the diverging point in the current sequence.
289 Left--;
290 }
291
292 bool DuplicationFound = (Left < LeftBoundary);
293 // Don't need to recheck the data before Right
294 LeftBoundary = Right + 1;
295 if (DuplicationFound) {
296 // Duplication found, skip right half of the window.
297 Right += I;
298 } else {
299 // Copy the non-common-suffix part of the adjacent sequence.
300 std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
301 BeginIter + End);
302 End += Left + I - Right;
303 // Only slide the window by the size of non-common-suffix
304 Right = Left + I;
305 }
306 }
307 // Don't forget the remaining part that's not scanned.
308 std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
309 End += Context.size() - Right - 1;
310 I++;
311 Context.resize(End);
312 MaxDedupSize = std::min(a: static_cast<uint32_t>(End / 2), b: MaxDedupSize);
313 }
314 }
315
316private:
317 void generateLineNumBasedProfile();
318
319 FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
320 bool WasLeafInlined = false);
321
322 // Lookup or create ContextTrieNode for the context, FunctionSamples is
323 // created inside this function.
324 ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context,
325 bool WasLeafInlined = false);
326
327 // For profiled only functions, on-demand compute their inline context
328 // function byte size which is used by the pre-inliner.
329 void computeSizeForProfiledFunctions();
330 // Post processing for profiles before writing out, such as mermining
331 // and trimming cold profiles, running preinliner on profiles.
332 void postProcessProfiles();
333
334 void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
335 const RangeSample &RangeCounters);
336
337 void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode,
338 const BranchSample &BranchCounters);
339
340 void populateInferredFunctionSamples(ContextTrieNode &Node);
341
342 void updateFunctionSamples();
343
344 void generateProbeBasedProfile();
345
346 // Fill in function body samples from probes
347 void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
348 const AddrBasedCtxKey *CtxKey);
349 // Fill in boundary samples for a call probe
350 void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
351 const AddrBasedCtxKey *CtxKey);
352
353 ContextTrieNode *
354 getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey,
355 const MCDecodedPseudoProbe *LeafProbe);
356
357 // Helper function to get FunctionSamples for the leaf probe
358 FunctionSamples &
359 getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey,
360 const MCDecodedPseudoProbe *LeafProbe);
361
362 void convertToProfileMap(ContextTrieNode &Node,
363 SampleContextFrameVector &Context);
364
365 void convertToProfileMap();
366
367 void computeSummaryAndThreshold();
368
369 bool collectFunctionsFromLLVMProfile(
370 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
371
372 void initializeMissingFrameInferrer();
373
374 // Given an input `Context`, output `NewContext` with inferred missing tail
375 // call frames.
376 void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context,
377 SmallVectorImpl<uint64_t> &NewContext);
378
379 ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); };
380
381 // The container for holding the FunctionSamples used by context trie.
382 std::list<FunctionSamples> FSamplesList;
383
384 // Underlying context table serves for sample profile writer.
385 std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
386
387 SampleContextTracker ContextTracker;
388
389 bool IsProfileValidOnTrie = true;
390
391public:
392 // Deduplicate adjacent repeated context sequences up to a given sequence
393 // length. -1 means no size limit.
394 static int32_t MaxCompressionSize;
395 static int MaxContextDepth;
396};
397
398} // end namespace sampleprof
399} // end namespace llvm
400
401#endif
402