1//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10#define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11#include "CSPreInliner.h"
12#include "ErrorHandling.h"
13#include "PerfReader.h"
14#include "ProfiledBinary.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/SmallPtrSet.h"
17#include "llvm/IR/DebugInfoMetadata.h"
18#include "llvm/ProfileData/SampleProfWriter.h"
19#include <memory>
20#include <unordered_set>
21
22namespace llvm {
23namespace sampleprof {
24
25using ProbeCounterMap = DenseMap<const MCDecodedPseudoProbe *, uint64_t>;
26
27// This base class for profile generation of sample-based PGO. We reuse all
28// structures relating to function profiles and profile writers as seen in
29// /ProfileData/SampleProf.h.
30class ProfileGeneratorBase {
31
32public:
33 ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){};
34 ProfileGeneratorBase(ProfiledBinary *Binary,
35 const ContextSampleCounterMap *Counters)
36 : Binary(Binary), SampleCounters(Counters){};
37 ProfileGeneratorBase(ProfiledBinary *Binary,
38 const SampleProfileMap &&Profiles)
39 : Binary(Binary), ProfileMap(std::move(Profiles)){};
40
41 virtual ~ProfileGeneratorBase() = default;
42 static std::unique_ptr<ProfileGeneratorBase>
43 create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
44 bool profileIsCS);
45 static std::unique_ptr<ProfileGeneratorBase>
46 create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap,
47 bool profileIsCS);
48 virtual void generateProfile() = 0;
49 void write();
50
51 static uint32_t
52 getDuplicationFactor(unsigned Discriminator,
53 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
54 return UseFSD ? 1
55 : llvm::DILocation::getDuplicationFactorFromDiscriminator(
56 D: Discriminator);
57 }
58
59 static uint32_t
60 getBaseDiscriminator(unsigned Discriminator,
61 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
62 return UseFSD ? Discriminator
63 : DILocation::getBaseDiscriminatorFromDiscriminator(
64 D: Discriminator, /* IsFSDiscriminator */ IsFSDiscriminator: false);
65 }
66
67 static bool UseFSDiscriminator;
68
69protected:
70 // Use SampleProfileWriter to serialize profile map
71 void write(std::unique_ptr<SampleProfileWriter> Writer,
72 SampleProfileMap &ProfileMap);
73 /*
74 For each region boundary point, mark if it is begin or end (or both) of
75 the region. Boundary points are inclusive. Log the sample count as well
76 so we can use it when we compute the sample count of each disjoint region
77 later. Note that there might be multiple ranges with different sample
78 count that share same begin/end point. We need to accumulate the sample
79 count for the boundary point for such case, because for the example
80 below,
81
82 |<--100-->|
83 |<------200------>|
84 A B C
85
86 sample count for disjoint region [A,B] would be 300.
87 */
88 void findDisjointRanges(RangeSample &DisjointRanges,
89 const RangeSample &Ranges);
90
91 // Go through each address from range to extract the top frame probe by
92 // looking up in the Address2ProbeMap
93 void extractProbesFromRange(const RangeSample &RangeCounter,
94 ProbeCounterMap &ProbeCounter,
95 bool FindDisjointRanges = true);
96
97 // Helper function for updating body sample for a leaf location in
98 // FunctionProfile
99 void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
100 const SampleContextFrame &LeafLoc,
101 uint64_t Count);
102
103 void updateFunctionSamples();
104
105 void updateTotalSamples();
106
107 void updateCallsiteSamples();
108
109 void filterAmbiguousProfile(SampleProfileMap &Profiles);
110
111 bool filterAmbiguousProfile(FunctionSamples &FS);
112
113 StringRef getCalleeNameForAddress(uint64_t TargetAddress);
114
115 void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
116
117 void calculateBodySamplesAndSize(const FunctionSamples &FSamples,
118 uint64_t &TotalBodySamples,
119 uint64_t &FuncBodySize);
120
121 double calculateDensity(const SampleProfileMap &Profiles);
122
123 void calculateAndShowDensity(const SampleProfileMap &Profiles);
124
125 void showDensitySuggestion(double Density);
126
127 void markAllContextPreinlined(SampleProfileMap &ProfileMap);
128
129 void collectProfiledFunctions();
130
131 bool collectFunctionsFromRawProfile(
132 SmallPtrSetImpl<const BinaryFunction *> &ProfiledFunctions);
133
134 // Collect profiled Functions for llvm sample profile input.
135 virtual bool collectFunctionsFromLLVMProfile(
136 SmallPtrSetImpl<const BinaryFunction *> &ProfiledFunctions) = 0;
137
138 // List of function prefix to filter out.
139 static constexpr const char *FuncPrefixsToFilter[] = {"__cxx_global_var_init",
140 "__tls_init"};
141
142 // Thresholds from profile summary to answer isHotCount/isColdCount queries.
143 uint64_t HotCountThreshold;
144
145 uint64_t ColdCountThreshold;
146
147 ProfiledBinary *Binary = nullptr;
148
149 std::unique_ptr<ProfileSummary> Summary;
150
151 // Used by SampleProfileWriter
152 SampleProfileMap ProfileMap;
153
154 const ContextSampleCounterMap *SampleCounters = nullptr;
155};
156
157class ProfileGenerator : public ProfileGeneratorBase {
158
159public:
160 ProfileGenerator(ProfiledBinary *Binary,
161 const ContextSampleCounterMap *Counters)
162 : ProfileGeneratorBase(Binary, Counters){};
163 ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
164 : ProfileGeneratorBase(Binary, std::move(Profiles)){};
165 void generateProfile() override;
166
167private:
168 void generateLineNumBasedProfile();
169 void generateProbeBasedProfile();
170 RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
171 FunctionSamples &getTopLevelFunctionProfile(FunctionId FuncName);
172 // Helper function to get the leaf frame's FunctionProfile by traversing the
173 // inline stack and meanwhile it adds the total samples for each frame's
174 // function profile.
175 FunctionSamples &
176 getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
177 uint64_t Count);
178 void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
179 void
180 populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
181 void
182 populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
183 void populateBoundarySamplesWithProbesForAllFunctions(
184 const BranchSample &BranchCounters);
185 void
186 populateTypeSamplesForAllFunctions(const DataAccessSample &DataAccessSamples);
187 void postProcessProfiles();
188 void trimColdProfiles(uint64_t ColdCntThreshold);
189 bool collectFunctionsFromLLVMProfile(
190 SmallPtrSetImpl<const BinaryFunction *> &ProfiledFunctions) override;
191};
192
193class CSProfileGenerator : public ProfileGeneratorBase {
194public:
195 CSProfileGenerator(ProfiledBinary *Binary,
196 const ContextSampleCounterMap *Counters)
197 : ProfileGeneratorBase(Binary, Counters){};
198 CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles)
199 : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){};
200 void generateProfile() override;
201
202 // Trim the context stack at a given depth.
203 template <typename T>
204 static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
205 if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
206 return;
207 std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
208 S.begin());
209 S.resize(Depth);
210 }
211
212 // Remove adjacent repeated context sequences up to a given sequence length,
213 // -1 means no size limit. Note that repeated sequences are identified based
214 // on the exact call site, this is finer granularity than function recursion.
215 template <typename T>
216 static void compressRecursionContext(SmallVectorImpl<T> &Context,
217 int32_t CSize = MaxCompressionSize) {
218 uint32_t I = 1;
219 uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
220 uint32_t MaxDedupSize =
221 CSize == -1 ? HS : std::min(a: static_cast<uint32_t>(CSize), b: HS);
222 auto BeginIter = Context.begin();
223 // Use an in-place algorithm to save memory copy
224 // End indicates the end location of current iteration's data
225 uint32_t End = 0;
226 // Deduplicate from length 1 to the max possible size of a repeated
227 // sequence.
228 while (I <= MaxDedupSize) {
229 // This is a linear algorithm that deduplicates adjacent repeated
230 // sequences of size I. The deduplication detection runs on a sliding
231 // window whose size is 2*I and it keeps sliding the window to deduplicate
232 // the data inside. Once duplication is detected, deduplicate it by
233 // skipping the right half part of the window, otherwise just copy back
234 // the new one by appending them at the back of End pointer(for the next
235 // iteration).
236 //
237 // For example:
238 // Input: [a1, a2, b1, b2]
239 // (Added index to distinguish the same char, the origin is [a, a, b,
240 // b], the size of the dedup window is 2(I = 1) at the beginning)
241 //
242 // 1) The initial status is a dummy window[null, a1], then just copy the
243 // right half of the window(End = 0), then slide the window.
244 // Result: [a1], a2, b1, b2 (End points to the element right before ],
245 // after ] is the data of the previous iteration)
246 //
247 // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
248 // the window i.e the duplication happen. Only slide the window.
249 // Result: [a1], a2, b1, b2
250 //
251 // 3) Next window is [a2, b1], copy the right half of the window(b1 is
252 // new) to the End and slide the window.
253 // Result: [a1, b1], b1, b2
254 //
255 // 4) Next window is [b1, b2], same to 2), skip b2.
256 // Result: [a1, b1], b1, b2
257 // After resize, it will be [a, b]
258
259 // Use pointers like below to do comparison inside the window
260 // [a b c a b c]
261 // | | | | |
262 // LeftBoundary Left Right Left+I Right+I
263 // A duplication found if Left < LeftBoundry.
264
265 int32_t Right = I - 1;
266 End = I;
267 int32_t LeftBoundary = 0;
268 while (Right + I < Context.size()) {
269 // To avoids scanning a part of a sequence repeatedly, it finds out
270 // the common suffix of two hald in the window. The common suffix will
271 // serve as the common prefix of next possible pair of duplicate
272 // sequences. The non-common part will be ignored and never scanned
273 // again.
274
275 // For example.
276 // Input: [a, b1], c1, b2, c2
277 // I = 2
278 //
279 // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
280 // part is 'c1', copy it and only slide the window 1 step.
281 // Result: [a, b1, c1], b2, c2
282 //
283 // 2) Next window is [b1, c1, b2, c2], so duplication happen.
284 // Result after resize: [a, b, c]
285
286 int32_t Left = Right;
287 while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
288 // Find the longest suffix inside the window. When stops, Left points
289 // at the diverging point in the current sequence.
290 Left--;
291 }
292
293 bool DuplicationFound = (Left < LeftBoundary);
294 // Don't need to recheck the data before Right
295 LeftBoundary = Right + 1;
296 if (DuplicationFound) {
297 // Duplication found, skip right half of the window.
298 Right += I;
299 } else {
300 // Copy the non-common-suffix part of the adjacent sequence.
301 std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
302 BeginIter + End);
303 End += Left + I - Right;
304 // Only slide the window by the size of non-common-suffix
305 Right = Left + I;
306 }
307 }
308 // Don't forget the remaining part that's not scanned.
309 std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
310 End += Context.size() - Right - 1;
311 I++;
312 Context.resize(End);
313 MaxDedupSize = std::min(a: static_cast<uint32_t>(End / 2), b: MaxDedupSize);
314 }
315 }
316
317private:
318 void generateLineNumBasedProfile();
319
320 FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
321 bool WasLeafInlined = false);
322
323 // Lookup or create ContextTrieNode for the context, FunctionSamples is
324 // created inside this function.
325 ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context,
326 bool WasLeafInlined = false);
327
328 // For profiled only functions, on-demand compute their inline context
329 // function byte size which is used by the pre-inliner.
330 void computeSizeForProfiledFunctions();
331 // Post processing for profiles before writing out, such as mermining
332 // and trimming cold profiles, running preinliner on profiles.
333 void postProcessProfiles();
334
335 void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
336 const RangeSample &RangeCounters);
337
338 void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode,
339 const BranchSample &BranchCounters);
340
341 void populateInferredFunctionSamples(ContextTrieNode &Node);
342
343 void updateFunctionSamples();
344
345 void generateProbeBasedProfile();
346
347 // Fill in function body samples from probes
348 void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
349 const AddrBasedCtxKey *CtxKey);
350 // Fill in boundary samples for a call probe
351 void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
352 const AddrBasedCtxKey *CtxKey);
353
354 ContextTrieNode *
355 getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey,
356 const MCDecodedPseudoProbe *LeafProbe);
357
358 // Helper function to get FunctionSamples for the leaf probe
359 FunctionSamples &
360 getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey,
361 const MCDecodedPseudoProbe *LeafProbe);
362
363 void convertToProfileMap(ContextTrieNode &Node,
364 SampleContextFrameVector &Context);
365
366 void convertToProfileMap();
367
368 void computeSummaryAndThreshold();
369
370 bool collectFunctionsFromLLVMProfile(
371 SmallPtrSetImpl<const BinaryFunction *> &ProfiledFunctions) override;
372
373 void initializeMissingFrameInferrer();
374
375 // Given an input `Context`, output `NewContext` with inferred missing tail
376 // call frames.
377 void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context,
378 SmallVectorImpl<uint64_t> &NewContext);
379
380 ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); };
381
382 // The container for holding the FunctionSamples used by context trie.
383 std::list<FunctionSamples> FSamplesList;
384
385 // Underlying context table serves for sample profile writer. SampleContexts
386 // in the output profile hold ArrayRefs into the stored vectors, so the
387 // elements' addresses must be stable: keep std::unordered_set.
388 std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
389
390 SampleContextTracker ContextTracker;
391
392 bool IsProfileValidOnTrie = true;
393
394public:
395 // Deduplicate adjacent repeated context sequences up to a given sequence
396 // length. -1 means no size limit.
397 static int32_t MaxCompressionSize;
398 static int MaxContextDepth;
399};
400
401} // end namespace sampleprof
402} // end namespace llvm
403
404#endif
405