1//===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
10#define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
11#include "CSPreInliner.h"
12#include "ErrorHandling.h"
13#include "PerfReader.h"
14#include "ProfiledBinary.h"
15#include "llvm/IR/DebugInfoMetadata.h"
16#include "llvm/ProfileData/SampleProfWriter.h"
17#include <memory>
18#include <unordered_set>
19
20namespace llvm {
21namespace sampleprof {
22
23using ProbeCounterMap =
24 std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
25
26// This base class for profile generation of sample-based PGO. We reuse all
27// structures relating to function profiles and profile writers as seen in
28// /ProfileData/SampleProf.h.
29class ProfileGeneratorBase {
30
31public:
32 ProfileGeneratorBase(ProfiledBinary *Binary) : Binary(Binary){};
33 ProfileGeneratorBase(ProfiledBinary *Binary,
34 const ContextSampleCounterMap *Counters)
35 : Binary(Binary), SampleCounters(Counters){};
36 ProfileGeneratorBase(ProfiledBinary *Binary,
37 const SampleProfileMap &&Profiles)
38 : Binary(Binary), ProfileMap(std::move(Profiles)){};
39
40 virtual ~ProfileGeneratorBase() = default;
41 static std::unique_ptr<ProfileGeneratorBase>
42 create(ProfiledBinary *Binary, const ContextSampleCounterMap *Counters,
43 bool profileIsCS);
44 static std::unique_ptr<ProfileGeneratorBase>
45 create(ProfiledBinary *Binary, SampleProfileMap &ProfileMap,
46 bool profileIsCS);
47 virtual void generateProfile() = 0;
48 void write();
49
50 static uint32_t
51 getDuplicationFactor(unsigned Discriminator,
52 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
53 return UseFSD ? 1
54 : llvm::DILocation::getDuplicationFactorFromDiscriminator(
55 D: Discriminator);
56 }
57
58 static uint32_t
59 getBaseDiscriminator(unsigned Discriminator,
60 bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
61 return UseFSD ? Discriminator
62 : DILocation::getBaseDiscriminatorFromDiscriminator(
63 D: Discriminator, /* IsFSDiscriminator */ IsFSDiscriminator: false);
64 }
65
66 static bool UseFSDiscriminator;
67
68protected:
69 // Use SampleProfileWriter to serialize profile map
70 void write(std::unique_ptr<SampleProfileWriter> Writer,
71 SampleProfileMap &ProfileMap);
72 /*
73 For each region boundary point, mark if it is begin or end (or both) of
74 the region. Boundary points are inclusive. Log the sample count as well
75 so we can use it when we compute the sample count of each disjoint region
76 later. Note that there might be multiple ranges with different sample
77 count that share same begin/end point. We need to accumulate the sample
78 count for the boundary point for such case, because for the example
79 below,
80
81 |<--100-->|
82 |<------200------>|
83 A B C
84
85 sample count for disjoint region [A,B] would be 300.
86 */
87 void findDisjointRanges(RangeSample &DisjointRanges,
88 const RangeSample &Ranges);
89
90 // Go through each address from range to extract the top frame probe by
91 // looking up in the Address2ProbeMap
92 void extractProbesFromRange(const RangeSample &RangeCounter,
93 ProbeCounterMap &ProbeCounter,
94 bool FindDisjointRanges = true);
95
96 // Helper function for updating body sample for a leaf location in
97 // FunctionProfile
98 void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
99 const SampleContextFrame &LeafLoc,
100 uint64_t Count);
101
102 void updateFunctionSamples();
103
104 void updateTotalSamples();
105
106 void updateCallsiteSamples();
107
108 void filterAmbiguousProfile(SampleProfileMap &Profiles);
109
110 bool filterAmbiguousProfile(FunctionSamples &FS);
111
112 StringRef getCalleeNameForAddress(uint64_t TargetAddress);
113
114 void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
115
116 void calculateBodySamplesAndSize(const FunctionSamples &FSamples,
117 uint64_t &TotalBodySamples,
118 uint64_t &FuncBodySize);
119
120 double calculateDensity(const SampleProfileMap &Profiles);
121
122 void calculateAndShowDensity(const SampleProfileMap &Profiles);
123
124 void showDensitySuggestion(double Density);
125
126 void markAllContextPreinlined(SampleProfileMap &ProfileMap);
127
128 void collectProfiledFunctions();
129
130 bool collectFunctionsFromRawProfile(
131 std::unordered_set<const BinaryFunction *> &ProfiledFunctions);
132
133 // Collect profiled Functions for llvm sample profile input.
134 virtual bool collectFunctionsFromLLVMProfile(
135 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) = 0;
136
137 // List of function prefix to filter out.
138 static constexpr const char *FuncPrefixsToFilter[] = {"__cxx_global_var_init",
139 "__tls_init"};
140
141 // Thresholds from profile summary to answer isHotCount/isColdCount queries.
142 uint64_t HotCountThreshold;
143
144 uint64_t ColdCountThreshold;
145
146 ProfiledBinary *Binary = nullptr;
147
148 std::unique_ptr<ProfileSummary> Summary;
149
150 // Used by SampleProfileWriter
151 SampleProfileMap ProfileMap;
152
153 const ContextSampleCounterMap *SampleCounters = nullptr;
154};
155
156class ProfileGenerator : public ProfileGeneratorBase {
157
158public:
159 ProfileGenerator(ProfiledBinary *Binary,
160 const ContextSampleCounterMap *Counters)
161 : ProfileGeneratorBase(Binary, Counters){};
162 ProfileGenerator(ProfiledBinary *Binary, const SampleProfileMap &&Profiles)
163 : ProfileGeneratorBase(Binary, std::move(Profiles)){};
164 void generateProfile() override;
165
166private:
167 void generateLineNumBasedProfile();
168 void generateProbeBasedProfile();
169 RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
170 FunctionSamples &getTopLevelFunctionProfile(FunctionId FuncName);
171 // Helper function to get the leaf frame's FunctionProfile by traversing the
172 // inline stack and meanwhile it adds the total samples for each frame's
173 // function profile.
174 FunctionSamples &
175 getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
176 uint64_t Count);
177 void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
178 void
179 populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
180 void
181 populateBodySamplesWithProbesForAllFunctions(const RangeSample &RangeCounter);
182 void populateBoundarySamplesWithProbesForAllFunctions(
183 const BranchSample &BranchCounters);
184 void
185 populateTypeSamplesForAllFunctions(const DataAccessSample &DataAccessSamples);
186 void postProcessProfiles();
187 void trimColdProfiles(const SampleProfileMap &Profiles,
188 uint64_t ColdCntThreshold);
189 bool collectFunctionsFromLLVMProfile(
190 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
191};
192
193class CSProfileGenerator : public ProfileGeneratorBase {
194public:
195 CSProfileGenerator(ProfiledBinary *Binary,
196 const ContextSampleCounterMap *Counters)
197 : ProfileGeneratorBase(Binary, Counters){};
198 CSProfileGenerator(ProfiledBinary *Binary, SampleProfileMap &Profiles)
199 : ProfileGeneratorBase(Binary), ContextTracker(Profiles, nullptr){};
200 void generateProfile() override;
201
202 // Trim the context stack at a given depth.
203 template <typename T>
204 static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
205 if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
206 return;
207 std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
208 S.begin());
209 S.resize(Depth);
210 }
211
212 // Remove adjacent repeated context sequences up to a given sequence length,
213 // -1 means no size limit. Note that repeated sequences are identified based
214 // on the exact call site, this is finer granularity than function recursion.
215 template <typename T>
216 static void compressRecursionContext(SmallVectorImpl<T> &Context,
217 int32_t CSize = MaxCompressionSize) {
218 uint32_t I = 1;
219 uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
220 uint32_t MaxDedupSize =
221 CSize == -1 ? HS : std::min(a: static_cast<uint32_t>(CSize), b: HS);
222 auto BeginIter = Context.begin();
223 // Use an in-place algorithm to save memory copy
224 // End indicates the end location of current iteration's data
225 uint32_t End = 0;
226 // Deduplicate from length 1 to the max possible size of a repeated
227 // sequence.
228 while (I <= MaxDedupSize) {
229 // This is a linear algorithm that deduplicates adjacent repeated
230 // sequences of size I. The deduplication detection runs on a sliding
231 // window whose size is 2*I and it keeps sliding the window to deduplicate
232 // the data inside. Once duplication is detected, deduplicate it by
233 // skipping the right half part of the window, otherwise just copy back
234 // the new one by appending them at the back of End pointer(for the next
235 // iteration).
236 //
237 // For example:
238 // Input: [a1, a2, b1, b2]
239 // (Added index to distinguish the same char, the origin is [a, a, b,
240 // b], the size of the dedup window is 2(I = 1) at the beginning)
241 //
242 // 1) The initial status is a dummy window[null, a1], then just copy the
243 // right half of the window(End = 0), then slide the window.
244 // Result: [a1], a2, b1, b2 (End points to the element right before ],
245 // after ] is the data of the previous iteration)
246 //
247 // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
248 // the window i.e the duplication happen. Only slide the window.
249 // Result: [a1], a2, b1, b2
250 //
251 // 3) Next window is [a2, b1], copy the right half of the window(b1 is
252 // new) to the End and slide the window.
253 // Result: [a1, b1], b1, b2
254 //
255 // 4) Next window is [b1, b2], same to 2), skip b2.
256 // Result: [a1, b1], b1, b2
257 // After resize, it will be [a, b]
258
259 // Use pointers like below to do comparison inside the window
260 // [a b c a b c]
261 // | | | | |
262 // LeftBoundary Left Right Left+I Right+I
263 // A duplication found if Left < LeftBoundry.
264
265 int32_t Right = I - 1;
266 End = I;
267 int32_t LeftBoundary = 0;
268 while (Right + I < Context.size()) {
269 // To avoids scanning a part of a sequence repeatedly, it finds out
270 // the common suffix of two hald in the window. The common suffix will
271 // serve as the common prefix of next possible pair of duplicate
272 // sequences. The non-common part will be ignored and never scanned
273 // again.
274
275 // For example.
276 // Input: [a, b1], c1, b2, c2
277 // I = 2
278 //
279 // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
280 // part is 'c1', copy it and only slide the window 1 step.
281 // Result: [a, b1, c1], b2, c2
282 //
283 // 2) Next window is [b1, c1, b2, c2], so duplication happen.
284 // Result after resize: [a, b, c]
285
286 int32_t Left = Right;
287 while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
288 // Find the longest suffix inside the window. When stops, Left points
289 // at the diverging point in the current sequence.
290 Left--;
291 }
292
293 bool DuplicationFound = (Left < LeftBoundary);
294 // Don't need to recheck the data before Right
295 LeftBoundary = Right + 1;
296 if (DuplicationFound) {
297 // Duplication found, skip right half of the window.
298 Right += I;
299 } else {
300 // Copy the non-common-suffix part of the adjacent sequence.
301 std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
302 BeginIter + End);
303 End += Left + I - Right;
304 // Only slide the window by the size of non-common-suffix
305 Right = Left + I;
306 }
307 }
308 // Don't forget the remaining part that's not scanned.
309 std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
310 End += Context.size() - Right - 1;
311 I++;
312 Context.resize(End);
313 MaxDedupSize = std::min(a: static_cast<uint32_t>(End / 2), b: MaxDedupSize);
314 }
315 }
316
317private:
318 void generateLineNumBasedProfile();
319
320 FunctionSamples *getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
321 bool WasLeafInlined = false);
322
323 // Lookup or create ContextTrieNode for the context, FunctionSamples is
324 // created inside this function.
325 ContextTrieNode *getOrCreateContextNode(const SampleContextFrames Context,
326 bool WasLeafInlined = false);
327
328 // For profiled only functions, on-demand compute their inline context
329 // function byte size which is used by the pre-inliner.
330 void computeSizeForProfiledFunctions();
331 // Post processing for profiles before writing out, such as mermining
332 // and trimming cold profiles, running preinliner on profiles.
333 void postProcessProfiles();
334
335 void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
336 const RangeSample &RangeCounters);
337
338 void populateBoundarySamplesForFunction(ContextTrieNode *CallerNode,
339 const BranchSample &BranchCounters);
340
341 void populateInferredFunctionSamples(ContextTrieNode &Node);
342
343 void updateFunctionSamples();
344
345 void generateProbeBasedProfile();
346
347 // Fill in function body samples from probes
348 void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
349 const AddrBasedCtxKey *CtxKey);
350 // Fill in boundary samples for a call probe
351 void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
352 const AddrBasedCtxKey *CtxKey);
353
354 ContextTrieNode *
355 getContextNodeForLeafProbe(const AddrBasedCtxKey *CtxKey,
356 const MCDecodedPseudoProbe *LeafProbe);
357
358 // Helper function to get FunctionSamples for the leaf probe
359 FunctionSamples &
360 getFunctionProfileForLeafProbe(const AddrBasedCtxKey *CtxKey,
361 const MCDecodedPseudoProbe *LeafProbe);
362
363 void convertToProfileMap(ContextTrieNode &Node,
364 SampleContextFrameVector &Context);
365
366 void convertToProfileMap();
367
368 void computeSummaryAndThreshold();
369
370 bool collectFunctionsFromLLVMProfile(
371 std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
372
373 void initializeMissingFrameInferrer();
374
375 // Given an input `Context`, output `NewContext` with inferred missing tail
376 // call frames.
377 void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context,
378 SmallVectorImpl<uint64_t> &NewContext);
379
380 ContextTrieNode &getRootContext() { return ContextTracker.getRootContext(); };
381
382 // The container for holding the FunctionSamples used by context trie.
383 std::list<FunctionSamples> FSamplesList;
384
385 // Underlying context table serves for sample profile writer.
386 std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
387
388 SampleContextTracker ContextTracker;
389
390 bool IsProfileValidOnTrie = true;
391
392public:
393 // Deduplicate adjacent repeated context sequences up to a given sequence
394 // length. -1 means no size limit.
395 static int32_t MaxCompressionSize;
396 static int MaxContextDepth;
397};
398
399} // end namespace sampleprof
400} // end namespace llvm
401
402#endif
403