1 | //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the SampleProfileMatcher used for stale |
10 | // profile matching. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Transforms/IPO/SampleProfileMatcher.h" |
15 | #include "llvm/IR/IntrinsicInst.h" |
16 | #include "llvm/IR/MDBuilder.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | |
19 | using namespace llvm; |
20 | using namespace sampleprof; |
21 | |
22 | #define DEBUG_TYPE "sample-profile-matcher" |
23 | |
24 | static cl::opt<unsigned> FuncProfileSimilarityThreshold( |
25 | "func-profile-similarity-threshold" , cl::Hidden, cl::init(Val: 80), |
26 | cl::desc("Consider a profile matches a function if the similarity of their " |
27 | "callee sequences is above the specified percentile." )); |
28 | |
29 | static cl::opt<unsigned> MinFuncCountForCGMatching( |
30 | "min-func-count-for-cg-matching" , cl::Hidden, cl::init(Val: 5), |
31 | cl::desc("The minimum number of basic blocks required for a function to " |
32 | "run stale profile call graph matching." )); |
33 | |
34 | static cl::opt<unsigned> MinCallCountForCGMatching( |
35 | "min-call-count-for-cg-matching" , cl::Hidden, cl::init(Val: 3), |
36 | cl::desc("The minimum number of call anchors required for a function to " |
37 | "run stale profile call graph matching." )); |
38 | |
39 | extern cl::opt<bool> SalvageStaleProfile; |
40 | extern cl::opt<bool> SalvageUnusedProfile; |
41 | extern cl::opt<bool> PersistProfileStaleness; |
42 | extern cl::opt<bool> ReportProfileStaleness; |
43 | |
44 | static cl::opt<unsigned> SalvageStaleProfileMaxCallsites( |
45 | "salvage-stale-profile-max-callsites" , cl::Hidden, cl::init(UINT_MAX), |
46 | cl::desc("The maximum number of callsites in a function, above which stale " |
47 | "profile matching will be skipped." )); |
48 | |
49 | void SampleProfileMatcher::findIRAnchors(const Function &F, |
50 | AnchorMap &IRAnchors) const { |
51 | // For inlined code, recover the original callsite and callee by finding the |
52 | // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the |
53 | // top-level frame is "main:1", the callsite is "1" and the callee is "foo". |
54 | auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { |
55 | assert((DIL && DIL->getInlinedAt()) && "No inlined callsite" ); |
56 | const DILocation *PrevDIL = nullptr; |
57 | do { |
58 | PrevDIL = DIL; |
59 | DIL = DIL->getInlinedAt(); |
60 | } while (DIL->getInlinedAt()); |
61 | |
62 | LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( |
63 | DIL, ProfileIsFS: FunctionSamples::ProfileIsFS); |
64 | StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); |
65 | return std::make_pair(x&: Callsite, y: FunctionId(CalleeName)); |
66 | }; |
67 | |
68 | auto GetCanonicalCalleeName = [](const CallBase *CB) { |
69 | StringRef CalleeName = UnknownIndirectCallee; |
70 | if (Function *Callee = CB->getCalledFunction()) |
71 | CalleeName = FunctionSamples::getCanonicalFnName(FnName: Callee->getName()); |
72 | return CalleeName; |
73 | }; |
74 | |
75 | // Extract profile matching anchors in the IR. |
76 | for (auto &BB : F) { |
77 | for (auto &I : BB) { |
78 | DILocation *DIL = I.getDebugLoc(); |
79 | if (!DIL) |
80 | continue; |
81 | |
82 | if (FunctionSamples::ProfileIsProbeBased) { |
83 | if (auto Probe = extractProbe(Inst: I)) { |
84 | // Flatten inlined IR for the matching. |
85 | if (DIL->getInlinedAt()) { |
86 | IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL)); |
87 | } else { |
88 | // Use empty StringRef for basic block probe. |
89 | StringRef CalleeName; |
90 | if (const auto *CB = dyn_cast<CallBase>(Val: &I)) { |
91 | // Skip the probe inst whose callee name is "llvm.pseudoprobe". |
92 | if (!isa<IntrinsicInst>(Val: &I)) |
93 | CalleeName = GetCanonicalCalleeName(CB); |
94 | } |
95 | LineLocation Loc = LineLocation(Probe->Id, 0); |
96 | IRAnchors.emplace(args&: Loc, args: FunctionId(CalleeName)); |
97 | } |
98 | } |
99 | } else { |
100 | // TODO: For line-number based profile(AutoFDO), currently only support |
101 | // find callsite anchors. In future, we need to parse all the non-call |
102 | // instructions to extract the line locations for profile matching. |
103 | if (!isa<CallBase>(Val: &I) || isa<IntrinsicInst>(Val: &I)) |
104 | continue; |
105 | |
106 | if (DIL->getInlinedAt()) { |
107 | IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL)); |
108 | } else { |
109 | LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( |
110 | DIL, ProfileIsFS: FunctionSamples::ProfileIsFS); |
111 | StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(Val: &I)); |
112 | IRAnchors.emplace(args&: Callsite, args: FunctionId(CalleeName)); |
113 | } |
114 | } |
115 | } |
116 | } |
117 | } |
118 | |
119 | void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, |
120 | AnchorMap &ProfileAnchors) const { |
121 | auto isInvalidLineOffset = [](uint32_t LineOffset) { |
122 | return LineOffset & 0x8000; |
123 | }; |
124 | |
125 | auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName, |
126 | AnchorMap &ProfileAnchors) { |
127 | auto Ret = ProfileAnchors.try_emplace(k: Loc, args: CalleeName); |
128 | if (!Ret.second) { |
129 | // For multiple callees, which indicates it's an indirect call, we use a |
130 | // dummy name(UnknownIndirectCallee) as the indrect callee name. |
131 | Ret.first->second = FunctionId(UnknownIndirectCallee); |
132 | } |
133 | }; |
134 | |
135 | for (const auto &I : FS.getBodySamples()) { |
136 | const LineLocation &Loc = I.first; |
137 | if (isInvalidLineOffset(Loc.LineOffset)) |
138 | continue; |
139 | for (const auto &C : I.second.getCallTargets()) |
140 | InsertAnchor(Loc, C.first, ProfileAnchors); |
141 | } |
142 | |
143 | for (const auto &I : FS.getCallsiteSamples()) { |
144 | const LineLocation &Loc = I.first; |
145 | if (isInvalidLineOffset(Loc.LineOffset)) |
146 | continue; |
147 | for (const auto &C : I.second) |
148 | InsertAnchor(Loc, C.first, ProfileAnchors); |
149 | } |
150 | } |
151 | |
152 | bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, |
153 | Function *&FuncWithoutProfile) { |
154 | FuncWithoutProfile = nullptr; |
155 | auto R = FunctionsWithoutProfile.find(Key: IRFuncName); |
156 | if (R != FunctionsWithoutProfile.end()) |
157 | FuncWithoutProfile = R->second; |
158 | return !FuncWithoutProfile; |
159 | } |
160 | |
161 | bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { |
162 | return SymbolMap->find(Key: ProfileFuncName) == SymbolMap->end(); |
163 | } |
164 | |
165 | bool SampleProfileMatcher::functionMatchesProfile( |
166 | const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, |
167 | bool FindMatchedProfileOnly) { |
168 | if (IRFuncName == ProfileFuncName) |
169 | return true; |
170 | if (!SalvageUnusedProfile) |
171 | return false; |
172 | |
173 | // If IR function doesn't have profile and the profile is unused, try |
174 | // matching them. |
175 | Function *IRFunc = nullptr; |
176 | if (functionHasProfile(IRFuncName, FuncWithoutProfile&: IRFunc) || |
177 | !isProfileUnused(ProfileFuncName)) |
178 | return false; |
179 | |
180 | assert(FunctionId(IRFunc->getName()) != ProfileFuncName && |
181 | "IR function should be different from profile function to match" ); |
182 | return functionMatchesProfile(IRFunc&: *IRFunc, ProfFunc: ProfileFuncName, |
183 | FindMatchedProfileOnly); |
184 | } |
185 | |
186 | LocToLocMap |
187 | SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, |
188 | const AnchorList &AnchorList2, |
189 | bool MatchUnusedFunction) { |
190 | int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(), |
191 | MaxDepth = Size1 + Size2; |
192 | auto Index = [&](int32_t I) { return I + MaxDepth; }; |
193 | |
194 | LocToLocMap EqualLocations; |
195 | if (MaxDepth == 0) |
196 | return EqualLocations; |
197 | |
198 | // Backtrack the SES result. |
199 | auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace, |
200 | const AnchorList &AnchorList1, |
201 | const AnchorList &AnchorList2, |
202 | LocToLocMap &EqualLocations) { |
203 | int32_t X = Size1, Y = Size2; |
204 | for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) { |
205 | const auto &P = Trace[Depth]; |
206 | int32_t K = X - Y; |
207 | int32_t PrevK = K; |
208 | if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)])) |
209 | PrevK = K + 1; |
210 | else |
211 | PrevK = K - 1; |
212 | |
213 | int32_t PrevX = P[Index(PrevK)]; |
214 | int32_t PrevY = PrevX - PrevK; |
215 | while (X > PrevX && Y > PrevY) { |
216 | X--; |
217 | Y--; |
218 | EqualLocations.insert(x: {AnchorList1[X].first, AnchorList2[Y].first}); |
219 | } |
220 | |
221 | if (Depth == 0) |
222 | break; |
223 | |
224 | if (Y == PrevY) |
225 | X--; |
226 | else if (X == PrevX) |
227 | Y--; |
228 | X = PrevX; |
229 | Y = PrevY; |
230 | } |
231 | }; |
232 | |
233 | // The greedy LCS/SES algorithm. |
234 | |
235 | // An array contains the endpoints of the furthest reaching D-paths. |
236 | std::vector<int32_t> V(2 * MaxDepth + 1, -1); |
237 | V[Index(1)] = 0; |
238 | // Trace is used to backtrack the SES result. |
239 | std::vector<std::vector<int32_t>> Trace; |
240 | for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) { |
241 | Trace.push_back(x: V); |
242 | for (int32_t K = -Depth; K <= Depth; K += 2) { |
243 | int32_t X = 0, Y = 0; |
244 | if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)])) |
245 | X = V[Index(K + 1)]; |
246 | else |
247 | X = V[Index(K - 1)] + 1; |
248 | Y = X - K; |
249 | while (X < Size1 && Y < Size2 && |
250 | functionMatchesProfile( |
251 | IRFuncName: AnchorList1[X].second, ProfileFuncName: AnchorList2[Y].second, |
252 | FindMatchedProfileOnly: !MatchUnusedFunction /* Find matched function only */)) |
253 | X++, Y++; |
254 | |
255 | V[Index(K)] = X; |
256 | |
257 | if (X >= Size1 && Y >= Size2) { |
258 | // Length of an SES is D. |
259 | Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations); |
260 | return EqualLocations; |
261 | } |
262 | } |
263 | } |
264 | // Length of an SES is greater than MaxDepth. |
265 | return EqualLocations; |
266 | } |
267 | |
268 | void SampleProfileMatcher::matchNonCallsiteLocs( |
269 | const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors, |
270 | LocToLocMap &IRToProfileLocationMap) { |
271 | auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { |
272 | // Skip the unchanged location mapping to save memory. |
273 | if (From != To) |
274 | IRToProfileLocationMap.insert(x: {From, To}); |
275 | }; |
276 | |
277 | // Use function's beginning location as the initial anchor. |
278 | int32_t LocationDelta = 0; |
279 | SmallVector<LineLocation> LastMatchedNonAnchors; |
280 | for (const auto &IR : IRAnchors) { |
281 | const auto &Loc = IR.first; |
282 | bool IsMatchedAnchor = false; |
283 | // Match the anchor location in lexical order. |
284 | auto R = MatchedAnchors.find(x: Loc); |
285 | if (R != MatchedAnchors.end()) { |
286 | const auto &Candidate = R->second; |
287 | InsertMatching(Loc, Candidate); |
288 | LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef() |
289 | << " is matched from " << Loc << " to " << Candidate |
290 | << "\n" ); |
291 | LocationDelta = Candidate.LineOffset - Loc.LineOffset; |
292 | |
293 | // Match backwards for non-anchor locations. |
294 | // The locations in LastMatchedNonAnchors have been matched forwards |
295 | // based on the previous anchor, spilt it evenly and overwrite the |
296 | // second half based on the current anchor. |
297 | for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; |
298 | I < LastMatchedNonAnchors.size(); I++) { |
299 | const auto &L = LastMatchedNonAnchors[I]; |
300 | uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; |
301 | LineLocation Candidate(CandidateLineOffset, L.Discriminator); |
302 | InsertMatching(L, Candidate); |
303 | LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L |
304 | << " to " << Candidate << "\n" ); |
305 | } |
306 | |
307 | IsMatchedAnchor = true; |
308 | LastMatchedNonAnchors.clear(); |
309 | } |
310 | |
311 | // Match forwards for non-anchor locations. |
312 | if (!IsMatchedAnchor) { |
313 | uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; |
314 | LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); |
315 | InsertMatching(Loc, Candidate); |
316 | LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " |
317 | << Candidate << "\n" ); |
318 | LastMatchedNonAnchors.emplace_back(Args: Loc); |
319 | } |
320 | } |
321 | } |
322 | |
323 | // Filter the non-call locations from IRAnchors and ProfileAnchors and write |
324 | // them into a list for random access later. |
325 | void SampleProfileMatcher::getFilteredAnchorList( |
326 | const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, |
327 | AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { |
328 | for (const auto &I : IRAnchors) { |
329 | if (I.second.stringRef().empty()) |
330 | continue; |
331 | FilteredIRAnchorsList.emplace_back(args: I); |
332 | } |
333 | |
334 | for (const auto &I : ProfileAnchors) |
335 | FilteredProfileAnchorList.emplace_back(args: I); |
336 | } |
337 | |
338 | // Call target name anchor based profile fuzzy matching. |
339 | // Input: |
340 | // For IR locations, the anchor is the callee name of direct callsite; For |
341 | // profile locations, it's the call target name for BodySamples or inlinee's |
342 | // profile name for CallsiteSamples. |
343 | // Matching heuristic: |
344 | // First match all the anchors using the diff algorithm, then split the |
345 | // non-anchor locations between the two anchors evenly, first half are matched |
346 | // based on the start anchor, second half are matched based on the end anchor. |
347 | // For example, given: |
348 | // IR locations: [1, 2(foo), 3, 5, 6(bar), 7] |
349 | // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] |
350 | // The matching gives: |
351 | // [1, 2(foo), 3, 5, 6(bar), 7] |
352 | // | | | | | | |
353 | // [1, 2, 3(foo), 4, 7, 8(bar), 9] |
354 | // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. |
355 | void SampleProfileMatcher::runStaleProfileMatching( |
356 | const Function &F, const AnchorMap &IRAnchors, |
357 | const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, |
358 | bool RunCFGMatching, bool RunCGMatching) { |
359 | if (!RunCFGMatching && !RunCGMatching) |
360 | return; |
361 | LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() |
362 | << "\n" ); |
363 | assert(IRToProfileLocationMap.empty() && |
364 | "Run stale profile matching only once per function" ); |
365 | |
366 | AnchorList FilteredProfileAnchorList; |
367 | AnchorList FilteredIRAnchorsList; |
368 | getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, |
369 | FilteredProfileAnchorList); |
370 | |
371 | if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) |
372 | return; |
373 | |
374 | if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites || |
375 | FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) { |
376 | LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName() |
377 | << " because the number of callsites in the IR is " |
378 | << FilteredIRAnchorsList.size() |
379 | << " and in the profile is " |
380 | << FilteredProfileAnchorList.size() << "\n" ); |
381 | return; |
382 | } |
383 | |
384 | // Match the callsite anchors by finding the longest common subsequence |
385 | // between IR and profile. |
386 | // Define a match between two anchors as follows: |
387 | // 1) The function names of anchors are the same. |
388 | // 2) The similarity between the anchor functions is above a threshold if |
389 | // RunCGMatching is set. |
390 | // For 2), we only consider the anchor functions from IR and profile don't |
391 | // appear on either side to reduce the matching scope. Note that we need to |
392 | // use IR anchor as base(A side) to align with the order of |
393 | // IRToProfileLocationMap. |
394 | LocToLocMap MatchedAnchors = |
395 | longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList, |
396 | MatchUnusedFunction: RunCGMatching /* Match unused functions */); |
397 | |
398 | // CFG level matching: |
399 | // Apply the callsite matchings to infer matching for the basic |
400 | // block(non-callsite) locations and write the result to |
401 | // IRToProfileLocationMap. |
402 | if (RunCFGMatching) |
403 | matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); |
404 | } |
405 | |
406 | void SampleProfileMatcher::runOnFunction(Function &F) { |
407 | // We need to use flattened function samples for matching. |
408 | // Unlike IR, which includes all callsites from the source code, the callsites |
409 | // in profile only show up when they are hit by samples, i,e. the profile |
410 | // callsites in one context may differ from those in another context. To get |
411 | // the maximum number of callsites, we merge the function profiles from all |
412 | // contexts, aka, the flattened profile to find profile anchors. |
413 | const auto *FSFlattened = getFlattenedSamplesFor(F); |
414 | if (SalvageUnusedProfile && !FSFlattened) { |
415 | // Apply the matching in place to find the new function's matched profile. |
416 | // TODO: For extended profile format, if a function profile is unused and |
417 | // it's top-level, even if the profile is matched, it's not found in the |
418 | // profile. This is because sample reader only read the used profile at the |
419 | // beginning, we need to support loading the profile on-demand in future. |
420 | auto R = FuncToProfileNameMap.find(x: &F); |
421 | if (R != FuncToProfileNameMap.end()) |
422 | FSFlattened = getFlattenedSamplesFor(Fname: R->second); |
423 | } |
424 | if (!FSFlattened) |
425 | return; |
426 | |
427 | // Anchors for IR. It's a map from IR location to callee name, callee name is |
428 | // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) |
429 | // for unknown indrect callee name. |
430 | AnchorMap IRAnchors; |
431 | findIRAnchors(F, IRAnchors); |
432 | // Anchors for profile. It's a map from callsite location to a set of callee |
433 | // name. |
434 | AnchorMap ProfileAnchors; |
435 | findProfileAnchors(FS: *FSFlattened, ProfileAnchors); |
436 | |
437 | // Compute the callsite match states for profile staleness report. |
438 | if (ReportProfileStaleness || PersistProfileStaleness) |
439 | recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap: nullptr); |
440 | |
441 | if (!SalvageStaleProfile) |
442 | return; |
443 | // For probe-based profiles, run matching only when profile checksum is |
444 | // mismatched. |
445 | bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && |
446 | !ProbeManager->profileIsValid(F, Samples: *FSFlattened); |
447 | bool RunCFGMatching = |
448 | !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; |
449 | bool RunCGMatching = SalvageUnusedProfile; |
450 | // For imported functions, the checksum metadata(pseudo_probe_desc) are |
451 | // dropped, so we leverage function attribute(profile-checksum-mismatch) to |
452 | // transfer the info: add the attribute during pre-link phase and check it |
453 | // during post-link phase(see "profileIsValid"). |
454 | if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) |
455 | F.addFnAttr(Kind: "profile-checksum-mismatch" ); |
456 | |
457 | // The matching result will be saved to IRToProfileLocationMap, create a |
458 | // new map for each function. |
459 | auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); |
460 | runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, |
461 | RunCFGMatching, RunCGMatching); |
462 | // Find and update callsite match states after matching. |
463 | if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) |
464 | recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, |
465 | IRToProfileLocationMap: &IRToProfileLocationMap); |
466 | } |
467 | |
468 | void SampleProfileMatcher::recordCallsiteMatchStates( |
469 | const Function &F, const AnchorMap &IRAnchors, |
470 | const AnchorMap &ProfileAnchors, |
471 | const LocToLocMap *IRToProfileLocationMap) { |
472 | bool IsPostMatch = IRToProfileLocationMap != nullptr; |
473 | auto &CallsiteMatchStates = |
474 | FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(FnName: F.getName())]; |
475 | |
476 | auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { |
477 | // IRToProfileLocationMap is null in pre-match phrase. |
478 | if (!IRToProfileLocationMap) |
479 | return IRLoc; |
480 | const auto &ProfileLoc = IRToProfileLocationMap->find(x: IRLoc); |
481 | if (ProfileLoc != IRToProfileLocationMap->end()) |
482 | return ProfileLoc->second; |
483 | else |
484 | return IRLoc; |
485 | }; |
486 | |
487 | for (const auto &I : IRAnchors) { |
488 | // After fuzzy profile matching, use the matching result to remap the |
489 | // current IR callsite. |
490 | const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); |
491 | const auto &IRCalleeId = I.second; |
492 | const auto &It = ProfileAnchors.find(x: ProfileLoc); |
493 | if (It == ProfileAnchors.end()) |
494 | continue; |
495 | const auto &ProfCalleeId = It->second; |
496 | if (IRCalleeId == ProfCalleeId) { |
497 | auto It = CallsiteMatchStates.find(x: ProfileLoc); |
498 | if (It == CallsiteMatchStates.end()) |
499 | CallsiteMatchStates.emplace(args: ProfileLoc, args: MatchState::InitialMatch); |
500 | else if (IsPostMatch) { |
501 | if (It->second == MatchState::InitialMatch) |
502 | It->second = MatchState::UnchangedMatch; |
503 | else if (It->second == MatchState::InitialMismatch) |
504 | It->second = MatchState::RecoveredMismatch; |
505 | } |
506 | } |
507 | } |
508 | |
509 | // Check if there are any callsites in the profile that does not match to any |
510 | // IR callsites. |
511 | for (const auto &I : ProfileAnchors) { |
512 | const auto &Loc = I.first; |
513 | assert(!I.second.stringRef().empty() && "Callees should not be empty" ); |
514 | auto It = CallsiteMatchStates.find(x: Loc); |
515 | if (It == CallsiteMatchStates.end()) |
516 | CallsiteMatchStates.emplace(args: Loc, args: MatchState::InitialMismatch); |
517 | else if (IsPostMatch) { |
518 | // Update the state if it's not matched(UnchangedMatch or |
519 | // RecoveredMismatch). |
520 | if (It->second == MatchState::InitialMismatch) |
521 | It->second = MatchState::UnchangedMismatch; |
522 | else if (It->second == MatchState::InitialMatch) |
523 | It->second = MatchState::RemovedMatch; |
524 | } |
525 | } |
526 | } |
527 | |
528 | void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, |
529 | bool IsTopLevel) { |
530 | const auto *FuncDesc = ProbeManager->getDesc(GUID: FS.getGUID()); |
531 | // Skip the function that is external or renamed. |
532 | if (!FuncDesc) |
533 | return; |
534 | |
535 | if (ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: FS)) { |
536 | if (IsTopLevel) |
537 | NumStaleProfileFunc++; |
538 | // Given currently all probe ids are after block probe ids, once the |
539 | // checksum is mismatched, it's likely all the callites are mismatched and |
540 | // dropped. We conservatively count all the samples as mismatched and stop |
541 | // counting the inlinees' profiles. |
542 | MismatchedFunctionSamples += FS.getTotalSamples(); |
543 | return; |
544 | } |
545 | |
546 | // Even the current-level function checksum is matched, it's possible that the |
547 | // nested inlinees' checksums are mismatched that affect the inlinee's sample |
548 | // loading, we need to go deeper to check the inlinees' function samples. |
549 | // Similarly, count all the samples as mismatched if the inlinee's checksum is |
550 | // mismatched using this recursive function. |
551 | for (const auto &I : FS.getCallsiteSamples()) |
552 | for (const auto &CS : I.second) |
553 | countMismatchedFuncSamples(FS: CS.second, IsTopLevel: false); |
554 | } |
555 | |
556 | void SampleProfileMatcher::countMismatchedCallsiteSamples( |
557 | const FunctionSamples &FS) { |
558 | auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName()); |
559 | // Skip it if no mismatched callsite or this is an external function. |
560 | if (It == FuncCallsiteMatchStates.end() || It->second.empty()) |
561 | return; |
562 | const auto &CallsiteMatchStates = It->second; |
563 | |
564 | auto findMatchState = [&](const LineLocation &Loc) { |
565 | auto It = CallsiteMatchStates.find(x: Loc); |
566 | if (It == CallsiteMatchStates.end()) |
567 | return MatchState::Unknown; |
568 | return It->second; |
569 | }; |
570 | |
571 | auto AttributeMismatchedSamples = [&](const enum MatchState &State, |
572 | uint64_t Samples) { |
573 | if (isMismatchState(State)) |
574 | MismatchedCallsiteSamples += Samples; |
575 | else if (State == MatchState::RecoveredMismatch) |
576 | RecoveredCallsiteSamples += Samples; |
577 | }; |
578 | |
579 | // The non-inlined callsites are saved in the body samples of function |
580 | // profile, go through it to count the non-inlined callsite samples. |
581 | for (const auto &I : FS.getBodySamples()) |
582 | AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); |
583 | |
584 | // Count the inlined callsite samples. |
585 | for (const auto &I : FS.getCallsiteSamples()) { |
586 | auto State = findMatchState(I.first); |
587 | uint64_t CallsiteSamples = 0; |
588 | for (const auto &CS : I.second) |
589 | CallsiteSamples += CS.second.getTotalSamples(); |
590 | AttributeMismatchedSamples(State, CallsiteSamples); |
591 | |
592 | if (isMismatchState(State)) |
593 | continue; |
594 | |
595 | // When the current level of inlined call site matches the profiled call |
596 | // site, we need to go deeper along the inline tree to count mismatches from |
597 | // lower level inlinees. |
598 | for (const auto &CS : I.second) |
599 | countMismatchedCallsiteSamples(FS: CS.second); |
600 | } |
601 | } |
602 | |
603 | void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { |
604 | auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName()); |
605 | // Skip it if no mismatched callsite or this is an external function. |
606 | if (It == FuncCallsiteMatchStates.end() || It->second.empty()) |
607 | return; |
608 | const auto &MatchStates = It->second; |
609 | [[maybe_unused]] bool OnInitialState = |
610 | isInitialState(State: MatchStates.begin()->second); |
611 | for (const auto &I : MatchStates) { |
612 | TotalProfiledCallsites++; |
613 | assert( |
614 | (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && |
615 | "Profile matching state is inconsistent" ); |
616 | |
617 | if (isMismatchState(State: I.second)) |
618 | NumMismatchedCallsites++; |
619 | else if (I.second == MatchState::RecoveredMismatch) |
620 | NumRecoveredCallsites++; |
621 | } |
622 | } |
623 | |
624 | void SampleProfileMatcher::countCallGraphRecoveredSamples( |
625 | const FunctionSamples &FS, |
626 | std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) { |
627 | if (CallGraphRecoveredProfiles.count(x: FS.getFunction())) { |
628 | NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); |
629 | return; |
630 | } |
631 | |
632 | for (const auto &CM : FS.getCallsiteSamples()) { |
633 | for (const auto &CS : CM.second) { |
634 | countCallGraphRecoveredSamples(FS: CS.second, CallGraphRecoveredProfiles); |
635 | } |
636 | } |
637 | } |
638 | |
639 | void SampleProfileMatcher::computeAndReportProfileStaleness() { |
640 | if (!ReportProfileStaleness && !PersistProfileStaleness) |
641 | return; |
642 | |
643 | std::unordered_set<FunctionId> CallGraphRecoveredProfiles; |
644 | if (SalvageUnusedProfile) { |
645 | for (const auto &I : FuncToProfileNameMap) { |
646 | CallGraphRecoveredProfiles.insert(x: I.second); |
647 | if (GlobalValue::isAvailableExternallyLinkage(Linkage: I.first->getLinkage())) |
648 | continue; |
649 | NumCallGraphRecoveredProfiledFunc++; |
650 | } |
651 | } |
652 | |
653 | // Count profile mismatches for profile staleness report. |
654 | for (const auto &F : M) { |
655 | if (skipProfileForFunction(F)) |
656 | continue; |
657 | // As the stats will be merged by linker, skip reporting the metrics for |
658 | // imported functions to avoid repeated counting. |
659 | if (GlobalValue::isAvailableExternallyLinkage(Linkage: F.getLinkage())) |
660 | continue; |
661 | const auto *FS = Reader.getSamplesFor(F); |
662 | if (!FS) |
663 | continue; |
664 | TotalProfiledFunc++; |
665 | TotalFunctionSamples += FS->getTotalSamples(); |
666 | |
667 | if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) |
668 | countCallGraphRecoveredSamples(FS: *FS, CallGraphRecoveredProfiles); |
669 | |
670 | // Checksum mismatch is only used in pseudo-probe mode. |
671 | if (FunctionSamples::ProfileIsProbeBased) |
672 | countMismatchedFuncSamples(FS: *FS, IsTopLevel: true); |
673 | |
674 | // Count mismatches and samples for calliste. |
675 | countMismatchCallsites(FS: *FS); |
676 | countMismatchedCallsiteSamples(FS: *FS); |
677 | } |
678 | |
679 | if (ReportProfileStaleness) { |
680 | if (FunctionSamples::ProfileIsProbeBased) { |
681 | errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc |
682 | << ") of functions' profile are invalid and (" |
683 | << MismatchedFunctionSamples << "/" << TotalFunctionSamples |
684 | << ") of samples are discarded due to function hash mismatch.\n" ; |
685 | } |
686 | if (SalvageUnusedProfile) { |
687 | errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" |
688 | << TotalProfiledFunc << ") of functions' profile are matched and (" |
689 | << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples |
690 | << ") of samples are reused by call graph matching.\n" ; |
691 | } |
692 | |
693 | errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" |
694 | << TotalProfiledCallsites |
695 | << ") of callsites' profile are invalid and (" |
696 | << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" |
697 | << TotalFunctionSamples |
698 | << ") of samples are discarded due to callsite location mismatch.\n" ; |
699 | errs() << "(" << NumRecoveredCallsites << "/" |
700 | << (NumRecoveredCallsites + NumMismatchedCallsites) |
701 | << ") of callsites and (" << RecoveredCallsiteSamples << "/" |
702 | << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) |
703 | << ") of samples are recovered by stale profile matching.\n" ; |
704 | } |
705 | |
706 | if (PersistProfileStaleness) { |
707 | LLVMContext &Ctx = M.getContext(); |
708 | MDBuilder MDB(Ctx); |
709 | |
710 | SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; |
711 | if (FunctionSamples::ProfileIsProbeBased) { |
712 | ProfStatsVec.emplace_back(Args: "NumStaleProfileFunc" , Args&: NumStaleProfileFunc); |
713 | ProfStatsVec.emplace_back(Args: "TotalProfiledFunc" , Args&: TotalProfiledFunc); |
714 | ProfStatsVec.emplace_back(Args: "MismatchedFunctionSamples" , |
715 | Args&: MismatchedFunctionSamples); |
716 | ProfStatsVec.emplace_back(Args: "TotalFunctionSamples" , Args&: TotalFunctionSamples); |
717 | } |
718 | |
719 | if (SalvageUnusedProfile) { |
720 | ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredProfiledFunc" , |
721 | Args&: NumCallGraphRecoveredProfiledFunc); |
722 | ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredFuncSamples" , |
723 | Args&: NumCallGraphRecoveredFuncSamples); |
724 | } |
725 | |
726 | ProfStatsVec.emplace_back(Args: "NumMismatchedCallsites" , Args&: NumMismatchedCallsites); |
727 | ProfStatsVec.emplace_back(Args: "NumRecoveredCallsites" , Args&: NumRecoveredCallsites); |
728 | ProfStatsVec.emplace_back(Args: "TotalProfiledCallsites" , Args&: TotalProfiledCallsites); |
729 | ProfStatsVec.emplace_back(Args: "MismatchedCallsiteSamples" , |
730 | Args&: MismatchedCallsiteSamples); |
731 | ProfStatsVec.emplace_back(Args: "RecoveredCallsiteSamples" , |
732 | Args&: RecoveredCallsiteSamples); |
733 | |
734 | auto *MD = MDB.createLLVMStats(LLVMStatsVec: ProfStatsVec); |
735 | auto *NMD = M.getOrInsertNamedMetadata(Name: "llvm.stats" ); |
736 | NMD->addOperand(M: MD); |
737 | } |
738 | } |
739 | |
740 | void SampleProfileMatcher::findFunctionsWithoutProfile() { |
741 | // TODO: Support MD5 profile. |
742 | if (FunctionSamples::UseMD5) |
743 | return; |
744 | StringSet<> NamesInProfile; |
745 | if (auto NameTable = Reader.getNameTable()) { |
746 | for (auto Name : *NameTable) |
747 | NamesInProfile.insert(key: Name.stringRef()); |
748 | } |
749 | |
750 | for (auto &F : M) { |
751 | // Skip declarations, as even if the function can be matched, we have |
752 | // nothing to do with it. |
753 | if (F.isDeclaration()) |
754 | continue; |
755 | |
756 | StringRef CanonFName = FunctionSamples::getCanonicalFnName(FnName: F.getName()); |
757 | const auto *FS = getFlattenedSamplesFor(F); |
758 | if (FS) |
759 | continue; |
760 | |
761 | // For extended binary, functions fully inlined may not be loaded in the |
762 | // top-level profile, so check the NameTable which has the all symbol names |
763 | // in profile. |
764 | if (NamesInProfile.count(Key: CanonFName)) |
765 | continue; |
766 | |
767 | // For extended binary, non-profiled function symbols are in the profile |
768 | // symbol list table. |
769 | if (PSL && PSL->contains(Name: CanonFName)) |
770 | continue; |
771 | |
772 | LLVM_DEBUG(dbgs() << "Function " << CanonFName |
773 | << " is not in profile or profile symbol list.\n" ); |
774 | FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; |
775 | } |
776 | } |
777 | |
778 | bool SampleProfileMatcher::functionMatchesProfileHelper( |
779 | const Function &IRFunc, const FunctionId &ProfFunc) { |
780 | // The value is in the range [0, 1]. The bigger the value is, the more similar |
781 | // two sequences are. |
782 | float Similarity = 0.0; |
783 | |
784 | const auto *FSFlattened = getFlattenedSamplesFor(Fname: ProfFunc); |
785 | if (!FSFlattened) |
786 | return false; |
787 | // The check for similarity or checksum may not be reliable if the function is |
788 | // tiny, we use the number of basic block as a proxy for the function |
789 | // complexity and skip the matching if it's too small. |
790 | if (IRFunc.size() < MinFuncCountForCGMatching || |
791 | FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching) |
792 | return false; |
793 | |
794 | // For probe-based function, we first trust the checksum info. If the checksum |
795 | // doesn't match, we continue checking for similarity. |
796 | if (FunctionSamples::ProfileIsProbeBased) { |
797 | const auto *FuncDesc = ProbeManager->getDesc(F: IRFunc); |
798 | if (FuncDesc && |
799 | !ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: *FSFlattened)) { |
800 | LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() |
801 | << "(IR) and " << ProfFunc << "(Profile) match.\n" ); |
802 | |
803 | return true; |
804 | } |
805 | } |
806 | |
807 | AnchorMap IRAnchors; |
808 | findIRAnchors(F: IRFunc, IRAnchors); |
809 | AnchorMap ProfileAnchors; |
810 | findProfileAnchors(FS: *FSFlattened, ProfileAnchors); |
811 | |
812 | AnchorList FilteredIRAnchorsList; |
813 | AnchorList FilteredProfileAnchorList; |
814 | getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, |
815 | FilteredProfileAnchorList); |
816 | |
817 | // Similarly skip the matching if the num of anchors is not enough. |
818 | if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || |
819 | FilteredProfileAnchorList.size() < MinCallCountForCGMatching) |
820 | return false; |
821 | |
822 | // Use the diff algorithm to find the LCS between IR and profile. |
823 | |
824 | // Don't recursively match the callee function to avoid infinite matching, |
825 | // callee functions will be handled later since it's processed in top-down |
826 | // order . |
827 | LocToLocMap MatchedAnchors = |
828 | longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList, |
829 | MatchUnusedFunction: false /* Match unused functions */); |
830 | |
831 | Similarity = |
832 | static_cast<float>(MatchedAnchors.size()) * 2 / |
833 | (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size()); |
834 | |
835 | LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() |
836 | << "(IR) and " << ProfFunc << "(profile) is " |
837 | << format("%.2f" , Similarity) << "\n" ); |
838 | assert((Similarity >= 0 && Similarity <= 1.0) && |
839 | "Similarity value should be in [0, 1]" ); |
840 | return Similarity * 100 > FuncProfileSimilarityThreshold; |
841 | } |
842 | |
843 | // If FindMatchedProfileOnly is set to true, only use the processed function |
844 | // results. This is used for skipping the repeated recursive matching. |
845 | bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, |
846 | const FunctionId &ProfFunc, |
847 | bool FindMatchedProfileOnly) { |
848 | auto R = FuncProfileMatchCache.find(x: {&IRFunc, ProfFunc}); |
849 | if (R != FuncProfileMatchCache.end()) |
850 | return R->second; |
851 | |
852 | if (FindMatchedProfileOnly) |
853 | return false; |
854 | |
855 | bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); |
856 | FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; |
857 | if (Matched) { |
858 | FuncToProfileNameMap[&IRFunc] = ProfFunc; |
859 | LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() |
860 | << " matches profile:" << ProfFunc << "\n" ); |
861 | } |
862 | |
863 | return Matched; |
864 | } |
865 | |
866 | void SampleProfileMatcher::runOnModule() { |
867 | ProfileConverter::flattenProfile(InputProfiles: Reader.getProfiles(), OutputProfiles&: FlattenedProfiles, |
868 | ProfileIsCS: FunctionSamples::ProfileIsCS); |
869 | if (SalvageUnusedProfile) |
870 | findFunctionsWithoutProfile(); |
871 | |
872 | // Process the matching in top-down order so that the caller matching result |
873 | // can be used to the callee matching. |
874 | std::vector<Function *> TopDownFunctionList; |
875 | TopDownFunctionList.reserve(n: M.size()); |
876 | buildTopDownFuncOrder(CG, FunctionOrderList&: TopDownFunctionList); |
877 | for (auto *F : TopDownFunctionList) { |
878 | if (skipProfileForFunction(F: *F)) |
879 | continue; |
880 | runOnFunction(F&: *F); |
881 | } |
882 | |
883 | // Update the data in SampleLoader. |
884 | if (SalvageUnusedProfile) |
885 | for (auto &I : FuncToProfileNameMap) { |
886 | assert(I.first && "New function is null" ); |
887 | FunctionId FuncName(I.first->getName()); |
888 | FuncNameToProfNameMap->emplace(Args&: FuncName, Args&: I.second); |
889 | // We need to remove the old entry to avoid duplicating the function |
890 | // processing. |
891 | SymbolMap->erase(Ctx: FuncName); |
892 | SymbolMap->emplace(Args&: I.second, Args: I.first); |
893 | } |
894 | |
895 | if (SalvageStaleProfile) |
896 | distributeIRToProfileLocationMap(); |
897 | |
898 | computeAndReportProfileStaleness(); |
899 | } |
900 | |
901 | void SampleProfileMatcher::distributeIRToProfileLocationMap( |
902 | FunctionSamples &FS) { |
903 | const auto ProfileMappings = FuncMappings.find(Key: FS.getFuncName()); |
904 | if (ProfileMappings != FuncMappings.end()) { |
905 | FS.setIRToProfileLocationMap(&(ProfileMappings->second)); |
906 | } |
907 | |
908 | for (auto &Callees : |
909 | const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { |
910 | for (auto &FS : Callees.second) { |
911 | distributeIRToProfileLocationMap(FS&: FS.second); |
912 | } |
913 | } |
914 | } |
915 | |
916 | // Use a central place to distribute the matching results. Outlined and inlined |
917 | // profile with the function name will be set to the same pointer. |
918 | void SampleProfileMatcher::distributeIRToProfileLocationMap() { |
919 | for (auto &I : Reader.getProfiles()) { |
920 | distributeIRToProfileLocationMap(FS&: I.second); |
921 | } |
922 | } |
923 | |