1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15#include "llvm/IR/IntrinsicInst.h"
16#include "llvm/IR/MDBuilder.h"
17#include "llvm/Support/CommandLine.h"
18
19using namespace llvm;
20using namespace sampleprof;
21
22#define DEBUG_TYPE "sample-profile-matcher"
23
24static cl::opt<unsigned> FuncProfileSimilarityThreshold(
25 "func-profile-similarity-threshold", cl::Hidden, cl::init(Val: 80),
26 cl::desc("Consider a profile matches a function if the similarity of their "
27 "callee sequences is above the specified percentile."));
28
29static cl::opt<unsigned> MinFuncCountForCGMatching(
30 "min-func-count-for-cg-matching", cl::Hidden, cl::init(Val: 5),
31 cl::desc("The minimum number of basic blocks required for a function to "
32 "run stale profile call graph matching."));
33
34static cl::opt<unsigned> MinCallCountForCGMatching(
35 "min-call-count-for-cg-matching", cl::Hidden, cl::init(Val: 3),
36 cl::desc("The minimum number of call anchors required for a function to "
37 "run stale profile call graph matching."));
38
39extern cl::opt<bool> SalvageStaleProfile;
40extern cl::opt<bool> SalvageUnusedProfile;
41extern cl::opt<bool> PersistProfileStaleness;
42extern cl::opt<bool> ReportProfileStaleness;
43
44static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
45 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
46 cl::desc("The maximum number of callsites in a function, above which stale "
47 "profile matching will be skipped."));
48
49void SampleProfileMatcher::findIRAnchors(const Function &F,
50 AnchorMap &IRAnchors) const {
51 // For inlined code, recover the original callsite and callee by finding the
52 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
53 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
54 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
55 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
56 const DILocation *PrevDIL = nullptr;
57 do {
58 PrevDIL = DIL;
59 DIL = DIL->getInlinedAt();
60 } while (DIL->getInlinedAt());
61
62 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
63 DIL, ProfileIsFS: FunctionSamples::ProfileIsFS);
64 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
65 return std::make_pair(x&: Callsite, y: FunctionId(CalleeName));
66 };
67
68 auto GetCanonicalCalleeName = [](const CallBase *CB) {
69 StringRef CalleeName = UnknownIndirectCallee;
70 if (Function *Callee = CB->getCalledFunction())
71 CalleeName = FunctionSamples::getCanonicalFnName(FnName: Callee->getName());
72 return CalleeName;
73 };
74
75 // Extract profile matching anchors in the IR.
76 for (auto &BB : F) {
77 for (auto &I : BB) {
78 DILocation *DIL = I.getDebugLoc();
79 if (!DIL)
80 continue;
81
82 if (FunctionSamples::ProfileIsProbeBased) {
83 if (auto Probe = extractProbe(Inst: I)) {
84 // Flatten inlined IR for the matching.
85 if (DIL->getInlinedAt()) {
86 IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL));
87 } else {
88 // Use empty StringRef for basic block probe.
89 StringRef CalleeName;
90 if (const auto *CB = dyn_cast<CallBase>(Val: &I)) {
91 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
92 if (!isa<IntrinsicInst>(Val: &I))
93 CalleeName = GetCanonicalCalleeName(CB);
94 }
95 LineLocation Loc = LineLocation(Probe->Id, 0);
96 IRAnchors.emplace(args&: Loc, args: FunctionId(CalleeName));
97 }
98 }
99 } else {
100 // TODO: For line-number based profile(AutoFDO), currently only support
101 // find callsite anchors. In future, we need to parse all the non-call
102 // instructions to extract the line locations for profile matching.
103 if (!isa<CallBase>(Val: &I) || isa<IntrinsicInst>(Val: &I))
104 continue;
105
106 if (DIL->getInlinedAt()) {
107 IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL));
108 } else {
109 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
110 DIL, ProfileIsFS: FunctionSamples::ProfileIsFS);
111 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(Val: &I));
112 IRAnchors.emplace(args&: Callsite, args: FunctionId(CalleeName));
113 }
114 }
115 }
116 }
117}
118
119void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
120 AnchorMap &ProfileAnchors) const {
121 auto isInvalidLineOffset = [](uint32_t LineOffset) {
122 return LineOffset & 0x8000;
123 };
124
125 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
126 AnchorMap &ProfileAnchors) {
127 auto Ret = ProfileAnchors.try_emplace(k: Loc, args: CalleeName);
128 if (!Ret.second) {
129 // For multiple callees, which indicates it's an indirect call, we use a
130 // dummy name(UnknownIndirectCallee) as the indrect callee name.
131 Ret.first->second = FunctionId(UnknownIndirectCallee);
132 }
133 };
134
135 for (const auto &I : FS.getBodySamples()) {
136 const LineLocation &Loc = I.first;
137 if (isInvalidLineOffset(Loc.LineOffset))
138 continue;
139 for (const auto &C : I.second.getCallTargets())
140 InsertAnchor(Loc, C.first, ProfileAnchors);
141 }
142
143 for (const auto &I : FS.getCallsiteSamples()) {
144 const LineLocation &Loc = I.first;
145 if (isInvalidLineOffset(Loc.LineOffset))
146 continue;
147 for (const auto &C : I.second)
148 InsertAnchor(Loc, C.first, ProfileAnchors);
149 }
150}
151
152bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
153 Function *&FuncWithoutProfile) {
154 FuncWithoutProfile = nullptr;
155 auto R = FunctionsWithoutProfile.find(Key: IRFuncName);
156 if (R != FunctionsWithoutProfile.end())
157 FuncWithoutProfile = R->second;
158 return !FuncWithoutProfile;
159}
160
161bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
162 return SymbolMap->find(Key: ProfileFuncName) == SymbolMap->end();
163}
164
165bool SampleProfileMatcher::functionMatchesProfile(
166 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
167 bool FindMatchedProfileOnly) {
168 if (IRFuncName == ProfileFuncName)
169 return true;
170 if (!SalvageUnusedProfile)
171 return false;
172
173 // If IR function doesn't have profile and the profile is unused, try
174 // matching them.
175 Function *IRFunc = nullptr;
176 if (functionHasProfile(IRFuncName, FuncWithoutProfile&: IRFunc) ||
177 !isProfileUnused(ProfileFuncName))
178 return false;
179
180 assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
181 "IR function should be different from profile function to match");
182 return functionMatchesProfile(IRFunc&: *IRFunc, ProfFunc: ProfileFuncName,
183 FindMatchedProfileOnly);
184}
185
186LocToLocMap
187SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
188 const AnchorList &AnchorList2,
189 bool MatchUnusedFunction) {
190 int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
191 MaxDepth = Size1 + Size2;
192 auto Index = [&](int32_t I) { return I + MaxDepth; };
193
194 LocToLocMap EqualLocations;
195 if (MaxDepth == 0)
196 return EqualLocations;
197
198 // Backtrack the SES result.
199 auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
200 const AnchorList &AnchorList1,
201 const AnchorList &AnchorList2,
202 LocToLocMap &EqualLocations) {
203 int32_t X = Size1, Y = Size2;
204 for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
205 const auto &P = Trace[Depth];
206 int32_t K = X - Y;
207 int32_t PrevK = K;
208 if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
209 PrevK = K + 1;
210 else
211 PrevK = K - 1;
212
213 int32_t PrevX = P[Index(PrevK)];
214 int32_t PrevY = PrevX - PrevK;
215 while (X > PrevX && Y > PrevY) {
216 X--;
217 Y--;
218 EqualLocations.insert(x: {AnchorList1[X].first, AnchorList2[Y].first});
219 }
220
221 if (Depth == 0)
222 break;
223
224 if (Y == PrevY)
225 X--;
226 else if (X == PrevX)
227 Y--;
228 X = PrevX;
229 Y = PrevY;
230 }
231 };
232
233 // The greedy LCS/SES algorithm.
234
235 // An array contains the endpoints of the furthest reaching D-paths.
236 std::vector<int32_t> V(2 * MaxDepth + 1, -1);
237 V[Index(1)] = 0;
238 // Trace is used to backtrack the SES result.
239 std::vector<std::vector<int32_t>> Trace;
240 for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
241 Trace.push_back(x: V);
242 for (int32_t K = -Depth; K <= Depth; K += 2) {
243 int32_t X = 0, Y = 0;
244 if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
245 X = V[Index(K + 1)];
246 else
247 X = V[Index(K - 1)] + 1;
248 Y = X - K;
249 while (X < Size1 && Y < Size2 &&
250 functionMatchesProfile(
251 IRFuncName: AnchorList1[X].second, ProfileFuncName: AnchorList2[Y].second,
252 FindMatchedProfileOnly: !MatchUnusedFunction /* Find matched function only */))
253 X++, Y++;
254
255 V[Index(K)] = X;
256
257 if (X >= Size1 && Y >= Size2) {
258 // Length of an SES is D.
259 Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
260 return EqualLocations;
261 }
262 }
263 }
264 // Length of an SES is greater than MaxDepth.
265 return EqualLocations;
266}
267
268void SampleProfileMatcher::matchNonCallsiteLocs(
269 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
270 LocToLocMap &IRToProfileLocationMap) {
271 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
272 // Skip the unchanged location mapping to save memory.
273 if (From != To)
274 IRToProfileLocationMap.insert(x: {From, To});
275 };
276
277 // Use function's beginning location as the initial anchor.
278 int32_t LocationDelta = 0;
279 SmallVector<LineLocation> LastMatchedNonAnchors;
280 for (const auto &IR : IRAnchors) {
281 const auto &Loc = IR.first;
282 bool IsMatchedAnchor = false;
283 // Match the anchor location in lexical order.
284 auto R = MatchedAnchors.find(x: Loc);
285 if (R != MatchedAnchors.end()) {
286 const auto &Candidate = R->second;
287 InsertMatching(Loc, Candidate);
288 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
289 << " is matched from " << Loc << " to " << Candidate
290 << "\n");
291 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
292
293 // Match backwards for non-anchor locations.
294 // The locations in LastMatchedNonAnchors have been matched forwards
295 // based on the previous anchor, spilt it evenly and overwrite the
296 // second half based on the current anchor.
297 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
298 I < LastMatchedNonAnchors.size(); I++) {
299 const auto &L = LastMatchedNonAnchors[I];
300 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
301 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
302 InsertMatching(L, Candidate);
303 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
304 << " to " << Candidate << "\n");
305 }
306
307 IsMatchedAnchor = true;
308 LastMatchedNonAnchors.clear();
309 }
310
311 // Match forwards for non-anchor locations.
312 if (!IsMatchedAnchor) {
313 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
314 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
315 InsertMatching(Loc, Candidate);
316 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
317 << Candidate << "\n");
318 LastMatchedNonAnchors.emplace_back(Args: Loc);
319 }
320 }
321}
322
323// Filter the non-call locations from IRAnchors and ProfileAnchors and write
324// them into a list for random access later.
325void SampleProfileMatcher::getFilteredAnchorList(
326 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
327 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
328 for (const auto &I : IRAnchors) {
329 if (I.second.stringRef().empty())
330 continue;
331 FilteredIRAnchorsList.emplace_back(args: I);
332 }
333
334 for (const auto &I : ProfileAnchors)
335 FilteredProfileAnchorList.emplace_back(args: I);
336}
337
338// Call target name anchor based profile fuzzy matching.
339// Input:
340// For IR locations, the anchor is the callee name of direct callsite; For
341// profile locations, it's the call target name for BodySamples or inlinee's
342// profile name for CallsiteSamples.
343// Matching heuristic:
344// First match all the anchors using the diff algorithm, then split the
345// non-anchor locations between the two anchors evenly, first half are matched
346// based on the start anchor, second half are matched based on the end anchor.
347// For example, given:
348// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
349// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
350// The matching gives:
351// [1, 2(foo), 3, 5, 6(bar), 7]
352// | | | | | |
353// [1, 2, 3(foo), 4, 7, 8(bar), 9]
354// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
355void SampleProfileMatcher::runStaleProfileMatching(
356 const Function &F, const AnchorMap &IRAnchors,
357 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
358 bool RunCFGMatching, bool RunCGMatching) {
359 if (!RunCFGMatching && !RunCGMatching)
360 return;
361 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
362 << "\n");
363 assert(IRToProfileLocationMap.empty() &&
364 "Run stale profile matching only once per function");
365
366 AnchorList FilteredProfileAnchorList;
367 AnchorList FilteredIRAnchorsList;
368 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
369 FilteredProfileAnchorList);
370
371 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
372 return;
373
374 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
375 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
376 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
377 << " because the number of callsites in the IR is "
378 << FilteredIRAnchorsList.size()
379 << " and in the profile is "
380 << FilteredProfileAnchorList.size() << "\n");
381 return;
382 }
383
384 // Match the callsite anchors by finding the longest common subsequence
385 // between IR and profile.
386 // Define a match between two anchors as follows:
387 // 1) The function names of anchors are the same.
388 // 2) The similarity between the anchor functions is above a threshold if
389 // RunCGMatching is set.
390 // For 2), we only consider the anchor functions from IR and profile don't
391 // appear on either side to reduce the matching scope. Note that we need to
392 // use IR anchor as base(A side) to align with the order of
393 // IRToProfileLocationMap.
394 LocToLocMap MatchedAnchors =
395 longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList,
396 MatchUnusedFunction: RunCGMatching /* Match unused functions */);
397
398 // CFG level matching:
399 // Apply the callsite matchings to infer matching for the basic
400 // block(non-callsite) locations and write the result to
401 // IRToProfileLocationMap.
402 if (RunCFGMatching)
403 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
404}
405
406void SampleProfileMatcher::runOnFunction(Function &F) {
407 // We need to use flattened function samples for matching.
408 // Unlike IR, which includes all callsites from the source code, the callsites
409 // in profile only show up when they are hit by samples, i,e. the profile
410 // callsites in one context may differ from those in another context. To get
411 // the maximum number of callsites, we merge the function profiles from all
412 // contexts, aka, the flattened profile to find profile anchors.
413 const auto *FSFlattened = getFlattenedSamplesFor(F);
414 if (SalvageUnusedProfile && !FSFlattened) {
415 // Apply the matching in place to find the new function's matched profile.
416 // TODO: For extended profile format, if a function profile is unused and
417 // it's top-level, even if the profile is matched, it's not found in the
418 // profile. This is because sample reader only read the used profile at the
419 // beginning, we need to support loading the profile on-demand in future.
420 auto R = FuncToProfileNameMap.find(x: &F);
421 if (R != FuncToProfileNameMap.end())
422 FSFlattened = getFlattenedSamplesFor(Fname: R->second);
423 }
424 if (!FSFlattened)
425 return;
426
427 // Anchors for IR. It's a map from IR location to callee name, callee name is
428 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
429 // for unknown indrect callee name.
430 AnchorMap IRAnchors;
431 findIRAnchors(F, IRAnchors);
432 // Anchors for profile. It's a map from callsite location to a set of callee
433 // name.
434 AnchorMap ProfileAnchors;
435 findProfileAnchors(FS: *FSFlattened, ProfileAnchors);
436
437 // Compute the callsite match states for profile staleness report.
438 if (ReportProfileStaleness || PersistProfileStaleness)
439 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap: nullptr);
440
441 if (!SalvageStaleProfile)
442 return;
443 // For probe-based profiles, run matching only when profile checksum is
444 // mismatched.
445 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
446 !ProbeManager->profileIsValid(F, Samples: *FSFlattened);
447 bool RunCFGMatching =
448 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
449 bool RunCGMatching = SalvageUnusedProfile;
450 // For imported functions, the checksum metadata(pseudo_probe_desc) are
451 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
452 // transfer the info: add the attribute during pre-link phase and check it
453 // during post-link phase(see "profileIsValid").
454 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
455 F.addFnAttr(Kind: "profile-checksum-mismatch");
456
457 // The matching result will be saved to IRToProfileLocationMap, create a
458 // new map for each function.
459 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
460 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
461 RunCFGMatching, RunCGMatching);
462 // Find and update callsite match states after matching.
463 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
464 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
465 IRToProfileLocationMap: &IRToProfileLocationMap);
466}
467
468void SampleProfileMatcher::recordCallsiteMatchStates(
469 const Function &F, const AnchorMap &IRAnchors,
470 const AnchorMap &ProfileAnchors,
471 const LocToLocMap *IRToProfileLocationMap) {
472 bool IsPostMatch = IRToProfileLocationMap != nullptr;
473 auto &CallsiteMatchStates =
474 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(FnName: F.getName())];
475
476 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
477 // IRToProfileLocationMap is null in pre-match phrase.
478 if (!IRToProfileLocationMap)
479 return IRLoc;
480 const auto &ProfileLoc = IRToProfileLocationMap->find(x: IRLoc);
481 if (ProfileLoc != IRToProfileLocationMap->end())
482 return ProfileLoc->second;
483 else
484 return IRLoc;
485 };
486
487 for (const auto &I : IRAnchors) {
488 // After fuzzy profile matching, use the matching result to remap the
489 // current IR callsite.
490 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
491 const auto &IRCalleeId = I.second;
492 const auto &It = ProfileAnchors.find(x: ProfileLoc);
493 if (It == ProfileAnchors.end())
494 continue;
495 const auto &ProfCalleeId = It->second;
496 if (IRCalleeId == ProfCalleeId) {
497 auto It = CallsiteMatchStates.find(x: ProfileLoc);
498 if (It == CallsiteMatchStates.end())
499 CallsiteMatchStates.emplace(args: ProfileLoc, args: MatchState::InitialMatch);
500 else if (IsPostMatch) {
501 if (It->second == MatchState::InitialMatch)
502 It->second = MatchState::UnchangedMatch;
503 else if (It->second == MatchState::InitialMismatch)
504 It->second = MatchState::RecoveredMismatch;
505 }
506 }
507 }
508
509 // Check if there are any callsites in the profile that does not match to any
510 // IR callsites.
511 for (const auto &I : ProfileAnchors) {
512 const auto &Loc = I.first;
513 assert(!I.second.stringRef().empty() && "Callees should not be empty");
514 auto It = CallsiteMatchStates.find(x: Loc);
515 if (It == CallsiteMatchStates.end())
516 CallsiteMatchStates.emplace(args: Loc, args: MatchState::InitialMismatch);
517 else if (IsPostMatch) {
518 // Update the state if it's not matched(UnchangedMatch or
519 // RecoveredMismatch).
520 if (It->second == MatchState::InitialMismatch)
521 It->second = MatchState::UnchangedMismatch;
522 else if (It->second == MatchState::InitialMatch)
523 It->second = MatchState::RemovedMatch;
524 }
525 }
526}
527
528void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
529 bool IsTopLevel) {
530 const auto *FuncDesc = ProbeManager->getDesc(GUID: FS.getGUID());
531 // Skip the function that is external or renamed.
532 if (!FuncDesc)
533 return;
534
535 if (ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: FS)) {
536 if (IsTopLevel)
537 NumStaleProfileFunc++;
538 // Given currently all probe ids are after block probe ids, once the
539 // checksum is mismatched, it's likely all the callites are mismatched and
540 // dropped. We conservatively count all the samples as mismatched and stop
541 // counting the inlinees' profiles.
542 MismatchedFunctionSamples += FS.getTotalSamples();
543 return;
544 }
545
546 // Even the current-level function checksum is matched, it's possible that the
547 // nested inlinees' checksums are mismatched that affect the inlinee's sample
548 // loading, we need to go deeper to check the inlinees' function samples.
549 // Similarly, count all the samples as mismatched if the inlinee's checksum is
550 // mismatched using this recursive function.
551 for (const auto &I : FS.getCallsiteSamples())
552 for (const auto &CS : I.second)
553 countMismatchedFuncSamples(FS: CS.second, IsTopLevel: false);
554}
555
556void SampleProfileMatcher::countMismatchedCallsiteSamples(
557 const FunctionSamples &FS) {
558 auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName());
559 // Skip it if no mismatched callsite or this is an external function.
560 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
561 return;
562 const auto &CallsiteMatchStates = It->second;
563
564 auto findMatchState = [&](const LineLocation &Loc) {
565 auto It = CallsiteMatchStates.find(x: Loc);
566 if (It == CallsiteMatchStates.end())
567 return MatchState::Unknown;
568 return It->second;
569 };
570
571 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
572 uint64_t Samples) {
573 if (isMismatchState(State))
574 MismatchedCallsiteSamples += Samples;
575 else if (State == MatchState::RecoveredMismatch)
576 RecoveredCallsiteSamples += Samples;
577 };
578
579 // The non-inlined callsites are saved in the body samples of function
580 // profile, go through it to count the non-inlined callsite samples.
581 for (const auto &I : FS.getBodySamples())
582 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
583
584 // Count the inlined callsite samples.
585 for (const auto &I : FS.getCallsiteSamples()) {
586 auto State = findMatchState(I.first);
587 uint64_t CallsiteSamples = 0;
588 for (const auto &CS : I.second)
589 CallsiteSamples += CS.second.getTotalSamples();
590 AttributeMismatchedSamples(State, CallsiteSamples);
591
592 if (isMismatchState(State))
593 continue;
594
595 // When the current level of inlined call site matches the profiled call
596 // site, we need to go deeper along the inline tree to count mismatches from
597 // lower level inlinees.
598 for (const auto &CS : I.second)
599 countMismatchedCallsiteSamples(FS: CS.second);
600 }
601}
602
603void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
604 auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName());
605 // Skip it if no mismatched callsite or this is an external function.
606 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
607 return;
608 const auto &MatchStates = It->second;
609 [[maybe_unused]] bool OnInitialState =
610 isInitialState(State: MatchStates.begin()->second);
611 for (const auto &I : MatchStates) {
612 TotalProfiledCallsites++;
613 assert(
614 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
615 "Profile matching state is inconsistent");
616
617 if (isMismatchState(State: I.second))
618 NumMismatchedCallsites++;
619 else if (I.second == MatchState::RecoveredMismatch)
620 NumRecoveredCallsites++;
621 }
622}
623
624void SampleProfileMatcher::countCallGraphRecoveredSamples(
625 const FunctionSamples &FS,
626 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
627 if (CallGraphRecoveredProfiles.count(x: FS.getFunction())) {
628 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
629 return;
630 }
631
632 for (const auto &CM : FS.getCallsiteSamples()) {
633 for (const auto &CS : CM.second) {
634 countCallGraphRecoveredSamples(FS: CS.second, CallGraphRecoveredProfiles);
635 }
636 }
637}
638
639void SampleProfileMatcher::computeAndReportProfileStaleness() {
640 if (!ReportProfileStaleness && !PersistProfileStaleness)
641 return;
642
643 std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
644 if (SalvageUnusedProfile) {
645 for (const auto &I : FuncToProfileNameMap) {
646 CallGraphRecoveredProfiles.insert(x: I.second);
647 if (GlobalValue::isAvailableExternallyLinkage(Linkage: I.first->getLinkage()))
648 continue;
649 NumCallGraphRecoveredProfiledFunc++;
650 }
651 }
652
653 // Count profile mismatches for profile staleness report.
654 for (const auto &F : M) {
655 if (skipProfileForFunction(F))
656 continue;
657 // As the stats will be merged by linker, skip reporting the metrics for
658 // imported functions to avoid repeated counting.
659 if (GlobalValue::isAvailableExternallyLinkage(Linkage: F.getLinkage()))
660 continue;
661 const auto *FS = Reader.getSamplesFor(F);
662 if (!FS)
663 continue;
664 TotalProfiledFunc++;
665 TotalFunctionSamples += FS->getTotalSamples();
666
667 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
668 countCallGraphRecoveredSamples(FS: *FS, CallGraphRecoveredProfiles);
669
670 // Checksum mismatch is only used in pseudo-probe mode.
671 if (FunctionSamples::ProfileIsProbeBased)
672 countMismatchedFuncSamples(FS: *FS, IsTopLevel: true);
673
674 // Count mismatches and samples for calliste.
675 countMismatchCallsites(FS: *FS);
676 countMismatchedCallsiteSamples(FS: *FS);
677 }
678
679 if (ReportProfileStaleness) {
680 if (FunctionSamples::ProfileIsProbeBased) {
681 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
682 << ") of functions' profile are invalid and ("
683 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
684 << ") of samples are discarded due to function hash mismatch.\n";
685 }
686 if (SalvageUnusedProfile) {
687 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
688 << TotalProfiledFunc << ") of functions' profile are matched and ("
689 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
690 << ") of samples are reused by call graph matching.\n";
691 }
692
693 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
694 << TotalProfiledCallsites
695 << ") of callsites' profile are invalid and ("
696 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
697 << TotalFunctionSamples
698 << ") of samples are discarded due to callsite location mismatch.\n";
699 errs() << "(" << NumRecoveredCallsites << "/"
700 << (NumRecoveredCallsites + NumMismatchedCallsites)
701 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
702 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
703 << ") of samples are recovered by stale profile matching.\n";
704 }
705
706 if (PersistProfileStaleness) {
707 LLVMContext &Ctx = M.getContext();
708 MDBuilder MDB(Ctx);
709
710 SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
711 if (FunctionSamples::ProfileIsProbeBased) {
712 ProfStatsVec.emplace_back(Args: "NumStaleProfileFunc", Args&: NumStaleProfileFunc);
713 ProfStatsVec.emplace_back(Args: "TotalProfiledFunc", Args&: TotalProfiledFunc);
714 ProfStatsVec.emplace_back(Args: "MismatchedFunctionSamples",
715 Args&: MismatchedFunctionSamples);
716 ProfStatsVec.emplace_back(Args: "TotalFunctionSamples", Args&: TotalFunctionSamples);
717 }
718
719 if (SalvageUnusedProfile) {
720 ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredProfiledFunc",
721 Args&: NumCallGraphRecoveredProfiledFunc);
722 ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredFuncSamples",
723 Args&: NumCallGraphRecoveredFuncSamples);
724 }
725
726 ProfStatsVec.emplace_back(Args: "NumMismatchedCallsites", Args&: NumMismatchedCallsites);
727 ProfStatsVec.emplace_back(Args: "NumRecoveredCallsites", Args&: NumRecoveredCallsites);
728 ProfStatsVec.emplace_back(Args: "TotalProfiledCallsites", Args&: TotalProfiledCallsites);
729 ProfStatsVec.emplace_back(Args: "MismatchedCallsiteSamples",
730 Args&: MismatchedCallsiteSamples);
731 ProfStatsVec.emplace_back(Args: "RecoveredCallsiteSamples",
732 Args&: RecoveredCallsiteSamples);
733
734 auto *MD = MDB.createLLVMStats(LLVMStatsVec: ProfStatsVec);
735 auto *NMD = M.getOrInsertNamedMetadata(Name: "llvm.stats");
736 NMD->addOperand(M: MD);
737 }
738}
739
740void SampleProfileMatcher::findFunctionsWithoutProfile() {
741 // TODO: Support MD5 profile.
742 if (FunctionSamples::UseMD5)
743 return;
744 StringSet<> NamesInProfile;
745 if (auto NameTable = Reader.getNameTable()) {
746 for (auto Name : *NameTable)
747 NamesInProfile.insert(key: Name.stringRef());
748 }
749
750 for (auto &F : M) {
751 // Skip declarations, as even if the function can be matched, we have
752 // nothing to do with it.
753 if (F.isDeclaration())
754 continue;
755
756 StringRef CanonFName = FunctionSamples::getCanonicalFnName(FnName: F.getName());
757 const auto *FS = getFlattenedSamplesFor(F);
758 if (FS)
759 continue;
760
761 // For extended binary, functions fully inlined may not be loaded in the
762 // top-level profile, so check the NameTable which has the all symbol names
763 // in profile.
764 if (NamesInProfile.count(Key: CanonFName))
765 continue;
766
767 // For extended binary, non-profiled function symbols are in the profile
768 // symbol list table.
769 if (PSL && PSL->contains(Name: CanonFName))
770 continue;
771
772 LLVM_DEBUG(dbgs() << "Function " << CanonFName
773 << " is not in profile or profile symbol list.\n");
774 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
775 }
776}
777
778bool SampleProfileMatcher::functionMatchesProfileHelper(
779 const Function &IRFunc, const FunctionId &ProfFunc) {
780 // The value is in the range [0, 1]. The bigger the value is, the more similar
781 // two sequences are.
782 float Similarity = 0.0;
783
784 const auto *FSFlattened = getFlattenedSamplesFor(Fname: ProfFunc);
785 if (!FSFlattened)
786 return false;
787 // The check for similarity or checksum may not be reliable if the function is
788 // tiny, we use the number of basic block as a proxy for the function
789 // complexity and skip the matching if it's too small.
790 if (IRFunc.size() < MinFuncCountForCGMatching ||
791 FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
792 return false;
793
794 // For probe-based function, we first trust the checksum info. If the checksum
795 // doesn't match, we continue checking for similarity.
796 if (FunctionSamples::ProfileIsProbeBased) {
797 const auto *FuncDesc = ProbeManager->getDesc(F: IRFunc);
798 if (FuncDesc &&
799 !ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: *FSFlattened)) {
800 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
801 << "(IR) and " << ProfFunc << "(Profile) match.\n");
802
803 return true;
804 }
805 }
806
807 AnchorMap IRAnchors;
808 findIRAnchors(F: IRFunc, IRAnchors);
809 AnchorMap ProfileAnchors;
810 findProfileAnchors(FS: *FSFlattened, ProfileAnchors);
811
812 AnchorList FilteredIRAnchorsList;
813 AnchorList FilteredProfileAnchorList;
814 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
815 FilteredProfileAnchorList);
816
817 // Similarly skip the matching if the num of anchors is not enough.
818 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
819 FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
820 return false;
821
822 // Use the diff algorithm to find the LCS between IR and profile.
823
824 // Don't recursively match the callee function to avoid infinite matching,
825 // callee functions will be handled later since it's processed in top-down
826 // order .
827 LocToLocMap MatchedAnchors =
828 longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList,
829 MatchUnusedFunction: false /* Match unused functions */);
830
831 Similarity =
832 static_cast<float>(MatchedAnchors.size()) * 2 /
833 (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
834
835 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
836 << "(IR) and " << ProfFunc << "(profile) is "
837 << format("%.2f", Similarity) << "\n");
838 assert((Similarity >= 0 && Similarity <= 1.0) &&
839 "Similarity value should be in [0, 1]");
840 return Similarity * 100 > FuncProfileSimilarityThreshold;
841}
842
843// If FindMatchedProfileOnly is set to true, only use the processed function
844// results. This is used for skipping the repeated recursive matching.
845bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
846 const FunctionId &ProfFunc,
847 bool FindMatchedProfileOnly) {
848 auto R = FuncProfileMatchCache.find(x: {&IRFunc, ProfFunc});
849 if (R != FuncProfileMatchCache.end())
850 return R->second;
851
852 if (FindMatchedProfileOnly)
853 return false;
854
855 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
856 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
857 if (Matched) {
858 FuncToProfileNameMap[&IRFunc] = ProfFunc;
859 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
860 << " matches profile:" << ProfFunc << "\n");
861 }
862
863 return Matched;
864}
865
866void SampleProfileMatcher::runOnModule() {
867 ProfileConverter::flattenProfile(InputProfiles: Reader.getProfiles(), OutputProfiles&: FlattenedProfiles,
868 ProfileIsCS: FunctionSamples::ProfileIsCS);
869 if (SalvageUnusedProfile)
870 findFunctionsWithoutProfile();
871
872 // Process the matching in top-down order so that the caller matching result
873 // can be used to the callee matching.
874 std::vector<Function *> TopDownFunctionList;
875 TopDownFunctionList.reserve(n: M.size());
876 buildTopDownFuncOrder(CG, FunctionOrderList&: TopDownFunctionList);
877 for (auto *F : TopDownFunctionList) {
878 if (skipProfileForFunction(F: *F))
879 continue;
880 runOnFunction(F&: *F);
881 }
882
883 // Update the data in SampleLoader.
884 if (SalvageUnusedProfile)
885 for (auto &I : FuncToProfileNameMap) {
886 assert(I.first && "New function is null");
887 FunctionId FuncName(I.first->getName());
888 FuncNameToProfNameMap->emplace(Args&: FuncName, Args&: I.second);
889 // We need to remove the old entry to avoid duplicating the function
890 // processing.
891 SymbolMap->erase(Ctx: FuncName);
892 SymbolMap->emplace(Args&: I.second, Args: I.first);
893 }
894
895 if (SalvageStaleProfile)
896 distributeIRToProfileLocationMap();
897
898 computeAndReportProfileStaleness();
899}
900
901void SampleProfileMatcher::distributeIRToProfileLocationMap(
902 FunctionSamples &FS) {
903 const auto ProfileMappings = FuncMappings.find(Key: FS.getFuncName());
904 if (ProfileMappings != FuncMappings.end()) {
905 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
906 }
907
908 for (auto &Callees :
909 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
910 for (auto &FS : Callees.second) {
911 distributeIRToProfileLocationMap(FS&: FS.second);
912 }
913 }
914}
915
916// Use a central place to distribute the matching results. Outlined and inlined
917// profile with the function name will be set to the same pointer.
918void SampleProfileMatcher::distributeIRToProfileLocationMap() {
919 for (auto &I : Reader.getProfiles()) {
920 distributeIRToProfileLocationMap(FS&: I.second);
921 }
922}
923