1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15#include "llvm/Demangle/Demangle.h"
16#include "llvm/IR/IntrinsicInst.h"
17#include "llvm/IR/MDBuilder.h"
18#include "llvm/Support/CommandLine.h"
19#include "llvm/Transforms/Utils/LongestCommonSequence.h"
20
21using namespace llvm;
22using namespace sampleprof;
23
24#define DEBUG_TYPE "sample-profile-matcher"
25
26static cl::opt<unsigned> FuncProfileSimilarityThreshold(
27 "func-profile-similarity-threshold", cl::Hidden, cl::init(Val: 80),
28 cl::desc("Consider a profile matches a function if the similarity of their "
29 "callee sequences is above the specified percentile."));
30
31static cl::opt<unsigned> MinFuncCountForCGMatching(
32 "min-func-count-for-cg-matching", cl::Hidden, cl::init(Val: 5),
33 cl::desc("The minimum number of basic blocks required for a function to "
34 "run stale profile call graph matching."));
35
36static cl::opt<unsigned> MinCallCountForCGMatching(
37 "min-call-count-for-cg-matching", cl::Hidden, cl::init(Val: 3),
38 cl::desc("The minimum number of call anchors required for a function to "
39 "run stale profile call graph matching."));
40
41static cl::opt<bool> LoadFuncProfileforCGMatching(
42 "load-func-profile-for-cg-matching", cl::Hidden, cl::init(Val: true),
43 cl::desc(
44 "Load top-level profiles that the sample reader initially skipped for "
45 "the call-graph matching (only meaningful for extended binary "
46 "format)"));
47
48extern cl::opt<bool> SalvageStaleProfile;
49extern cl::opt<bool> SalvageUnusedProfile;
50extern cl::opt<bool> PersistProfileStaleness;
51extern cl::opt<bool> ReportProfileStaleness;
52
53static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
54 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
55 cl::desc("The maximum number of callsites in a function, above which stale "
56 "profile matching will be skipped."));
57
58void SampleProfileMatcher::findIRAnchors(const Function &F,
59 AnchorMap &IRAnchors) const {
60 // For inlined code, recover the original callsite and callee by finding the
61 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
62 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
63 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
64 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
65 const DILocation *PrevDIL = nullptr;
66 do {
67 PrevDIL = DIL;
68 DIL = DIL->getInlinedAt();
69 } while (DIL->getInlinedAt());
70
71 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
72 DIL, ProfileIsFS: FunctionSamples::ProfileIsFS);
73 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
74 return std::make_pair(x&: Callsite, y: FunctionId(CalleeName));
75 };
76
77 auto GetCanonicalCalleeName = [](const CallBase *CB) {
78 StringRef CalleeName = UnknownIndirectCallee;
79 if (Function *Callee = CB->getCalledFunction())
80 CalleeName = FunctionSamples::getCanonicalFnName(FnName: Callee->getName());
81 return CalleeName;
82 };
83
84 // Extract profile matching anchors in the IR.
85 for (auto &BB : F) {
86 for (auto &I : BB) {
87 DILocation *DIL = I.getDebugLoc();
88 if (!DIL)
89 continue;
90
91 if (FunctionSamples::ProfileIsProbeBased) {
92 if (auto Probe = extractProbe(Inst: I)) {
93 // Flatten inlined IR for the matching.
94 if (DIL->getInlinedAt()) {
95 IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL));
96 } else {
97 // Use empty StringRef for basic block probe.
98 StringRef CalleeName;
99 if (const auto *CB = dyn_cast<CallBase>(Val: &I)) {
100 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
101 if (!isa<IntrinsicInst>(Val: &I))
102 CalleeName = GetCanonicalCalleeName(CB);
103 }
104 LineLocation Loc = LineLocation(Probe->Id, 0);
105 IRAnchors.emplace(args&: Loc, args: FunctionId(CalleeName));
106 }
107 }
108 } else {
109 // TODO: For line-number based profile(AutoFDO), currently only support
110 // find callsite anchors. In future, we need to parse all the non-call
111 // instructions to extract the line locations for profile matching.
112 if (!isa<CallBase>(Val: &I) || isa<IntrinsicInst>(Val: &I))
113 continue;
114
115 if (DIL->getInlinedAt()) {
116 IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL));
117 } else {
118 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
119 DIL, ProfileIsFS: FunctionSamples::ProfileIsFS);
120 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(Val: &I));
121 IRAnchors.emplace(args&: Callsite, args: FunctionId(CalleeName));
122 }
123 }
124 }
125 }
126}
127
128void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
129 AnchorMap &ProfileAnchors) const {
130 auto isInvalidLineOffset = [](uint32_t LineOffset) {
131 return LineOffset & 0x8000;
132 };
133
134 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
135 AnchorMap &ProfileAnchors) {
136 auto Ret = ProfileAnchors.try_emplace(k: Loc, args: CalleeName);
137 if (!Ret.second) {
138 // For multiple callees, which indicates it's an indirect call, we use a
139 // dummy name(UnknownIndirectCallee) as the indrect callee name.
140 Ret.first->second = FunctionId(UnknownIndirectCallee);
141 }
142 };
143
144 for (const auto &I : FS.getBodySamples()) {
145 const LineLocation &Loc = I.first;
146 if (isInvalidLineOffset(Loc.LineOffset))
147 continue;
148 for (const auto &C : I.second.getCallTargets())
149 InsertAnchor(Loc, C.first, ProfileAnchors);
150 }
151
152 for (const auto &I : FS.getCallsiteSamples()) {
153 const LineLocation &Loc = I.first;
154 if (isInvalidLineOffset(Loc.LineOffset))
155 continue;
156 for (const auto &C : I.second)
157 InsertAnchor(Loc, C.first, ProfileAnchors);
158 }
159}
160
161bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
162 Function *&FuncWithoutProfile) {
163 FuncWithoutProfile = nullptr;
164 auto R = FunctionsWithoutProfile.find(Key: IRFuncName);
165 if (R != FunctionsWithoutProfile.end())
166 FuncWithoutProfile = R->second;
167 return !FuncWithoutProfile;
168}
169
170bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
171 return SymbolMap->find(Key: ProfileFuncName) == SymbolMap->end();
172}
173
174bool SampleProfileMatcher::functionMatchesProfile(
175 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
176 bool FindMatchedProfileOnly) {
177 if (IRFuncName == ProfileFuncName)
178 return true;
179 if (!SalvageUnusedProfile)
180 return false;
181
182 // If IR function doesn't have profile and the profile is unused, try
183 // matching them.
184 Function *IRFunc = nullptr;
185 if (functionHasProfile(IRFuncName, FuncWithoutProfile&: IRFunc) ||
186 !isProfileUnused(ProfileFuncName))
187 return false;
188
189 assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
190 "IR function should be different from profile function to match");
191 return functionMatchesProfile(IRFunc&: *IRFunc, ProfFunc: ProfileFuncName,
192 FindMatchedProfileOnly);
193}
194
195LocToLocMap
196SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
197 const AnchorList &AnchorList2,
198 bool MatchUnusedFunction) {
199 LocToLocMap MatchedAnchors;
200 llvm::longestCommonSequence<LineLocation, FunctionId>(
201 AnchorList1, AnchorList2,
202 FunctionMatchesProfile: [&](const FunctionId &A, const FunctionId &B) {
203 return functionMatchesProfile(
204 IRFuncName: A, ProfileFuncName: B,
205 FindMatchedProfileOnly: !MatchUnusedFunction // Find matched function only
206 );
207 },
208 InsertMatching: [&](LineLocation A, LineLocation B) {
209 MatchedAnchors.try_emplace(k: A, args&: B);
210 });
211 return MatchedAnchors;
212}
213
214void SampleProfileMatcher::matchNonCallsiteLocs(
215 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
216 LocToLocMap &IRToProfileLocationMap) {
217 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
218 // Skip the unchanged location mapping to save memory.
219 if (From != To)
220 IRToProfileLocationMap.insert(x: {From, To});
221 };
222
223 // Use function's beginning location as the initial anchor.
224 int32_t LocationDelta = 0;
225 SmallVector<LineLocation> LastMatchedNonAnchors;
226 for (const auto &IR : IRAnchors) {
227 const auto &Loc = IR.first;
228 bool IsMatchedAnchor = false;
229 // Match the anchor location in lexical order.
230 auto R = MatchedAnchors.find(x: Loc);
231 if (R != MatchedAnchors.end()) {
232 const auto &Candidate = R->second;
233 InsertMatching(Loc, Candidate);
234 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
235 << " is matched from " << Loc << " to " << Candidate
236 << "\n");
237 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
238
239 // Match backwards for non-anchor locations.
240 // The locations in LastMatchedNonAnchors have been matched forwards
241 // based on the previous anchor, spilt it evenly and overwrite the
242 // second half based on the current anchor.
243 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
244 I < LastMatchedNonAnchors.size(); I++) {
245 const auto &L = LastMatchedNonAnchors[I];
246 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
247 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
248 InsertMatching(L, Candidate);
249 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
250 << " to " << Candidate << "\n");
251 }
252
253 IsMatchedAnchor = true;
254 LastMatchedNonAnchors.clear();
255 }
256
257 // Match forwards for non-anchor locations.
258 if (!IsMatchedAnchor) {
259 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
260 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
261 InsertMatching(Loc, Candidate);
262 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
263 << Candidate << "\n");
264 LastMatchedNonAnchors.emplace_back(Args: Loc);
265 }
266 }
267}
268
269// Filter the non-call locations from IRAnchors and ProfileAnchors and write
270// them into a list for random access later.
271void SampleProfileMatcher::getFilteredAnchorList(
272 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
273 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
274 for (const auto &I : IRAnchors) {
275 if (I.second.stringRef().empty())
276 continue;
277 FilteredIRAnchorsList.emplace_back(args: I);
278 }
279
280 for (const auto &I : ProfileAnchors)
281 FilteredProfileAnchorList.emplace_back(args: I);
282}
283
284// Call target name anchor based profile fuzzy matching.
285// Input:
286// For IR locations, the anchor is the callee name of direct callsite; For
287// profile locations, it's the call target name for BodySamples or inlinee's
288// profile name for CallsiteSamples.
289// Matching heuristic:
290// First match all the anchors using the diff algorithm, then split the
291// non-anchor locations between the two anchors evenly, first half are matched
292// based on the start anchor, second half are matched based on the end anchor.
293// For example, given:
294// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
295// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
296// The matching gives:
297// [1, 2(foo), 3, 5, 6(bar), 7]
298// | | | | | |
299// [1, 2, 3(foo), 4, 7, 8(bar), 9]
300// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
301void SampleProfileMatcher::runStaleProfileMatching(
302 const Function &F, const AnchorMap &IRAnchors,
303 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
304 bool RunCFGMatching, bool RunCGMatching) {
305 if (!RunCFGMatching && !RunCGMatching)
306 return;
307 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
308 << "\n");
309 assert(IRToProfileLocationMap.empty() &&
310 "Run stale profile matching only once per function");
311
312 AnchorList FilteredProfileAnchorList;
313 AnchorList FilteredIRAnchorsList;
314 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
315 FilteredProfileAnchorList);
316
317 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
318 return;
319
320 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
321 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
322 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
323 << " because the number of callsites in the IR is "
324 << FilteredIRAnchorsList.size()
325 << " and in the profile is "
326 << FilteredProfileAnchorList.size() << "\n");
327 return;
328 }
329
330 // Match the callsite anchors by finding the longest common subsequence
331 // between IR and profile.
332 // Define a match between two anchors as follows:
333 // 1) The function names of anchors are the same.
334 // 2) The similarity between the anchor functions is above a threshold if
335 // RunCGMatching is set.
336 // For 2), we only consider the anchor functions from IR and profile don't
337 // appear on either side to reduce the matching scope. Note that we need to
338 // use IR anchor as base(A side) to align with the order of
339 // IRToProfileLocationMap.
340 LocToLocMap MatchedAnchors =
341 longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList,
342 MatchUnusedFunction: RunCGMatching /* Match unused functions */);
343
344 // CFG level matching:
345 // Apply the callsite matchings to infer matching for the basic
346 // block(non-callsite) locations and write the result to
347 // IRToProfileLocationMap.
348 if (RunCFGMatching)
349 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
350}
351
352void SampleProfileMatcher::runOnFunction(Function &F) {
353 // We need to use flattened function samples for matching.
354 // Unlike IR, which includes all callsites from the source code, the callsites
355 // in profile only show up when they are hit by samples, i,e. the profile
356 // callsites in one context may differ from those in another context. To get
357 // the maximum number of callsites, we merge the function profiles from all
358 // contexts, aka, the flattened profile to find profile anchors.
359 const auto *FSForMatching = getFlattenedSamplesFor(F);
360 if (SalvageUnusedProfile && !FSForMatching) {
361 // Apply the matching in place to find the new function's matched profile.
362 auto R = FuncToProfileNameMap.find(x: &F);
363 if (R != FuncToProfileNameMap.end()) {
364 FSForMatching = getFlattenedSamplesFor(Fname: R->second);
365 // Try to find the salvaged top-level profiles that are explicitly loaded
366 // for the matching, see "functionMatchesProfileHelper" for the details.
367 if (!FSForMatching && LoadFuncProfileforCGMatching)
368 FSForMatching = Reader.getSamplesFor(Fname: R->second.stringRef());
369 }
370 }
371 if (!FSForMatching)
372 return;
373
374 // Anchors for IR. It's a map from IR location to callee name, callee name is
375 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
376 // for unknown indrect callee name.
377 AnchorMap IRAnchors;
378 findIRAnchors(F, IRAnchors);
379 // Anchors for profile. It's a map from callsite location to a set of callee
380 // name.
381 AnchorMap ProfileAnchors;
382 findProfileAnchors(FS: *FSForMatching, ProfileAnchors);
383
384 // Compute the callsite match states for profile staleness report.
385 if (ReportProfileStaleness || PersistProfileStaleness)
386 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap: nullptr);
387
388 if (!SalvageStaleProfile)
389 return;
390 // For probe-based profiles, run matching only when profile checksum is
391 // mismatched.
392 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
393 !ProbeManager->profileIsValid(F, Samples: *FSForMatching);
394 bool RunCFGMatching =
395 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
396 bool RunCGMatching = SalvageUnusedProfile;
397 // For imported functions, the checksum metadata(pseudo_probe_desc) are
398 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
399 // transfer the info: add the attribute during pre-link phase and check it
400 // during post-link phase(see "profileIsValid").
401 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
402 F.addFnAttr(Kind: "profile-checksum-mismatch");
403
404 // The matching result will be saved to IRToProfileLocationMap, create a
405 // new map for each function.
406 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
407 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
408 RunCFGMatching, RunCGMatching);
409 // Find and update callsite match states after matching.
410 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
411 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
412 IRToProfileLocationMap: &IRToProfileLocationMap);
413}
414
415void SampleProfileMatcher::recordCallsiteMatchStates(
416 const Function &F, const AnchorMap &IRAnchors,
417 const AnchorMap &ProfileAnchors,
418 const LocToLocMap *IRToProfileLocationMap) {
419 bool IsPostMatch = IRToProfileLocationMap != nullptr;
420 auto &CallsiteMatchStates =
421 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(FnName: F.getName())];
422
423 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
424 // IRToProfileLocationMap is null in pre-match phrase.
425 if (!IRToProfileLocationMap)
426 return IRLoc;
427 const auto &ProfileLoc = IRToProfileLocationMap->find(x: IRLoc);
428 if (ProfileLoc != IRToProfileLocationMap->end())
429 return ProfileLoc->second;
430 else
431 return IRLoc;
432 };
433
434 for (const auto &I : IRAnchors) {
435 // After fuzzy profile matching, use the matching result to remap the
436 // current IR callsite.
437 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
438 const auto &IRCalleeId = I.second;
439 const auto &It = ProfileAnchors.find(x: ProfileLoc);
440 if (It == ProfileAnchors.end())
441 continue;
442 const auto &ProfCalleeId = It->second;
443 if (IRCalleeId == ProfCalleeId) {
444 auto It = CallsiteMatchStates.find(x: ProfileLoc);
445 if (It == CallsiteMatchStates.end())
446 CallsiteMatchStates.emplace(args: ProfileLoc, args: MatchState::InitialMatch);
447 else if (IsPostMatch) {
448 if (It->second == MatchState::InitialMatch)
449 It->second = MatchState::UnchangedMatch;
450 else if (It->second == MatchState::InitialMismatch)
451 It->second = MatchState::RecoveredMismatch;
452 }
453 }
454 }
455
456 // Check if there are any callsites in the profile that does not match to any
457 // IR callsites.
458 for (const auto &I : ProfileAnchors) {
459 const auto &Loc = I.first;
460 assert(!I.second.stringRef().empty() && "Callees should not be empty");
461 auto It = CallsiteMatchStates.find(x: Loc);
462 if (It == CallsiteMatchStates.end())
463 CallsiteMatchStates.emplace(args: Loc, args: MatchState::InitialMismatch);
464 else if (IsPostMatch) {
465 // Update the state if it's not matched(UnchangedMatch or
466 // RecoveredMismatch).
467 if (It->second == MatchState::InitialMismatch)
468 It->second = MatchState::UnchangedMismatch;
469 else if (It->second == MatchState::InitialMatch)
470 It->second = MatchState::RemovedMatch;
471 }
472 }
473}
474
475void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
476 bool IsTopLevel) {
477 const auto *FuncDesc = ProbeManager->getDesc(GUID: FS.getGUID());
478 // Skip the function that is external or renamed.
479 if (!FuncDesc)
480 return;
481
482 if (ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: FS)) {
483 if (IsTopLevel)
484 NumStaleProfileFunc++;
485 // Given currently all probe ids are after block probe ids, once the
486 // checksum is mismatched, it's likely all the callites are mismatched and
487 // dropped. We conservatively count all the samples as mismatched and stop
488 // counting the inlinees' profiles.
489 MismatchedFunctionSamples += FS.getTotalSamples();
490 return;
491 }
492
493 // Even the current-level function checksum is matched, it's possible that the
494 // nested inlinees' checksums are mismatched that affect the inlinee's sample
495 // loading, we need to go deeper to check the inlinees' function samples.
496 // Similarly, count all the samples as mismatched if the inlinee's checksum is
497 // mismatched using this recursive function.
498 for (const auto &I : FS.getCallsiteSamples())
499 for (const auto &CS : I.second)
500 countMismatchedFuncSamples(FS: CS.second, IsTopLevel: false);
501}
502
503void SampleProfileMatcher::countMismatchedCallsiteSamples(
504 const FunctionSamples &FS) {
505 auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName());
506 // Skip it if no mismatched callsite or this is an external function.
507 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
508 return;
509 const auto &CallsiteMatchStates = It->second;
510
511 auto findMatchState = [&](const LineLocation &Loc) {
512 auto It = CallsiteMatchStates.find(x: Loc);
513 if (It == CallsiteMatchStates.end())
514 return MatchState::Unknown;
515 return It->second;
516 };
517
518 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
519 uint64_t Samples) {
520 if (isMismatchState(State))
521 MismatchedCallsiteSamples += Samples;
522 else if (State == MatchState::RecoveredMismatch)
523 RecoveredCallsiteSamples += Samples;
524 };
525
526 // The non-inlined callsites are saved in the body samples of function
527 // profile, go through it to count the non-inlined callsite samples.
528 for (const auto &I : FS.getBodySamples())
529 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
530
531 // Count the inlined callsite samples.
532 for (const auto &I : FS.getCallsiteSamples()) {
533 auto State = findMatchState(I.first);
534 uint64_t CallsiteSamples = 0;
535 for (const auto &CS : I.second)
536 CallsiteSamples += CS.second.getTotalSamples();
537 AttributeMismatchedSamples(State, CallsiteSamples);
538
539 if (isMismatchState(State))
540 continue;
541
542 // When the current level of inlined call site matches the profiled call
543 // site, we need to go deeper along the inline tree to count mismatches from
544 // lower level inlinees.
545 for (const auto &CS : I.second)
546 countMismatchedCallsiteSamples(FS: CS.second);
547 }
548}
549
550void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
551 auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName());
552 // Skip it if no mismatched callsite or this is an external function.
553 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
554 return;
555 const auto &MatchStates = It->second;
556 [[maybe_unused]] bool OnInitialState =
557 isInitialState(State: MatchStates.begin()->second);
558 for (const auto &I : MatchStates) {
559 TotalProfiledCallsites++;
560 assert(
561 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
562 "Profile matching state is inconsistent");
563
564 if (isMismatchState(State: I.second))
565 NumMismatchedCallsites++;
566 else if (I.second == MatchState::RecoveredMismatch)
567 NumRecoveredCallsites++;
568 }
569}
570
571void SampleProfileMatcher::countCallGraphRecoveredSamples(
572 const FunctionSamples &FS,
573 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
574 if (CallGraphRecoveredProfiles.count(x: FS.getFunction())) {
575 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
576 return;
577 }
578
579 for (const auto &CM : FS.getCallsiteSamples()) {
580 for (const auto &CS : CM.second) {
581 countCallGraphRecoveredSamples(FS: CS.second, CallGraphRecoveredProfiles);
582 }
583 }
584}
585
586void SampleProfileMatcher::computeAndReportProfileStaleness() {
587 if (!ReportProfileStaleness && !PersistProfileStaleness)
588 return;
589
590 std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
591 if (SalvageUnusedProfile) {
592 for (const auto &I : FuncToProfileNameMap) {
593 CallGraphRecoveredProfiles.insert(x: I.second);
594 if (GlobalValue::isAvailableExternallyLinkage(Linkage: I.first->getLinkage()))
595 continue;
596 NumCallGraphRecoveredProfiledFunc++;
597 }
598 }
599
600 // Count profile mismatches for profile staleness report.
601 for (const auto &F : M) {
602 if (skipProfileForFunction(F))
603 continue;
604 // As the stats will be merged by linker, skip reporting the metrics for
605 // imported functions to avoid repeated counting.
606 if (GlobalValue::isAvailableExternallyLinkage(Linkage: F.getLinkage()))
607 continue;
608 const auto *FS = Reader.getSamplesFor(F);
609 if (!FS)
610 continue;
611 TotalProfiledFunc++;
612 TotalFunctionSamples += FS->getTotalSamples();
613
614 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
615 countCallGraphRecoveredSamples(FS: *FS, CallGraphRecoveredProfiles);
616
617 // Checksum mismatch is only used in pseudo-probe mode.
618 if (FunctionSamples::ProfileIsProbeBased)
619 countMismatchedFuncSamples(FS: *FS, IsTopLevel: true);
620
621 // Count mismatches and samples for calliste.
622 countMismatchCallsites(FS: *FS);
623 countMismatchedCallsiteSamples(FS: *FS);
624 }
625
626 if (ReportProfileStaleness) {
627 if (FunctionSamples::ProfileIsProbeBased) {
628 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
629 << ") of functions' profile are invalid and ("
630 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
631 << ") of samples are discarded due to function hash mismatch.\n";
632 }
633 if (SalvageUnusedProfile) {
634 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
635 << TotalProfiledFunc << ") of functions' profile are matched and ("
636 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
637 << ") of samples are reused by call graph matching.\n";
638 }
639
640 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
641 << TotalProfiledCallsites
642 << ") of callsites' profile are invalid and ("
643 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
644 << TotalFunctionSamples
645 << ") of samples are discarded due to callsite location mismatch.\n";
646 errs() << "(" << NumRecoveredCallsites << "/"
647 << (NumRecoveredCallsites + NumMismatchedCallsites)
648 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
649 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
650 << ") of samples are recovered by stale profile matching.\n";
651 }
652
653 if (PersistProfileStaleness) {
654 LLVMContext &Ctx = M.getContext();
655 MDBuilder MDB(Ctx);
656
657 SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
658 if (FunctionSamples::ProfileIsProbeBased) {
659 ProfStatsVec.emplace_back(Args: "NumStaleProfileFunc", Args&: NumStaleProfileFunc);
660 ProfStatsVec.emplace_back(Args: "TotalProfiledFunc", Args&: TotalProfiledFunc);
661 ProfStatsVec.emplace_back(Args: "MismatchedFunctionSamples",
662 Args&: MismatchedFunctionSamples);
663 ProfStatsVec.emplace_back(Args: "TotalFunctionSamples", Args&: TotalFunctionSamples);
664 }
665
666 if (SalvageUnusedProfile) {
667 ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredProfiledFunc",
668 Args&: NumCallGraphRecoveredProfiledFunc);
669 ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredFuncSamples",
670 Args&: NumCallGraphRecoveredFuncSamples);
671 }
672
673 ProfStatsVec.emplace_back(Args: "NumMismatchedCallsites", Args&: NumMismatchedCallsites);
674 ProfStatsVec.emplace_back(Args: "NumRecoveredCallsites", Args&: NumRecoveredCallsites);
675 ProfStatsVec.emplace_back(Args: "TotalProfiledCallsites", Args&: TotalProfiledCallsites);
676 ProfStatsVec.emplace_back(Args: "MismatchedCallsiteSamples",
677 Args&: MismatchedCallsiteSamples);
678 ProfStatsVec.emplace_back(Args: "RecoveredCallsiteSamples",
679 Args&: RecoveredCallsiteSamples);
680
681 auto *MD = MDB.createLLVMStats(LLVMStatsVec: ProfStatsVec);
682 auto *NMD = M.getOrInsertNamedMetadata(Name: "llvm.stats");
683 NMD->addOperand(M: MD);
684 }
685}
686
687void SampleProfileMatcher::findFunctionsWithoutProfile() {
688 // TODO: Support MD5 profile.
689 if (FunctionSamples::UseMD5)
690 return;
691 StringSet<> NamesInProfile;
692 if (auto NameTable = Reader.getNameTable()) {
693 for (auto Name : *NameTable)
694 NamesInProfile.insert(key: Name.stringRef());
695 }
696
697 for (auto &F : M) {
698 // Skip declarations, as even if the function can be matched, we have
699 // nothing to do with it.
700 if (F.isDeclaration())
701 continue;
702
703 StringRef CanonFName = FunctionSamples::getCanonicalFnName(FnName: F.getName());
704 const auto *FS = getFlattenedSamplesFor(F);
705 if (FS)
706 continue;
707
708 // For extended binary, functions fully inlined may not be loaded in the
709 // top-level profile, so check the NameTable which has the all symbol names
710 // in profile.
711 if (NamesInProfile.count(Key: CanonFName))
712 continue;
713
714 // For extended binary, non-profiled function symbols are in the profile
715 // symbol list table.
716 if (PSL && PSL->contains(Name: CanonFName))
717 continue;
718
719 LLVM_DEBUG(dbgs() << "Function " << CanonFName
720 << " is not in profile or profile symbol list.\n");
721 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
722 }
723}
724
725bool SampleProfileMatcher::functionMatchesProfileHelper(
726 const Function &IRFunc, const FunctionId &ProfFunc) {
727 // The value is in the range [0, 1]. The bigger the value is, the more similar
728 // two sequences are.
729 float Similarity = 0.0;
730
731 // Match the functions if they have the same base name(after demangling) and
732 // skip the similarity check.
733 ItaniumPartialDemangler Demangler;
734 // Helper lambda to demangle and get the base name. If the demangling failed,
735 // return an empty string.
736 auto GetBaseName = [&](StringRef FName) {
737 auto FunctionName = FName.str();
738 if (Demangler.partialDemangle(MangledName: FunctionName.c_str()))
739 return std::string();
740 size_t BaseNameSize = 0;
741 // The demangler API follows the __cxa_demangle one, and thus needs a
742 // pointer that originates from malloc (or nullptr) and the caller is
743 // responsible for free()-ing the buffer.
744 char *BaseNamePtr = Demangler.getFunctionBaseName(Buf: nullptr, N: &BaseNameSize);
745 std::string Result = (BaseNamePtr && BaseNameSize)
746 ? std::string(BaseNamePtr, BaseNameSize)
747 : std::string();
748 free(ptr: BaseNamePtr);
749 return Result;
750 };
751 auto IRBaseName = GetBaseName(IRFunc.getName());
752 auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
753 if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
754 LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
755 << ProfFunc << "(Profile) share the same base name: "
756 << IRBaseName << ".\n");
757 return true;
758 }
759
760 const auto *FSForMatching = getFlattenedSamplesFor(Fname: ProfFunc);
761 // With extbinary profile format, initial profile loading only reads profile
762 // based on current function names in the module.
763 // However, if a function is renamed, sample loader skips to load its original
764 // profile(which has a different name), we will miss this case. To address
765 // this, we load the top-level profile candidate explicitly for the matching.
766 if (!FSForMatching && LoadFuncProfileforCGMatching) {
767 DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
768 if (std::error_code EC = Reader.read(FuncsToUse: TopLevelFunc))
769 return false;
770 FSForMatching = Reader.getSamplesFor(Fname: ProfFunc.stringRef());
771 LLVM_DEBUG({
772 if (FSForMatching)
773 dbgs() << "Read top-level function " << ProfFunc
774 << " for call-graph matching\n";
775 });
776 }
777 if (!FSForMatching)
778 return false;
779 // The check for similarity or checksum may not be reliable if the function is
780 // tiny, we use the number of basic block as a proxy for the function
781 // complexity and skip the matching if it's too small.
782 if (IRFunc.size() < MinFuncCountForCGMatching ||
783 FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
784 return false;
785
786 // For probe-based function, we first trust the checksum info. If the checksum
787 // doesn't match, we continue checking for similarity.
788 if (FunctionSamples::ProfileIsProbeBased) {
789 const auto *FuncDesc = ProbeManager->getDesc(F: IRFunc);
790 if (FuncDesc &&
791 !ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: *FSForMatching)) {
792 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
793 << "(IR) and " << ProfFunc << "(Profile) match.\n");
794
795 return true;
796 }
797 }
798
799 AnchorMap IRAnchors;
800 findIRAnchors(F: IRFunc, IRAnchors);
801 AnchorMap ProfileAnchors;
802 findProfileAnchors(FS: *FSForMatching, ProfileAnchors);
803
804 AnchorList FilteredIRAnchorsList;
805 AnchorList FilteredProfileAnchorList;
806 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
807 FilteredProfileAnchorList);
808
809 // Similarly skip the matching if the num of anchors is not enough.
810 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
811 FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
812 return false;
813
814 // Use the diff algorithm to find the LCS between IR and profile.
815
816 // Don't recursively match the callee function to avoid infinite matching,
817 // callee functions will be handled later since it's processed in top-down
818 // order .
819 LocToLocMap MatchedAnchors =
820 longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList,
821 MatchUnusedFunction: false /* Match unused functions */);
822
823 Similarity = static_cast<float>(MatchedAnchors.size()) /
824 FilteredProfileAnchorList.size();
825
826 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
827 << "(IR) and " << ProfFunc << "(profile) is "
828 << format("%.2f", Similarity) << "\n");
829 assert((Similarity >= 0 && Similarity <= 1.0) &&
830 "Similarity value should be in [0, 1]");
831 return Similarity * 100 > FuncProfileSimilarityThreshold;
832}
833
834// If FindMatchedProfileOnly is set to true, only use the processed function
835// results. This is used for skipping the repeated recursive matching.
836bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
837 const FunctionId &ProfFunc,
838 bool FindMatchedProfileOnly) {
839 auto R = FuncProfileMatchCache.find(x: {&IRFunc, ProfFunc});
840 if (R != FuncProfileMatchCache.end())
841 return R->second;
842
843 if (FindMatchedProfileOnly)
844 return false;
845
846 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
847 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
848 if (Matched) {
849 FuncToProfileNameMap[&IRFunc] = ProfFunc;
850 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
851 << " matches profile:" << ProfFunc << "\n");
852 }
853
854 return Matched;
855}
856
857void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
858 DenseSet<StringRef> ProfileSalvagedFuncs;
859 // Update FuncNameToProfNameMap and SymbolMap.
860 for (auto &I : FuncToProfileNameMap) {
861 assert(I.first && "New function is null");
862 FunctionId FuncName(I.first->getName());
863 ProfileSalvagedFuncs.insert(V: I.second.stringRef());
864 FuncNameToProfNameMap->emplace(Args&: FuncName, Args&: I.second);
865
866 // We need to remove the old entry to avoid duplicating the function
867 // processing.
868 SymbolMap->erase(Ctx: FuncName);
869 SymbolMap->emplace(Args&: I.second, Args: I.first);
870 }
871
872 // With extbinary profile format, initial profile loading only reads profile
873 // based on current function names in the module, so we need to load top-level
874 // profiles for functions with different profile name explicitly after
875 // function-profile name map is established with stale profile matching.
876 Reader.read(FuncsToUse: ProfileSalvagedFuncs);
877 Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
878}
879
880void SampleProfileMatcher::runOnModule() {
881 ProfileConverter::flattenProfile(InputProfiles: Reader.getProfiles(), OutputProfiles&: FlattenedProfiles,
882 ProfileIsCS: FunctionSamples::ProfileIsCS);
883 if (SalvageUnusedProfile)
884 findFunctionsWithoutProfile();
885
886 // Process the matching in top-down order so that the caller matching result
887 // can be used to the callee matching.
888 std::vector<Function *> TopDownFunctionList;
889 TopDownFunctionList.reserve(n: M.size());
890 buildTopDownFuncOrder(CG, FunctionOrderList&: TopDownFunctionList);
891 for (auto *F : TopDownFunctionList) {
892 if (skipProfileForFunction(F: *F))
893 continue;
894 runOnFunction(F&: *F);
895 }
896
897 if (SalvageUnusedProfile)
898 UpdateWithSalvagedProfiles();
899
900 if (SalvageStaleProfile)
901 distributeIRToProfileLocationMap();
902
903 computeAndReportProfileStaleness();
904}
905
906void SampleProfileMatcher::distributeIRToProfileLocationMap(
907 FunctionSamples &FS) {
908 const auto ProfileMappings = FuncMappings.find(Key: FS.getFuncName());
909 if (ProfileMappings != FuncMappings.end()) {
910 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
911 }
912
913 for (auto &Callees :
914 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
915 for (auto &FS : Callees.second) {
916 distributeIRToProfileLocationMap(FS&: FS.second);
917 }
918 }
919}
920
921// Use a central place to distribute the matching results. Outlined and inlined
922// profile with the function name will be set to the same pointer.
923void SampleProfileMatcher::distributeIRToProfileLocationMap() {
924 for (auto &I : Reader.getProfiles()) {
925 distributeIRToProfileLocationMap(FS&: I.second);
926 }
927}
928