1//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileMatcher used for stale
10// profile matching.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15#include "llvm/ADT/Statistic.h"
16#include "llvm/Demangle/Demangle.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/MDBuilder.h"
19#include "llvm/Support/CommandLine.h"
20#include "llvm/Transforms/Utils/LongestCommonSequence.h"
21
22using namespace llvm;
23using namespace sampleprof;
24
25#define DEBUG_TYPE "sample-profile-matcher"
26
27STATISTIC(NumDirectProfileMatch,
28 "Number of functions matched by demangled basename");
29
30namespace llvm {
31
32static cl::opt<unsigned> FuncProfileSimilarityThreshold(
33 "func-profile-similarity-threshold", cl::Hidden, cl::init(Val: 80),
34 cl::desc("Consider a profile matches a function if the similarity of their "
35 "callee sequences is above the specified percentile."));
36
37static cl::opt<unsigned> MinFuncCountForCGMatching(
38 "min-func-count-for-cg-matching", cl::Hidden, cl::init(Val: 5),
39 cl::desc("The minimum number of basic blocks required for a function to "
40 "run stale profile call graph matching."));
41
42static cl::opt<unsigned> MinCallCountForCGMatching(
43 "min-call-count-for-cg-matching", cl::Hidden, cl::init(Val: 3),
44 cl::desc("The minimum number of call anchors required for a function to "
45 "run stale profile call graph matching."));
46
47static cl::opt<bool> LoadFuncProfileforCGMatching(
48 "load-func-profile-for-cg-matching", cl::Hidden, cl::init(Val: true),
49 cl::desc(
50 "Load top-level profiles that the sample reader initially skipped for "
51 "the call-graph matching (only meaningful for extended binary "
52 "format)"));
53
54extern cl::opt<bool> SalvageStaleProfile;
55extern cl::opt<bool> SalvageUnusedProfile;
56extern cl::opt<bool> PersistProfileStaleness;
57extern cl::opt<bool> ReportProfileStaleness;
58
59static cl::opt<unsigned> SalvageUnusedProfileMaxFunctions(
60 "salvage-unused-profile-max-functions", cl::Hidden, cl::init(UINT_MAX),
61 cl::desc("The maximum number of functions in a module, above which salvage "
62 "unused profile will be skipped."));
63
64static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
65 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
66 cl::desc("The maximum number of callsites in a function, above which stale "
67 "profile matching will be skipped."));
68
69} // end namespace llvm
70
71void SampleProfileMatcher::findIRAnchors(const Function &F,
72 AnchorMap &IRAnchors) const {
73 // For inlined code, recover the original callsite and callee by finding the
74 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
75 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
76 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
77 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
78 const DILocation *PrevDIL = nullptr;
79 do {
80 PrevDIL = DIL;
81 DIL = DIL->getInlinedAt();
82 } while (DIL->getInlinedAt());
83
84 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
85 DIL, ProfileIsFS: FunctionSamples::ProfileIsFS);
86 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
87 return std::make_pair(x&: Callsite, y: FunctionId(CalleeName));
88 };
89
90 auto GetCanonicalCalleeName = [](const CallBase *CB) {
91 StringRef CalleeName = UnknownIndirectCallee;
92 if (Function *Callee = CB->getCalledFunction())
93 CalleeName = FunctionSamples::getCanonicalFnName(FnName: Callee->getName());
94 return CalleeName;
95 };
96
97 // Extract profile matching anchors in the IR.
98 for (auto &BB : F) {
99 for (auto &I : BB) {
100 DILocation *DIL = I.getDebugLoc();
101 if (!DIL)
102 continue;
103
104 if (FunctionSamples::ProfileIsProbeBased) {
105 if (auto Probe = extractProbe(Inst: I)) {
106 // Flatten inlined IR for the matching.
107 if (DIL->getInlinedAt()) {
108 IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL));
109 } else {
110 // Use empty StringRef for basic block probe.
111 StringRef CalleeName;
112 if (const auto *CB = dyn_cast<CallBase>(Val: &I)) {
113 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
114 if (!isa<IntrinsicInst>(Val: &I))
115 CalleeName = GetCanonicalCalleeName(CB);
116 }
117 LineLocation Loc = LineLocation(Probe->Id, 0);
118 IRAnchors.emplace(args&: Loc, args: FunctionId(CalleeName));
119 }
120 }
121 } else {
122 // TODO: For line-number based profile(AutoFDO), currently only support
123 // find callsite anchors. In future, we need to parse all the non-call
124 // instructions to extract the line locations for profile matching.
125 if (!isa<CallBase>(Val: &I) || isa<IntrinsicInst>(Val: &I))
126 continue;
127
128 if (DIL->getInlinedAt()) {
129 IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL));
130 } else {
131 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
132 DIL, ProfileIsFS: FunctionSamples::ProfileIsFS);
133 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(Val: &I));
134 IRAnchors.emplace(args&: Callsite, args: FunctionId(CalleeName));
135 }
136 }
137 }
138 }
139}
140
141void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
142 AnchorMap &ProfileAnchors) const {
143 auto isInvalidLineOffset = [](uint32_t LineOffset) {
144 return LineOffset & 0x8000;
145 };
146
147 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
148 AnchorMap &ProfileAnchors) {
149 auto Ret = ProfileAnchors.try_emplace(k: Loc, args: CalleeName);
150 if (!Ret.second) {
151 // For multiple callees, which indicates it's an indirect call, we use a
152 // dummy name(UnknownIndirectCallee) as the indrect callee name.
153 Ret.first->second = FunctionId(UnknownIndirectCallee);
154 }
155 };
156
157 for (const auto &I : FS.getBodySamples()) {
158 const LineLocation &Loc = I.first;
159 if (isInvalidLineOffset(Loc.LineOffset))
160 continue;
161 for (const auto &C : I.second.getCallTargets())
162 InsertAnchor(Loc, C.first, ProfileAnchors);
163 }
164
165 for (const auto &I : FS.getCallsiteSamples()) {
166 const LineLocation &Loc = I.first;
167 if (isInvalidLineOffset(Loc.LineOffset))
168 continue;
169 for (const auto &C : I.second)
170 InsertAnchor(Loc, C.first, ProfileAnchors);
171 }
172}
173
174bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
175 Function *&FuncWithoutProfile) {
176 FuncWithoutProfile = nullptr;
177 auto R = FunctionsWithoutProfile.find(Key: IRFuncName);
178 if (R != FunctionsWithoutProfile.end())
179 FuncWithoutProfile = R->second;
180 return !FuncWithoutProfile;
181}
182
183bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
184 // In post-link, the profiled function may have been optimized away from the
185 // module. Check if the function name exists in the pseudo_probe descriptors.
186 return (SymbolMap->find(Key: ProfileFuncName) == SymbolMap->end()) &&
187 (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
188 !FunctionSamples::ProfileIsProbeBased ||
189 !ProfileFuncName.isStringRef() ||
190 (ProbeManager->getDesc(FProfileName: ProfileFuncName.stringRef()) == nullptr));
191}
192
193bool SampleProfileMatcher::functionMatchesProfile(
194 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
195 bool FindMatchedProfileOnly) {
196 if (IRFuncName == ProfileFuncName)
197 return true;
198 if (!SalvageUnusedProfile)
199 return false;
200
201 // If IR function doesn't have profile and the profile is unused, try
202 // matching them.
203 Function *IRFunc = nullptr;
204 if (functionHasProfile(IRFuncName, FuncWithoutProfile&: IRFunc) ||
205 !isProfileUnused(ProfileFuncName))
206 return false;
207
208 assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
209 "IR function should be different from profile function to match");
210 return functionMatchesProfile(IRFunc&: *IRFunc, ProfFunc: ProfileFuncName,
211 FindMatchedProfileOnly);
212}
213
214LocToLocMap
215SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
216 const AnchorList &AnchorList2,
217 bool MatchUnusedFunction) {
218 LocToLocMap MatchedAnchors;
219 llvm::longestCommonSequence<LineLocation, FunctionId>(
220 AnchorList1, AnchorList2,
221 FunctionMatchesProfile: [&](const FunctionId &A, const FunctionId &B) {
222 return functionMatchesProfile(
223 IRFuncName: A, ProfileFuncName: B,
224 FindMatchedProfileOnly: !MatchUnusedFunction // Find matched function only
225 );
226 },
227 InsertMatching: [&](LineLocation A, LineLocation B) {
228 MatchedAnchors.try_emplace(Key: A, Args&: B);
229 });
230 return MatchedAnchors;
231}
232
233void SampleProfileMatcher::matchNonCallsiteLocs(
234 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
235 LocToLocMap &IRToProfileLocationMap) {
236 auto UpdateMatching = [&](const LineLocation &From, const LineLocation &To) {
237 // Skip the unchanged location mapping to save memory.
238 if (From != To)
239 IRToProfileLocationMap.insert_or_assign(Key: From, Val: To);
240 else
241 IRToProfileLocationMap.erase(Val: From);
242 };
243
244 // Use function's beginning location as the initial anchor.
245 int32_t LocationDelta = 0;
246 SmallVector<LineLocation> LastMatchedNonAnchors;
247 for (const auto &IR : IRAnchors) {
248 const auto &Loc = IR.first;
249 bool IsMatchedAnchor = false;
250 // Match the anchor location in lexical order.
251 auto R = MatchedAnchors.find(Val: Loc);
252 if (R != MatchedAnchors.end()) {
253 const auto &Candidate = R->second;
254 UpdateMatching(Loc, Candidate);
255 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
256 << " is matched from " << Loc << " to " << Candidate
257 << "\n");
258 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
259
260 // Match backwards for non-anchor locations.
261 // The locations in LastMatchedNonAnchors have been matched forwards
262 // based on the previous anchor, split it evenly and overwrite the
263 // second half based on the current anchor.
264 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
265 I < LastMatchedNonAnchors.size(); I++) {
266 const auto &L = LastMatchedNonAnchors[I];
267 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
268 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
269 UpdateMatching(L, Candidate);
270 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
271 << " to " << Candidate << "\n");
272 }
273
274 IsMatchedAnchor = true;
275 LastMatchedNonAnchors.clear();
276 }
277
278 // Match forwards for non-anchor locations.
279 if (!IsMatchedAnchor) {
280 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
281 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
282 UpdateMatching(Loc, Candidate);
283 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
284 << Candidate << "\n");
285 LastMatchedNonAnchors.emplace_back(Args: Loc);
286 }
287 }
288}
289
290// Filter the non-call locations from IRAnchors and ProfileAnchors and write
291// them into a list for random access later.
292void SampleProfileMatcher::getFilteredAnchorList(
293 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
294 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
295 for (const auto &I : IRAnchors) {
296 if (I.second.stringRef().empty())
297 continue;
298 FilteredIRAnchorsList.emplace_back(args: I);
299 }
300
301 for (const auto &I : ProfileAnchors)
302 FilteredProfileAnchorList.emplace_back(args: I);
303}
304
305// Call target name anchor based profile fuzzy matching.
306// Input:
307// For IR locations, the anchor is the callee name of direct callsite; For
308// profile locations, it's the call target name for BodySamples or inlinee's
309// profile name for CallsiteSamples.
310// Matching heuristic:
311// First match all the anchors using the diff algorithm, then split the
312// non-anchor locations between the two anchors evenly, first half are matched
313// based on the start anchor, second half are matched based on the end anchor.
314// For example, given:
315// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
316// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
317// The matching gives:
318// [1, 2(foo), 3, 5, 6(bar), 7]
319// | | | | | |
320// [1, 2, 3(foo), 4, 7, 8(bar), 9]
321// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
322void SampleProfileMatcher::runStaleProfileMatching(
323 const Function &F, const AnchorMap &IRAnchors,
324 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
325 bool RunCFGMatching, bool RunCGMatching) {
326 if (!RunCFGMatching && !RunCGMatching)
327 return;
328 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
329 << "\n");
330 assert(IRToProfileLocationMap.empty() &&
331 "Run stale profile matching only once per function");
332
333 AnchorList FilteredProfileAnchorList;
334 AnchorList FilteredIRAnchorsList;
335 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
336 FilteredProfileAnchorList);
337
338 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
339 return;
340
341 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
342 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
343 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
344 << " because the number of callsites in the IR is "
345 << FilteredIRAnchorsList.size()
346 << " and in the profile is "
347 << FilteredProfileAnchorList.size() << "\n");
348 return;
349 }
350
351 // Match the callsite anchors by finding the longest common subsequence
352 // between IR and profile.
353 // Define a match between two anchors as follows:
354 // 1) The function names of anchors are the same.
355 // 2) The similarity between the anchor functions is above a threshold if
356 // RunCGMatching is set.
357 // For 2), we only consider the anchor functions from IR and profile don't
358 // appear on either side to reduce the matching scope. Note that we need to
359 // use IR anchor as base(A side) to align with the order of
360 // IRToProfileLocationMap.
361 LocToLocMap MatchedAnchors =
362 longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList,
363 MatchUnusedFunction: RunCGMatching /* Match unused functions */);
364
365 // Scan through the matched anchors to make sure functions and profiles are
366 // 1:1 mapped. If the profile has already been mapped to another function
367 // during previous fuzzy matching, create a new profile with the same sample
368 // counts and assumed to be pre-inlined.
369 for (const auto &IR : IRAnchors) {
370 bool ProfileConflicted = false;
371 const auto &Loc = IR.first;
372 Function *Callee = M.getFunction(Name: IR.second.stringRef());
373 if (!Callee)
374 continue;
375 FunctionId ProfAnchor;
376 auto AnchorLoc = MatchedAnchors.find(Val: Loc);
377 if (AnchorLoc == MatchedAnchors.end()) {
378 // Search within the module and find if we have conflicts in pre-matched
379 // profiles for this anchor
380 auto PreMatched = FuncToProfileNameMap.find(Key: Callee);
381 if (PreMatched == FuncToProfileNameMap.end())
382 continue;
383 ProfAnchor = PreMatched->second;
384 } else {
385 const auto &Prof = ProfileAnchors.find(x: AnchorLoc->second);
386 if (Prof == ProfileAnchors.end())
387 continue;
388 ProfAnchor = Prof->second;
389 }
390
391 // Conflicting profile previously matched
392 auto Cached = MatchedAnchorCache.find(Val: ProfAnchor);
393 if (Cached == MatchedAnchorCache.end())
394 MatchedAnchorCache[ProfAnchor] = Callee;
395 else if (Cached->second != Callee)
396 ProfileConflicted = true;
397
398 if (ProfileConflicted) {
399 // Create a flattened profile using the IR function name to avoid profile
400 // name conflicts
401 const auto *FSForMatching = getFlattenedSamplesFor(Fname: ProfAnchor);
402 if (!FSForMatching)
403 FSForMatching = Reader.getSamplesFor(Fname: ProfAnchor.stringRef());
404 if (!FSForMatching)
405 continue;
406
407 FunctionId NewAnchor(
408 FunctionSamples::getCanonicalFnName(FnName: IR.second.stringRef()));
409 auto R = FuncProfileMatchCache.find(Val: {Callee, NewAnchor});
410 if (R != FuncProfileMatchCache.end() && R->second)
411 continue;
412 FunctionSamples &NewFS = FlattenedProfiles.create(Ctx: NewAnchor);
413 NewFS.merge(Other: *FSForMatching);
414 FuncToProfileNameMap[Callee] = NewAnchor;
415 FuncProfileMatchCache[{Callee, NewAnchor}] = true;
416
417 // Update profile in the sample profile reader
418 SampleProfileMap &Profiles = Reader.getProfiles();
419 SampleContext FContext(NewAnchor);
420 auto Res = Profiles.try_emplace(Hash: FContext.getHashCode(), Key: FContext, Args&: NewFS);
421 FunctionSamples &FProfile = Res.first->second;
422 FProfile.setContext(FContext);
423 }
424 }
425
426 // CFG level matching:
427 // Apply the callsite matchings to infer matching for the basic
428 // block(non-callsite) locations and write the result to
429 // IRToProfileLocationMap.
430 if (RunCFGMatching)
431 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
432}
433
434void SampleProfileMatcher::runOnFunction(Function &F) {
435 // We need to use flattened function samples for matching.
436 // Unlike IR, which includes all callsites from the source code, the callsites
437 // in profile only show up when they are hit by samples, i,e. the profile
438 // callsites in one context may differ from those in another context. To get
439 // the maximum number of callsites, we merge the function profiles from all
440 // contexts, aka, the flattened profile to find profile anchors.
441 const auto *FSForMatching = getFlattenedSamplesFor(F);
442 if (SalvageUnusedProfile && !FSForMatching) {
443 // Apply the matching in place to find the new function's matched profile.
444 auto R = FuncToProfileNameMap.find(Key: &F);
445 if (R != FuncToProfileNameMap.end()) {
446 FSForMatching = getFlattenedSamplesFor(Fname: R->second);
447 // Fallback for profiles loaded by functionMatchesProfileHelper but not
448 // yet in FlattenedProfiles. This should be rare now that
449 // functionMatchesProfileHelper flattens after loading.
450 if (!FSForMatching && LoadFuncProfileforCGMatching)
451 FSForMatching = Reader.getSamplesFor(Fname: R->second.stringRef());
452 }
453 }
454 if (!FSForMatching)
455 return;
456
457 // Anchors for IR. It's a map from IR location to callee name, callee name is
458 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
459 // for unknown indrect callee name.
460 AnchorMap IRAnchors;
461 findIRAnchors(F, IRAnchors);
462 // Anchors for profile. It's a map from callsite location to a set of callee
463 // name.
464 AnchorMap ProfileAnchors;
465 findProfileAnchors(FS: *FSForMatching, ProfileAnchors);
466
467 // Compute the callsite match states for profile staleness report.
468 if (ReportProfileStaleness || PersistProfileStaleness)
469 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap: nullptr);
470
471 if (!SalvageStaleProfile)
472 return;
473 // For probe-based profiles, run matching only when profile checksum is
474 // mismatched.
475 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
476 !ProbeManager->profileIsValid(F, Samples: *FSForMatching);
477 bool RunCFGMatching =
478 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
479 bool RunCGMatching = SalvageUnusedProfile;
480 // For imported functions, the checksum metadata(pseudo_probe_desc) are
481 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
482 // transfer the info: add the attribute during pre-link phase and check it
483 // during post-link phase(see "profileIsValid").
484 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
485 F.addFnAttr(Kind: "profile-checksum-mismatch");
486
487 // The matching result will be saved to IRToProfileLocationMap, create a
488 // new map for each function.
489 auto &IRToProfileLocationMap = getIRToProfileLocationMap(FS: *FSForMatching);
490 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
491 RunCFGMatching, RunCGMatching);
492 // Find and update callsite match states after matching.
493 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
494 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
495 IRToProfileLocationMap: &IRToProfileLocationMap);
496}
497
498void SampleProfileMatcher::recordCallsiteMatchStates(
499 const Function &F, const AnchorMap &IRAnchors,
500 const AnchorMap &ProfileAnchors,
501 const LocToLocMap *IRToProfileLocationMap) {
502 bool IsPostMatch = IRToProfileLocationMap != nullptr;
503 auto &CallsiteMatchStates =
504 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(FnName: F.getName())];
505
506 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
507 // IRToProfileLocationMap is null in pre-match phrase.
508 if (!IRToProfileLocationMap)
509 return IRLoc;
510 const auto &ProfileLoc = IRToProfileLocationMap->find(Val: IRLoc);
511 if (ProfileLoc != IRToProfileLocationMap->end())
512 return ProfileLoc->second;
513 else
514 return IRLoc;
515 };
516
517 for (const auto &I : IRAnchors) {
518 // After fuzzy profile matching, use the matching result to remap the
519 // current IR callsite.
520 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
521 const auto &IRCalleeId = I.second;
522 const auto &It = ProfileAnchors.find(x: ProfileLoc);
523 if (It == ProfileAnchors.end())
524 continue;
525 const auto &ProfCalleeId = It->second;
526 if (IRCalleeId == ProfCalleeId) {
527 auto It = CallsiteMatchStates.find(Val: ProfileLoc);
528 if (It == CallsiteMatchStates.end())
529 CallsiteMatchStates.try_emplace(Key: ProfileLoc, Args: MatchState::InitialMatch);
530 else if (IsPostMatch) {
531 if (It->second == MatchState::InitialMatch)
532 It->second = MatchState::UnchangedMatch;
533 else if (It->second == MatchState::InitialMismatch)
534 It->second = MatchState::RecoveredMismatch;
535 }
536 }
537 }
538
539 // Check if there are any callsites in the profile that does not match to any
540 // IR callsites.
541 for (const auto &I : ProfileAnchors) {
542 const auto &Loc = I.first;
543 assert(!I.second.stringRef().empty() && "Callees should not be empty");
544 auto It = CallsiteMatchStates.find(Val: Loc);
545 if (It == CallsiteMatchStates.end())
546 CallsiteMatchStates.try_emplace(Key: Loc, Args: MatchState::InitialMismatch);
547 else if (IsPostMatch) {
548 // Update the state if it's not matched(UnchangedMatch or
549 // RecoveredMismatch).
550 if (It->second == MatchState::InitialMismatch)
551 It->second = MatchState::UnchangedMismatch;
552 else if (It->second == MatchState::InitialMatch)
553 It->second = MatchState::RemovedMatch;
554 }
555 }
556}
557
558void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
559 bool IsTopLevel) {
560 const auto *FuncDesc = ProbeManager->getDesc(GUID: FS.getGUID());
561 // Skip the function that is external or renamed.
562 if (!FuncDesc)
563 return;
564
565 if (ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: FS)) {
566 if (IsTopLevel)
567 NumStaleProfileFunc++;
568 // Given currently all probe ids are after block probe ids, once the
569 // checksum is mismatched, it's likely all the callites are mismatched and
570 // dropped. We conservatively count all the samples as mismatched and stop
571 // counting the inlinees' profiles.
572 MismatchedFunctionSamples += FS.getTotalSamples();
573 return;
574 }
575
576 // Even the current-level function checksum is matched, it's possible that the
577 // nested inlinees' checksums are mismatched that affect the inlinee's sample
578 // loading, we need to go deeper to check the inlinees' function samples.
579 // Similarly, count all the samples as mismatched if the inlinee's checksum is
580 // mismatched using this recursive function.
581 for (const auto &I : FS.getCallsiteSamples())
582 for (const auto &CS : I.second)
583 countMismatchedFuncSamples(FS: CS.second, IsTopLevel: false);
584}
585
586void SampleProfileMatcher::countMismatchedCallsiteSamples(
587 const FunctionSamples &FS) {
588 auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName());
589 // Skip it if no mismatched callsite or this is an external function.
590 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
591 return;
592 const auto &CallsiteMatchStates = It->second;
593
594 auto findMatchState = [&](const LineLocation &Loc) {
595 auto It = CallsiteMatchStates.find(Val: Loc);
596 if (It == CallsiteMatchStates.end())
597 return MatchState::Unknown;
598 return It->second;
599 };
600
601 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
602 uint64_t Samples) {
603 if (isMismatchState(State))
604 MismatchedCallsiteSamples += Samples;
605 else if (State == MatchState::RecoveredMismatch)
606 RecoveredCallsiteSamples += Samples;
607 };
608
609 // The non-inlined callsites are saved in the body samples of function
610 // profile, go through it to count the non-inlined callsite samples.
611 for (const auto &I : FS.getBodySamples())
612 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
613
614 // Count the inlined callsite samples.
615 for (const auto &I : FS.getCallsiteSamples()) {
616 auto State = findMatchState(I.first);
617 uint64_t CallsiteSamples = 0;
618 for (const auto &CS : I.second)
619 CallsiteSamples += CS.second.getTotalSamples();
620 AttributeMismatchedSamples(State, CallsiteSamples);
621
622 if (isMismatchState(State))
623 continue;
624
625 // When the current level of inlined call site matches the profiled call
626 // site, we need to go deeper along the inline tree to count mismatches from
627 // lower level inlinees.
628 for (const auto &CS : I.second)
629 countMismatchedCallsiteSamples(FS: CS.second);
630 }
631}
632
633void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
634 auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName());
635 // Skip it if no mismatched callsite or this is an external function.
636 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
637 return;
638 const auto &MatchStates = It->second;
639 [[maybe_unused]] bool OnInitialState =
640 isInitialState(State: MatchStates.begin()->second);
641 for (const auto &I : MatchStates) {
642 TotalProfiledCallsites++;
643 assert(
644 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
645 "Profile matching state is inconsistent");
646
647 if (isMismatchState(State: I.second))
648 NumMismatchedCallsites++;
649 else if (I.second == MatchState::RecoveredMismatch)
650 NumRecoveredCallsites++;
651 }
652}
653
654void SampleProfileMatcher::countCallGraphRecoveredSamples(
655 const FunctionSamples &FS,
656 DenseSet<FunctionId> &CallGraphRecoveredProfiles) {
657 if (CallGraphRecoveredProfiles.count(V: FS.getFunction())) {
658 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
659 return;
660 }
661
662 for (const auto &CM : FS.getCallsiteSamples()) {
663 for (const auto &CS : CM.second) {
664 countCallGraphRecoveredSamples(FS: CS.second, CallGraphRecoveredProfiles);
665 }
666 }
667}
668
669void SampleProfileMatcher::computeAndReportProfileStaleness() {
670 if (!ReportProfileStaleness && !PersistProfileStaleness)
671 return;
672
673 DenseSet<FunctionId> CallGraphRecoveredProfiles;
674 if (SalvageUnusedProfile) {
675 for (const auto &I : FuncToProfileNameMap) {
676 CallGraphRecoveredProfiles.insert(V: I.second);
677 if (GlobalValue::isAvailableExternallyLinkage(Linkage: I.first->getLinkage()))
678 continue;
679 NumCallGraphRecoveredProfiledFunc++;
680 }
681 }
682
683 // Count profile mismatches for profile staleness report.
684 for (const auto &F : M) {
685 if (skipProfileForFunction(F))
686 continue;
687 // As the stats will be merged by linker, skip reporting the metrics for
688 // imported functions to avoid repeated counting.
689 if (GlobalValue::isAvailableExternallyLinkage(Linkage: F.getLinkage()))
690 continue;
691 const auto *FS = Reader.getSamplesFor(F);
692 if (!FS)
693 continue;
694 TotalProfiledFunc++;
695 TotalFunctionSamples += FS->getTotalSamples();
696
697 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
698 countCallGraphRecoveredSamples(FS: *FS, CallGraphRecoveredProfiles);
699
700 // Checksum mismatch is only used in pseudo-probe mode.
701 if (FunctionSamples::ProfileIsProbeBased)
702 countMismatchedFuncSamples(FS: *FS, IsTopLevel: true);
703
704 // Count mismatches and samples for calliste.
705 countMismatchCallsites(FS: *FS);
706 countMismatchedCallsiteSamples(FS: *FS);
707 }
708
709 if (ReportProfileStaleness) {
710 if (FunctionSamples::ProfileIsProbeBased) {
711 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
712 << ") of functions' profile are invalid and ("
713 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
714 << ") of samples are discarded due to function hash mismatch.\n";
715 }
716 if (SalvageUnusedProfile) {
717 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
718 << TotalProfiledFunc << ") of functions' profile are matched and ("
719 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
720 << ") of samples are reused by call graph matching.\n";
721 }
722
723 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
724 << TotalProfiledCallsites
725 << ") of callsites' profile are invalid and ("
726 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
727 << TotalFunctionSamples
728 << ") of samples are discarded due to callsite location mismatch.\n";
729 errs() << "(" << NumRecoveredCallsites << "/"
730 << (NumRecoveredCallsites + NumMismatchedCallsites)
731 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
732 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
733 << ") of samples are recovered by stale profile matching.\n";
734 }
735
736 if (PersistProfileStaleness) {
737 LLVMContext &Ctx = M.getContext();
738 MDBuilder MDB(Ctx);
739
740 SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
741 if (FunctionSamples::ProfileIsProbeBased) {
742 ProfStatsVec.emplace_back(Args: "NumStaleProfileFunc", Args&: NumStaleProfileFunc);
743 ProfStatsVec.emplace_back(Args: "TotalProfiledFunc", Args&: TotalProfiledFunc);
744 ProfStatsVec.emplace_back(Args: "MismatchedFunctionSamples",
745 Args&: MismatchedFunctionSamples);
746 ProfStatsVec.emplace_back(Args: "TotalFunctionSamples", Args&: TotalFunctionSamples);
747 }
748
749 if (SalvageUnusedProfile) {
750 ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredProfiledFunc",
751 Args&: NumCallGraphRecoveredProfiledFunc);
752 ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredFuncSamples",
753 Args&: NumCallGraphRecoveredFuncSamples);
754 }
755
756 ProfStatsVec.emplace_back(Args: "NumMismatchedCallsites", Args&: NumMismatchedCallsites);
757 ProfStatsVec.emplace_back(Args: "NumRecoveredCallsites", Args&: NumRecoveredCallsites);
758 ProfStatsVec.emplace_back(Args: "TotalProfiledCallsites", Args&: TotalProfiledCallsites);
759 ProfStatsVec.emplace_back(Args: "MismatchedCallsiteSamples",
760 Args&: MismatchedCallsiteSamples);
761 ProfStatsVec.emplace_back(Args: "RecoveredCallsiteSamples",
762 Args&: RecoveredCallsiteSamples);
763
764 auto *MD = MDB.createLLVMStats(LLVMStatsVec: ProfStatsVec);
765 auto *NMD = M.getOrInsertNamedMetadata(Name: "llvm.stats");
766 NMD->addOperand(M: MD);
767 }
768}
769
770void SampleProfileMatcher::findFunctionsWithoutProfile() {
771 // TODO: Support MD5 profile.
772 if (FunctionSamples::UseMD5)
773 return;
774 StringSet<> NamesInProfile;
775 for (FunctionId Name : Reader.getNameTable())
776 NamesInProfile.insert(key: Name.stringRef());
777
778 for (auto &F : M) {
779 // Skip declarations, as even if the function can be matched, we have
780 // nothing to do with it.
781 if (F.isDeclaration())
782 continue;
783
784 StringRef CanonFName = FunctionSamples::getCanonicalFnName(FnName: F.getName());
785 const auto *FS = getFlattenedSamplesFor(F);
786 if (FS)
787 continue;
788
789 // For extended binary, functions fully inlined may not be loaded in the
790 // top-level profile, so check the NameTable which has the all symbol names
791 // in profile.
792 if (NamesInProfile.count(Key: CanonFName))
793 continue;
794
795 // For extended binary, non-profiled function symbols are in the profile
796 // symbol list table.
797 if (PSL && PSL->contains(Name: CanonFName))
798 continue;
799
800 LLVM_DEBUG(dbgs() << "Function " << CanonFName
801 << " is not in profile or profile symbol list.\n");
802 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
803 }
804}
805
806// Demangle \p FName and return the base function name (stripping namespaces,
807// templates, and parameter types). Returns an empty string on failure.
808static std::string getDemangledBaseName(ItaniumPartialDemangler &Demangler,
809 StringRef FName) {
810 auto FunctionName = FName.str();
811 if (Demangler.partialDemangle(MangledName: FunctionName.c_str()))
812 return std::string();
813 size_t BaseNameSize = 0;
814 // The demangler API follows the __cxa_demangle one, and thus needs a
815 // pointer that originates from malloc (or nullptr) and the caller is
816 // responsible for free()-ing the buffer.
817 char *BaseNamePtr = Demangler.getFunctionBaseName(Buf: nullptr, N: &BaseNameSize);
818 std::string Result = (BaseNamePtr && BaseNameSize)
819 ? std::string(BaseNamePtr, BaseNameSize)
820 : std::string();
821 free(ptr: BaseNamePtr);
822 // Trim trailing whitespace/null — getFunctionBaseName may include trailing
823 // characters in the reported size.
824 while (!Result.empty() && (Result.back() == ' ' || Result.back() == '\0'))
825 Result.pop_back();
826 return Result;
827}
828
829void SampleProfileMatcher::matchFunctionsWithoutProfileByBasename() {
830 if (FunctionsWithoutProfile.empty() || !LoadFuncProfileforCGMatching)
831 return;
832 auto NameTable = Reader.getNameTable();
833 if (NameTable.empty())
834 return;
835
836 ItaniumPartialDemangler Demangler;
837
838 // Build a map from demangled basename to orphan function. Only keep
839 // basenames that map to exactly one orphan — ambiguous basenames like
840 // "get" or "operator()" would produce false positives.
841 StringMap<Function *> OrphansByBaseName;
842 StringSet<> AmbiguousBaseNames;
843 for (auto &[FuncId, Func] : FunctionsWithoutProfile) {
844 std::string BaseName = getDemangledBaseName(Demangler, FName: Func->getName());
845 if (BaseName.empty() || AmbiguousBaseNames.count(Key: BaseName))
846 continue;
847 auto [It, Inserted] = OrphansByBaseName.try_emplace(Key: BaseName, Args&: Func);
848 if (!Inserted) {
849 // More than one orphan shares this basename — mark ambiguous.
850 OrphansByBaseName.erase(I: It);
851 AmbiguousBaseNames.insert(key: BaseName);
852 }
853 }
854 if (OrphansByBaseName.empty())
855 return;
856
857 // Scan the profile NameTable for candidates whose demangled basename matches
858 // a unique orphan. Use a map to track exactly one candidate per basename.
859 StringMap<FunctionId> CandidateByBaseName;
860 for (FunctionId ProfileFuncId : NameTable) {
861 StringRef ProfName = ProfileFuncId.stringRef();
862 if (ProfName.empty())
863 continue;
864
865 std::string ProfBaseName = getDemangledBaseName(Demangler, FName: ProfName);
866 if (ProfBaseName.empty())
867 continue;
868
869 if (OrphansByBaseName.count(Key: ProfBaseName)) {
870 if (AmbiguousBaseNames.count(Key: ProfBaseName))
871 continue;
872
873 auto [It, Inserted] =
874 CandidateByBaseName.try_emplace(Key: ProfBaseName, Args&: ProfileFuncId);
875 if (!Inserted) {
876 // More than one profile entry shares this basename — mark ambiguous.
877 CandidateByBaseName.erase(I: It);
878 AmbiguousBaseNames.insert(key: ProfBaseName);
879 }
880 }
881 }
882
883 if (CandidateByBaseName.empty())
884 return;
885
886 // Load candidate profiles on demand, match, and flatten.
887 DenseSet<StringRef> ToLoad;
888 for (auto &[BaseName, ProfId] : CandidateByBaseName)
889 ToLoad.insert(V: ProfId.stringRef());
890 Reader.read(FuncsToUse: ToLoad);
891
892 unsigned MatchCount = 0;
893 SampleProfileMap NewlyLoadedProfiles;
894 for (auto &[BaseName, ProfId] : CandidateByBaseName) {
895 if (!isProfileUnused(ProfileFuncName: ProfId))
896 continue;
897 Function *OrphanFunc = OrphansByBaseName.lookup(Key: BaseName);
898 if (!OrphanFunc)
899 continue;
900
901 FuncToProfileNameMap[OrphanFunc] = ProfId;
902 MatchedAnchorCache[ProfId] = OrphanFunc;
903 if (const auto *FS = Reader.getSamplesFor(Fname: ProfId.stringRef()))
904 NewlyLoadedProfiles.create(Ctx: FS->getFunction()).merge(Other: *FS);
905 MatchCount++;
906 LLVM_DEBUG(dbgs() << "Direct basename match: " << OrphanFunc->getName()
907 << " (IR) -> " << ProfId << " (Profile)"
908 << " [basename: " << BaseName << "]\n");
909 }
910
911 // Flatten newly loaded profiles so inlined callees are available for
912 // subsequent LCS-based CG matching.
913 if (!NewlyLoadedProfiles.empty())
914 ProfileConverter::flattenProfile(InputProfiles: NewlyLoadedProfiles, OutputProfiles&: FlattenedProfiles,
915 ProfileIsCS: FunctionSamples::ProfileIsCS);
916
917 NumDirectProfileMatch += MatchCount;
918 LLVM_DEBUG(dbgs() << "Direct basename matching found " << MatchCount
919 << " matches\n");
920}
921
922bool SampleProfileMatcher::functionMatchesProfileHelper(
923 const Function &IRFunc, const FunctionId &ProfFunc) {
924 // The value is in the range [0, 1]. The bigger the value is, the more similar
925 // two sequences are.
926 float Similarity = 0.0;
927
928 // Match the functions if they have the same base name(after demangling) and
929 // skip the similarity check.
930 ItaniumPartialDemangler Demangler;
931 auto IRBaseName = getDemangledBaseName(Demangler, FName: IRFunc.getName());
932 auto ProfBaseName = getDemangledBaseName(Demangler, FName: ProfFunc.stringRef());
933 if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
934 LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
935 << ProfFunc << "(Profile) share the same base name: "
936 << IRBaseName << ".\n");
937 return true;
938 }
939
940 const auto *FSForMatching = getFlattenedSamplesFor(Fname: ProfFunc);
941 // With extbinary profile format, initial profile loading only reads profile
942 // based on current function names in the module.
943 // However, if a function is renamed, sample loader skips to load its original
944 // profile(which has a different name), we will miss this case. To address
945 // this, we load the top-level profile candidate explicitly for the matching.
946 if (!FSForMatching && LoadFuncProfileforCGMatching) {
947 DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
948 if (std::error_code EC = Reader.read(FuncsToUse: TopLevelFunc))
949 return false;
950 FSForMatching = Reader.getSamplesFor(Fname: ProfFunc.stringRef());
951 // Flatten the newly loaded profile so its inlined callees get their own
952 // entries in FlattenedProfiles, making them discoverable by subsequent
953 // CG matching steps.
954 if (FSForMatching) {
955 SampleProfileMap TempProfiles;
956 TempProfiles.create(Ctx: FSForMatching->getFunction()).merge(Other: *FSForMatching);
957 ProfileConverter::flattenProfile(InputProfiles: TempProfiles, OutputProfiles&: FlattenedProfiles,
958 ProfileIsCS: FunctionSamples::ProfileIsCS);
959 FSForMatching = getFlattenedSamplesFor(Fname: ProfFunc);
960 }
961 LLVM_DEBUG({
962 if (FSForMatching)
963 dbgs() << "Read top-level function " << ProfFunc
964 << " for call-graph matching\n";
965 });
966 }
967 if (!FSForMatching)
968 return false;
969 // The check for similarity or checksum may not be reliable if the function is
970 // tiny, we use the number of basic block as a proxy for the function
971 // complexity and skip the matching if it's too small.
972 if (IRFunc.size() < MinFuncCountForCGMatching ||
973 FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
974 return false;
975
976 // For probe-based function, we first trust the checksum info. If the checksum
977 // doesn't match, we continue checking for similarity.
978 if (FunctionSamples::ProfileIsProbeBased) {
979 const auto *FuncDesc = ProbeManager->getDesc(F: IRFunc);
980 if (FuncDesc &&
981 !ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: *FSForMatching)) {
982 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
983 << "(IR) and " << ProfFunc << "(Profile) match.\n");
984
985 return true;
986 }
987 }
988
989 AnchorMap IRAnchors;
990 findIRAnchors(F: IRFunc, IRAnchors);
991 AnchorMap ProfileAnchors;
992 findProfileAnchors(FS: *FSForMatching, ProfileAnchors);
993
994 AnchorList FilteredIRAnchorsList;
995 AnchorList FilteredProfileAnchorList;
996 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
997 FilteredProfileAnchorList);
998
999 // Similarly skip the matching if the num of anchors is not enough.
1000 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
1001 FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
1002 return false;
1003
1004 // Use the diff algorithm to find the LCS between IR and profile.
1005
1006 // Don't recursively match the callee function to avoid infinite matching,
1007 // callee functions will be handled later since it's processed in top-down
1008 // order .
1009 LocToLocMap MatchedAnchors =
1010 longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList,
1011 MatchUnusedFunction: false /* Match unused functions */);
1012
1013 Similarity = static_cast<float>(MatchedAnchors.size()) /
1014 FilteredProfileAnchorList.size();
1015
1016 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
1017 << "(IR) and " << ProfFunc << "(profile) is "
1018 << format("%.2f", Similarity) << "\n");
1019 assert((Similarity >= 0 && Similarity <= 1.0) &&
1020 "Similarity value should be in [0, 1]");
1021 return Similarity * 100 > FuncProfileSimilarityThreshold;
1022}
1023
1024// If FindMatchedProfileOnly is set to true, only use the processed function
1025// results. This is used for skipping the repeated recursive matching.
1026bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
1027 const FunctionId &ProfFunc,
1028 bool FindMatchedProfileOnly) {
1029 auto R = FuncProfileMatchCache.find(Val: {&IRFunc, ProfFunc});
1030 if (R != FuncProfileMatchCache.end())
1031 return R->second;
1032
1033 if (FindMatchedProfileOnly)
1034 return false;
1035
1036 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
1037 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
1038 if (Matched) {
1039 FuncToProfileNameMap[&IRFunc] = ProfFunc;
1040 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
1041 << " matches profile:" << ProfFunc << "\n");
1042 }
1043
1044 return Matched;
1045}
1046
1047void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
1048 DenseSet<StringRef> ProfileSalvagedFuncs;
1049 // Update FuncNameToProfNameMap and SymbolMap.
1050 for (auto &I : FuncToProfileNameMap) {
1051 assert(I.first && "New function is null");
1052 FunctionId FuncName(I.first->getName());
1053 ProfileSalvagedFuncs.insert(V: I.second.stringRef());
1054 FuncNameToProfNameMap->emplace(Args&: FuncName, Args&: I.second);
1055
1056 // We need to remove the old entry to avoid duplicating the function
1057 // processing.
1058 SymbolMap->erase(Ctx: FuncName);
1059 [[maybe_unused]] auto Ret = SymbolMap->emplace(Args&: I.second, Args&: I.first);
1060 LLVM_DEBUG({
1061 if (!Ret.second)
1062 dbgs() << "Profile Function " << I.second
1063 << " has already been matched to another IR function.\n";
1064 });
1065 }
1066
1067 // With extbinary profile format, initial profile loading only reads profile
1068 // based on current function names in the module, so we need to load top-level
1069 // profiles for functions with different profile name explicitly after
1070 // function-profile name map is established with stale profile matching.
1071 Reader.read(FuncsToUse: ProfileSalvagedFuncs);
1072 Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
1073}
1074
1075void SampleProfileMatcher::runOnModule() {
1076 ProfileConverter::flattenProfile(InputProfiles: Reader.getProfiles(), OutputProfiles&: FlattenedProfiles,
1077 ProfileIsCS: FunctionSamples::ProfileIsCS);
1078 // Disable SalvageUnusedProfile if the module has an extremely large number of
1079 // functions to limit compile time.
1080 SalvageUnusedProfile =
1081 SalvageUnusedProfile && M.size() < SalvageUnusedProfileMaxFunctions;
1082
1083 if (SalvageUnusedProfile) {
1084 findFunctionsWithoutProfile();
1085 matchFunctionsWithoutProfileByBasename();
1086 }
1087
1088 // Process the matching in top-down order so that the caller matching result
1089 // can be used to the callee matching.
1090 std::vector<Function *> TopDownFunctionList;
1091 TopDownFunctionList.reserve(n: M.size());
1092 buildTopDownFuncOrder(CG, FunctionOrderList&: TopDownFunctionList);
1093 for (auto *F : TopDownFunctionList) {
1094 if (skipProfileForFunction(F: *F))
1095 continue;
1096 runOnFunction(F&: *F);
1097 }
1098
1099 if (SalvageUnusedProfile)
1100 UpdateWithSalvagedProfiles();
1101
1102 if (SalvageStaleProfile)
1103 distributeIRToProfileLocationMap();
1104
1105 computeAndReportProfileStaleness();
1106}
1107
1108void SampleProfileMatcher::distributeIRToProfileLocationMap(
1109 FunctionSamples &FS) {
1110 const auto ProfileMappings = FuncMappings.find(Key: FS.getFuncName());
1111 if (ProfileMappings != FuncMappings.end()) {
1112 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
1113 }
1114
1115 for (auto &Callees :
1116 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
1117 for (auto &FS : Callees.second) {
1118 distributeIRToProfileLocationMap(FS&: FS.second);
1119 }
1120 }
1121}
1122
1123// Use a central place to distribute the matching results. Outlined and inlined
1124// profile with the function name will be set to the same pointer.
1125void SampleProfileMatcher::distributeIRToProfileLocationMap() {
1126 for (auto &I : Reader.getProfiles()) {
1127 distributeIRToProfileLocationMap(FS&: I.second);
1128 }
1129}
1130