1 | //===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the SampleProfileMatcher used for stale |
10 | // profile matching. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Transforms/IPO/SampleProfileMatcher.h" |
15 | #include "llvm/Demangle/Demangle.h" |
16 | #include "llvm/IR/IntrinsicInst.h" |
17 | #include "llvm/IR/MDBuilder.h" |
18 | #include "llvm/Support/CommandLine.h" |
19 | #include "llvm/Transforms/Utils/LongestCommonSequence.h" |
20 | |
21 | using namespace llvm; |
22 | using namespace sampleprof; |
23 | |
24 | #define DEBUG_TYPE "sample-profile-matcher" |
25 | |
26 | static cl::opt<unsigned> FuncProfileSimilarityThreshold( |
27 | "func-profile-similarity-threshold" , cl::Hidden, cl::init(Val: 80), |
28 | cl::desc("Consider a profile matches a function if the similarity of their " |
29 | "callee sequences is above the specified percentile." )); |
30 | |
31 | static cl::opt<unsigned> MinFuncCountForCGMatching( |
32 | "min-func-count-for-cg-matching" , cl::Hidden, cl::init(Val: 5), |
33 | cl::desc("The minimum number of basic blocks required for a function to " |
34 | "run stale profile call graph matching." )); |
35 | |
36 | static cl::opt<unsigned> MinCallCountForCGMatching( |
37 | "min-call-count-for-cg-matching" , cl::Hidden, cl::init(Val: 3), |
38 | cl::desc("The minimum number of call anchors required for a function to " |
39 | "run stale profile call graph matching." )); |
40 | |
41 | static cl::opt<bool> LoadFuncProfileforCGMatching( |
42 | "load-func-profile-for-cg-matching" , cl::Hidden, cl::init(Val: true), |
43 | cl::desc( |
44 | "Load top-level profiles that the sample reader initially skipped for " |
45 | "the call-graph matching (only meaningful for extended binary " |
46 | "format)" )); |
47 | |
48 | extern cl::opt<bool> SalvageStaleProfile; |
49 | extern cl::opt<bool> SalvageUnusedProfile; |
50 | extern cl::opt<bool> PersistProfileStaleness; |
51 | extern cl::opt<bool> ReportProfileStaleness; |
52 | |
53 | static cl::opt<unsigned> SalvageStaleProfileMaxCallsites( |
54 | "salvage-stale-profile-max-callsites" , cl::Hidden, cl::init(UINT_MAX), |
55 | cl::desc("The maximum number of callsites in a function, above which stale " |
56 | "profile matching will be skipped." )); |
57 | |
58 | void SampleProfileMatcher::findIRAnchors(const Function &F, |
59 | AnchorMap &IRAnchors) const { |
60 | // For inlined code, recover the original callsite and callee by finding the |
61 | // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the |
62 | // top-level frame is "main:1", the callsite is "1" and the callee is "foo". |
63 | auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) { |
64 | assert((DIL && DIL->getInlinedAt()) && "No inlined callsite" ); |
65 | const DILocation *PrevDIL = nullptr; |
66 | do { |
67 | PrevDIL = DIL; |
68 | DIL = DIL->getInlinedAt(); |
69 | } while (DIL->getInlinedAt()); |
70 | |
71 | LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( |
72 | DIL, ProfileIsFS: FunctionSamples::ProfileIsFS); |
73 | StringRef CalleeName = PrevDIL->getSubprogramLinkageName(); |
74 | return std::make_pair(x&: Callsite, y: FunctionId(CalleeName)); |
75 | }; |
76 | |
77 | auto GetCanonicalCalleeName = [](const CallBase *CB) { |
78 | StringRef CalleeName = UnknownIndirectCallee; |
79 | if (Function *Callee = CB->getCalledFunction()) |
80 | CalleeName = FunctionSamples::getCanonicalFnName(FnName: Callee->getName()); |
81 | return CalleeName; |
82 | }; |
83 | |
84 | // Extract profile matching anchors in the IR. |
85 | for (auto &BB : F) { |
86 | for (auto &I : BB) { |
87 | DILocation *DIL = I.getDebugLoc(); |
88 | if (!DIL) |
89 | continue; |
90 | |
91 | if (FunctionSamples::ProfileIsProbeBased) { |
92 | if (auto Probe = extractProbe(Inst: I)) { |
93 | // Flatten inlined IR for the matching. |
94 | if (DIL->getInlinedAt()) { |
95 | IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL)); |
96 | } else { |
97 | // Use empty StringRef for basic block probe. |
98 | StringRef CalleeName; |
99 | if (const auto *CB = dyn_cast<CallBase>(Val: &I)) { |
100 | // Skip the probe inst whose callee name is "llvm.pseudoprobe". |
101 | if (!isa<IntrinsicInst>(Val: &I)) |
102 | CalleeName = GetCanonicalCalleeName(CB); |
103 | } |
104 | LineLocation Loc = LineLocation(Probe->Id, 0); |
105 | IRAnchors.emplace(args&: Loc, args: FunctionId(CalleeName)); |
106 | } |
107 | } |
108 | } else { |
109 | // TODO: For line-number based profile(AutoFDO), currently only support |
110 | // find callsite anchors. In future, we need to parse all the non-call |
111 | // instructions to extract the line locations for profile matching. |
112 | if (!isa<CallBase>(Val: &I) || isa<IntrinsicInst>(Val: &I)) |
113 | continue; |
114 | |
115 | if (DIL->getInlinedAt()) { |
116 | IRAnchors.emplace(args: FindTopLevelInlinedCallsite(DIL)); |
117 | } else { |
118 | LineLocation Callsite = FunctionSamples::getCallSiteIdentifier( |
119 | DIL, ProfileIsFS: FunctionSamples::ProfileIsFS); |
120 | StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(Val: &I)); |
121 | IRAnchors.emplace(args&: Callsite, args: FunctionId(CalleeName)); |
122 | } |
123 | } |
124 | } |
125 | } |
126 | } |
127 | |
128 | void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, |
129 | AnchorMap &ProfileAnchors) const { |
130 | auto isInvalidLineOffset = [](uint32_t LineOffset) { |
131 | return LineOffset & 0x8000; |
132 | }; |
133 | |
134 | auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName, |
135 | AnchorMap &ProfileAnchors) { |
136 | auto Ret = ProfileAnchors.try_emplace(k: Loc, args: CalleeName); |
137 | if (!Ret.second) { |
138 | // For multiple callees, which indicates it's an indirect call, we use a |
139 | // dummy name(UnknownIndirectCallee) as the indrect callee name. |
140 | Ret.first->second = FunctionId(UnknownIndirectCallee); |
141 | } |
142 | }; |
143 | |
144 | for (const auto &I : FS.getBodySamples()) { |
145 | const LineLocation &Loc = I.first; |
146 | if (isInvalidLineOffset(Loc.LineOffset)) |
147 | continue; |
148 | for (const auto &C : I.second.getCallTargets()) |
149 | InsertAnchor(Loc, C.first, ProfileAnchors); |
150 | } |
151 | |
152 | for (const auto &I : FS.getCallsiteSamples()) { |
153 | const LineLocation &Loc = I.first; |
154 | if (isInvalidLineOffset(Loc.LineOffset)) |
155 | continue; |
156 | for (const auto &C : I.second) |
157 | InsertAnchor(Loc, C.first, ProfileAnchors); |
158 | } |
159 | } |
160 | |
161 | bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, |
162 | Function *&FuncWithoutProfile) { |
163 | FuncWithoutProfile = nullptr; |
164 | auto R = FunctionsWithoutProfile.find(Key: IRFuncName); |
165 | if (R != FunctionsWithoutProfile.end()) |
166 | FuncWithoutProfile = R->second; |
167 | return !FuncWithoutProfile; |
168 | } |
169 | |
170 | bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { |
171 | return SymbolMap->find(Key: ProfileFuncName) == SymbolMap->end(); |
172 | } |
173 | |
174 | bool SampleProfileMatcher::functionMatchesProfile( |
175 | const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, |
176 | bool FindMatchedProfileOnly) { |
177 | if (IRFuncName == ProfileFuncName) |
178 | return true; |
179 | if (!SalvageUnusedProfile) |
180 | return false; |
181 | |
182 | // If IR function doesn't have profile and the profile is unused, try |
183 | // matching them. |
184 | Function *IRFunc = nullptr; |
185 | if (functionHasProfile(IRFuncName, FuncWithoutProfile&: IRFunc) || |
186 | !isProfileUnused(ProfileFuncName)) |
187 | return false; |
188 | |
189 | assert(FunctionId(IRFunc->getName()) != ProfileFuncName && |
190 | "IR function should be different from profile function to match" ); |
191 | return functionMatchesProfile(IRFunc&: *IRFunc, ProfFunc: ProfileFuncName, |
192 | FindMatchedProfileOnly); |
193 | } |
194 | |
195 | LocToLocMap |
196 | SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, |
197 | const AnchorList &AnchorList2, |
198 | bool MatchUnusedFunction) { |
199 | LocToLocMap MatchedAnchors; |
200 | llvm::longestCommonSequence<LineLocation, FunctionId>( |
201 | AnchorList1, AnchorList2, |
202 | FunctionMatchesProfile: [&](const FunctionId &A, const FunctionId &B) { |
203 | return functionMatchesProfile( |
204 | IRFuncName: A, ProfileFuncName: B, |
205 | FindMatchedProfileOnly: !MatchUnusedFunction // Find matched function only |
206 | ); |
207 | }, |
208 | InsertMatching: [&](LineLocation A, LineLocation B) { |
209 | MatchedAnchors.try_emplace(k: A, args&: B); |
210 | }); |
211 | return MatchedAnchors; |
212 | } |
213 | |
214 | void SampleProfileMatcher::matchNonCallsiteLocs( |
215 | const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors, |
216 | LocToLocMap &IRToProfileLocationMap) { |
217 | auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) { |
218 | // Skip the unchanged location mapping to save memory. |
219 | if (From != To) |
220 | IRToProfileLocationMap.insert(x: {From, To}); |
221 | }; |
222 | |
223 | // Use function's beginning location as the initial anchor. |
224 | int32_t LocationDelta = 0; |
225 | SmallVector<LineLocation> LastMatchedNonAnchors; |
226 | for (const auto &IR : IRAnchors) { |
227 | const auto &Loc = IR.first; |
228 | bool IsMatchedAnchor = false; |
229 | // Match the anchor location in lexical order. |
230 | auto R = MatchedAnchors.find(x: Loc); |
231 | if (R != MatchedAnchors.end()) { |
232 | const auto &Candidate = R->second; |
233 | InsertMatching(Loc, Candidate); |
234 | LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef() |
235 | << " is matched from " << Loc << " to " << Candidate |
236 | << "\n" ); |
237 | LocationDelta = Candidate.LineOffset - Loc.LineOffset; |
238 | |
239 | // Match backwards for non-anchor locations. |
240 | // The locations in LastMatchedNonAnchors have been matched forwards |
241 | // based on the previous anchor, spilt it evenly and overwrite the |
242 | // second half based on the current anchor. |
243 | for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2; |
244 | I < LastMatchedNonAnchors.size(); I++) { |
245 | const auto &L = LastMatchedNonAnchors[I]; |
246 | uint32_t CandidateLineOffset = L.LineOffset + LocationDelta; |
247 | LineLocation Candidate(CandidateLineOffset, L.Discriminator); |
248 | InsertMatching(L, Candidate); |
249 | LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L |
250 | << " to " << Candidate << "\n" ); |
251 | } |
252 | |
253 | IsMatchedAnchor = true; |
254 | LastMatchedNonAnchors.clear(); |
255 | } |
256 | |
257 | // Match forwards for non-anchor locations. |
258 | if (!IsMatchedAnchor) { |
259 | uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta; |
260 | LineLocation Candidate(CandidateLineOffset, Loc.Discriminator); |
261 | InsertMatching(Loc, Candidate); |
262 | LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to " |
263 | << Candidate << "\n" ); |
264 | LastMatchedNonAnchors.emplace_back(Args: Loc); |
265 | } |
266 | } |
267 | } |
268 | |
269 | // Filter the non-call locations from IRAnchors and ProfileAnchors and write |
270 | // them into a list for random access later. |
271 | void SampleProfileMatcher::getFilteredAnchorList( |
272 | const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, |
273 | AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { |
274 | for (const auto &I : IRAnchors) { |
275 | if (I.second.stringRef().empty()) |
276 | continue; |
277 | FilteredIRAnchorsList.emplace_back(args: I); |
278 | } |
279 | |
280 | for (const auto &I : ProfileAnchors) |
281 | FilteredProfileAnchorList.emplace_back(args: I); |
282 | } |
283 | |
284 | // Call target name anchor based profile fuzzy matching. |
285 | // Input: |
286 | // For IR locations, the anchor is the callee name of direct callsite; For |
287 | // profile locations, it's the call target name for BodySamples or inlinee's |
288 | // profile name for CallsiteSamples. |
289 | // Matching heuristic: |
290 | // First match all the anchors using the diff algorithm, then split the |
291 | // non-anchor locations between the two anchors evenly, first half are matched |
292 | // based on the start anchor, second half are matched based on the end anchor. |
293 | // For example, given: |
294 | // IR locations: [1, 2(foo), 3, 5, 6(bar), 7] |
295 | // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9] |
296 | // The matching gives: |
297 | // [1, 2(foo), 3, 5, 6(bar), 7] |
298 | // | | | | | | |
299 | // [1, 2, 3(foo), 4, 7, 8(bar), 9] |
300 | // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. |
301 | void SampleProfileMatcher::runStaleProfileMatching( |
302 | const Function &F, const AnchorMap &IRAnchors, |
303 | const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, |
304 | bool RunCFGMatching, bool RunCGMatching) { |
305 | if (!RunCFGMatching && !RunCGMatching) |
306 | return; |
307 | LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() |
308 | << "\n" ); |
309 | assert(IRToProfileLocationMap.empty() && |
310 | "Run stale profile matching only once per function" ); |
311 | |
312 | AnchorList FilteredProfileAnchorList; |
313 | AnchorList FilteredIRAnchorsList; |
314 | getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, |
315 | FilteredProfileAnchorList); |
316 | |
317 | if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) |
318 | return; |
319 | |
320 | if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites || |
321 | FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) { |
322 | LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName() |
323 | << " because the number of callsites in the IR is " |
324 | << FilteredIRAnchorsList.size() |
325 | << " and in the profile is " |
326 | << FilteredProfileAnchorList.size() << "\n" ); |
327 | return; |
328 | } |
329 | |
330 | // Match the callsite anchors by finding the longest common subsequence |
331 | // between IR and profile. |
332 | // Define a match between two anchors as follows: |
333 | // 1) The function names of anchors are the same. |
334 | // 2) The similarity between the anchor functions is above a threshold if |
335 | // RunCGMatching is set. |
336 | // For 2), we only consider the anchor functions from IR and profile don't |
337 | // appear on either side to reduce the matching scope. Note that we need to |
338 | // use IR anchor as base(A side) to align with the order of |
339 | // IRToProfileLocationMap. |
340 | LocToLocMap MatchedAnchors = |
341 | longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList, |
342 | MatchUnusedFunction: RunCGMatching /* Match unused functions */); |
343 | |
344 | // CFG level matching: |
345 | // Apply the callsite matchings to infer matching for the basic |
346 | // block(non-callsite) locations and write the result to |
347 | // IRToProfileLocationMap. |
348 | if (RunCFGMatching) |
349 | matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); |
350 | } |
351 | |
352 | void SampleProfileMatcher::runOnFunction(Function &F) { |
353 | // We need to use flattened function samples for matching. |
354 | // Unlike IR, which includes all callsites from the source code, the callsites |
355 | // in profile only show up when they are hit by samples, i,e. the profile |
356 | // callsites in one context may differ from those in another context. To get |
357 | // the maximum number of callsites, we merge the function profiles from all |
358 | // contexts, aka, the flattened profile to find profile anchors. |
359 | const auto *FSForMatching = getFlattenedSamplesFor(F); |
360 | if (SalvageUnusedProfile && !FSForMatching) { |
361 | // Apply the matching in place to find the new function's matched profile. |
362 | auto R = FuncToProfileNameMap.find(x: &F); |
363 | if (R != FuncToProfileNameMap.end()) { |
364 | FSForMatching = getFlattenedSamplesFor(Fname: R->second); |
365 | // Try to find the salvaged top-level profiles that are explicitly loaded |
366 | // for the matching, see "functionMatchesProfileHelper" for the details. |
367 | if (!FSForMatching && LoadFuncProfileforCGMatching) |
368 | FSForMatching = Reader.getSamplesFor(Fname: R->second.stringRef()); |
369 | } |
370 | } |
371 | if (!FSForMatching) |
372 | return; |
373 | |
374 | // Anchors for IR. It's a map from IR location to callee name, callee name is |
375 | // empty for non-call instruction and use a dummy name(UnknownIndirectCallee) |
376 | // for unknown indrect callee name. |
377 | AnchorMap IRAnchors; |
378 | findIRAnchors(F, IRAnchors); |
379 | // Anchors for profile. It's a map from callsite location to a set of callee |
380 | // name. |
381 | AnchorMap ProfileAnchors; |
382 | findProfileAnchors(FS: *FSForMatching, ProfileAnchors); |
383 | |
384 | // Compute the callsite match states for profile staleness report. |
385 | if (ReportProfileStaleness || PersistProfileStaleness) |
386 | recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap: nullptr); |
387 | |
388 | if (!SalvageStaleProfile) |
389 | return; |
390 | // For probe-based profiles, run matching only when profile checksum is |
391 | // mismatched. |
392 | bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && |
393 | !ProbeManager->profileIsValid(F, Samples: *FSForMatching); |
394 | bool RunCFGMatching = |
395 | !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; |
396 | bool RunCGMatching = SalvageUnusedProfile; |
397 | // For imported functions, the checksum metadata(pseudo_probe_desc) are |
398 | // dropped, so we leverage function attribute(profile-checksum-mismatch) to |
399 | // transfer the info: add the attribute during pre-link phase and check it |
400 | // during post-link phase(see "profileIsValid"). |
401 | if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) |
402 | F.addFnAttr(Kind: "profile-checksum-mismatch" ); |
403 | |
404 | // The matching result will be saved to IRToProfileLocationMap, create a |
405 | // new map for each function. |
406 | auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); |
407 | runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, |
408 | RunCFGMatching, RunCGMatching); |
409 | // Find and update callsite match states after matching. |
410 | if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) |
411 | recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, |
412 | IRToProfileLocationMap: &IRToProfileLocationMap); |
413 | } |
414 | |
415 | void SampleProfileMatcher::recordCallsiteMatchStates( |
416 | const Function &F, const AnchorMap &IRAnchors, |
417 | const AnchorMap &ProfileAnchors, |
418 | const LocToLocMap *IRToProfileLocationMap) { |
419 | bool IsPostMatch = IRToProfileLocationMap != nullptr; |
420 | auto &CallsiteMatchStates = |
421 | FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(FnName: F.getName())]; |
422 | |
423 | auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) { |
424 | // IRToProfileLocationMap is null in pre-match phrase. |
425 | if (!IRToProfileLocationMap) |
426 | return IRLoc; |
427 | const auto &ProfileLoc = IRToProfileLocationMap->find(x: IRLoc); |
428 | if (ProfileLoc != IRToProfileLocationMap->end()) |
429 | return ProfileLoc->second; |
430 | else |
431 | return IRLoc; |
432 | }; |
433 | |
434 | for (const auto &I : IRAnchors) { |
435 | // After fuzzy profile matching, use the matching result to remap the |
436 | // current IR callsite. |
437 | const auto &ProfileLoc = MapIRLocToProfileLoc(I.first); |
438 | const auto &IRCalleeId = I.second; |
439 | const auto &It = ProfileAnchors.find(x: ProfileLoc); |
440 | if (It == ProfileAnchors.end()) |
441 | continue; |
442 | const auto &ProfCalleeId = It->second; |
443 | if (IRCalleeId == ProfCalleeId) { |
444 | auto It = CallsiteMatchStates.find(x: ProfileLoc); |
445 | if (It == CallsiteMatchStates.end()) |
446 | CallsiteMatchStates.emplace(args: ProfileLoc, args: MatchState::InitialMatch); |
447 | else if (IsPostMatch) { |
448 | if (It->second == MatchState::InitialMatch) |
449 | It->second = MatchState::UnchangedMatch; |
450 | else if (It->second == MatchState::InitialMismatch) |
451 | It->second = MatchState::RecoveredMismatch; |
452 | } |
453 | } |
454 | } |
455 | |
456 | // Check if there are any callsites in the profile that does not match to any |
457 | // IR callsites. |
458 | for (const auto &I : ProfileAnchors) { |
459 | const auto &Loc = I.first; |
460 | assert(!I.second.stringRef().empty() && "Callees should not be empty" ); |
461 | auto It = CallsiteMatchStates.find(x: Loc); |
462 | if (It == CallsiteMatchStates.end()) |
463 | CallsiteMatchStates.emplace(args: Loc, args: MatchState::InitialMismatch); |
464 | else if (IsPostMatch) { |
465 | // Update the state if it's not matched(UnchangedMatch or |
466 | // RecoveredMismatch). |
467 | if (It->second == MatchState::InitialMismatch) |
468 | It->second = MatchState::UnchangedMismatch; |
469 | else if (It->second == MatchState::InitialMatch) |
470 | It->second = MatchState::RemovedMatch; |
471 | } |
472 | } |
473 | } |
474 | |
475 | void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS, |
476 | bool IsTopLevel) { |
477 | const auto *FuncDesc = ProbeManager->getDesc(GUID: FS.getGUID()); |
478 | // Skip the function that is external or renamed. |
479 | if (!FuncDesc) |
480 | return; |
481 | |
482 | if (ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: FS)) { |
483 | if (IsTopLevel) |
484 | NumStaleProfileFunc++; |
485 | // Given currently all probe ids are after block probe ids, once the |
486 | // checksum is mismatched, it's likely all the callites are mismatched and |
487 | // dropped. We conservatively count all the samples as mismatched and stop |
488 | // counting the inlinees' profiles. |
489 | MismatchedFunctionSamples += FS.getTotalSamples(); |
490 | return; |
491 | } |
492 | |
493 | // Even the current-level function checksum is matched, it's possible that the |
494 | // nested inlinees' checksums are mismatched that affect the inlinee's sample |
495 | // loading, we need to go deeper to check the inlinees' function samples. |
496 | // Similarly, count all the samples as mismatched if the inlinee's checksum is |
497 | // mismatched using this recursive function. |
498 | for (const auto &I : FS.getCallsiteSamples()) |
499 | for (const auto &CS : I.second) |
500 | countMismatchedFuncSamples(FS: CS.second, IsTopLevel: false); |
501 | } |
502 | |
503 | void SampleProfileMatcher::countMismatchedCallsiteSamples( |
504 | const FunctionSamples &FS) { |
505 | auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName()); |
506 | // Skip it if no mismatched callsite or this is an external function. |
507 | if (It == FuncCallsiteMatchStates.end() || It->second.empty()) |
508 | return; |
509 | const auto &CallsiteMatchStates = It->second; |
510 | |
511 | auto findMatchState = [&](const LineLocation &Loc) { |
512 | auto It = CallsiteMatchStates.find(x: Loc); |
513 | if (It == CallsiteMatchStates.end()) |
514 | return MatchState::Unknown; |
515 | return It->second; |
516 | }; |
517 | |
518 | auto AttributeMismatchedSamples = [&](const enum MatchState &State, |
519 | uint64_t Samples) { |
520 | if (isMismatchState(State)) |
521 | MismatchedCallsiteSamples += Samples; |
522 | else if (State == MatchState::RecoveredMismatch) |
523 | RecoveredCallsiteSamples += Samples; |
524 | }; |
525 | |
526 | // The non-inlined callsites are saved in the body samples of function |
527 | // profile, go through it to count the non-inlined callsite samples. |
528 | for (const auto &I : FS.getBodySamples()) |
529 | AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples()); |
530 | |
531 | // Count the inlined callsite samples. |
532 | for (const auto &I : FS.getCallsiteSamples()) { |
533 | auto State = findMatchState(I.first); |
534 | uint64_t CallsiteSamples = 0; |
535 | for (const auto &CS : I.second) |
536 | CallsiteSamples += CS.second.getTotalSamples(); |
537 | AttributeMismatchedSamples(State, CallsiteSamples); |
538 | |
539 | if (isMismatchState(State)) |
540 | continue; |
541 | |
542 | // When the current level of inlined call site matches the profiled call |
543 | // site, we need to go deeper along the inline tree to count mismatches from |
544 | // lower level inlinees. |
545 | for (const auto &CS : I.second) |
546 | countMismatchedCallsiteSamples(FS: CS.second); |
547 | } |
548 | } |
549 | |
550 | void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { |
551 | auto It = FuncCallsiteMatchStates.find(Key: FS.getFuncName()); |
552 | // Skip it if no mismatched callsite or this is an external function. |
553 | if (It == FuncCallsiteMatchStates.end() || It->second.empty()) |
554 | return; |
555 | const auto &MatchStates = It->second; |
556 | [[maybe_unused]] bool OnInitialState = |
557 | isInitialState(State: MatchStates.begin()->second); |
558 | for (const auto &I : MatchStates) { |
559 | TotalProfiledCallsites++; |
560 | assert( |
561 | (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) && |
562 | "Profile matching state is inconsistent" ); |
563 | |
564 | if (isMismatchState(State: I.second)) |
565 | NumMismatchedCallsites++; |
566 | else if (I.second == MatchState::RecoveredMismatch) |
567 | NumRecoveredCallsites++; |
568 | } |
569 | } |
570 | |
571 | void SampleProfileMatcher::countCallGraphRecoveredSamples( |
572 | const FunctionSamples &FS, |
573 | std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) { |
574 | if (CallGraphRecoveredProfiles.count(x: FS.getFunction())) { |
575 | NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); |
576 | return; |
577 | } |
578 | |
579 | for (const auto &CM : FS.getCallsiteSamples()) { |
580 | for (const auto &CS : CM.second) { |
581 | countCallGraphRecoveredSamples(FS: CS.second, CallGraphRecoveredProfiles); |
582 | } |
583 | } |
584 | } |
585 | |
586 | void SampleProfileMatcher::computeAndReportProfileStaleness() { |
587 | if (!ReportProfileStaleness && !PersistProfileStaleness) |
588 | return; |
589 | |
590 | std::unordered_set<FunctionId> CallGraphRecoveredProfiles; |
591 | if (SalvageUnusedProfile) { |
592 | for (const auto &I : FuncToProfileNameMap) { |
593 | CallGraphRecoveredProfiles.insert(x: I.second); |
594 | if (GlobalValue::isAvailableExternallyLinkage(Linkage: I.first->getLinkage())) |
595 | continue; |
596 | NumCallGraphRecoveredProfiledFunc++; |
597 | } |
598 | } |
599 | |
600 | // Count profile mismatches for profile staleness report. |
601 | for (const auto &F : M) { |
602 | if (skipProfileForFunction(F)) |
603 | continue; |
604 | // As the stats will be merged by linker, skip reporting the metrics for |
605 | // imported functions to avoid repeated counting. |
606 | if (GlobalValue::isAvailableExternallyLinkage(Linkage: F.getLinkage())) |
607 | continue; |
608 | const auto *FS = Reader.getSamplesFor(F); |
609 | if (!FS) |
610 | continue; |
611 | TotalProfiledFunc++; |
612 | TotalFunctionSamples += FS->getTotalSamples(); |
613 | |
614 | if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) |
615 | countCallGraphRecoveredSamples(FS: *FS, CallGraphRecoveredProfiles); |
616 | |
617 | // Checksum mismatch is only used in pseudo-probe mode. |
618 | if (FunctionSamples::ProfileIsProbeBased) |
619 | countMismatchedFuncSamples(FS: *FS, IsTopLevel: true); |
620 | |
621 | // Count mismatches and samples for calliste. |
622 | countMismatchCallsites(FS: *FS); |
623 | countMismatchedCallsiteSamples(FS: *FS); |
624 | } |
625 | |
626 | if (ReportProfileStaleness) { |
627 | if (FunctionSamples::ProfileIsProbeBased) { |
628 | errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc |
629 | << ") of functions' profile are invalid and (" |
630 | << MismatchedFunctionSamples << "/" << TotalFunctionSamples |
631 | << ") of samples are discarded due to function hash mismatch.\n" ; |
632 | } |
633 | if (SalvageUnusedProfile) { |
634 | errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" |
635 | << TotalProfiledFunc << ") of functions' profile are matched and (" |
636 | << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples |
637 | << ") of samples are reused by call graph matching.\n" ; |
638 | } |
639 | |
640 | errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" |
641 | << TotalProfiledCallsites |
642 | << ") of callsites' profile are invalid and (" |
643 | << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/" |
644 | << TotalFunctionSamples |
645 | << ") of samples are discarded due to callsite location mismatch.\n" ; |
646 | errs() << "(" << NumRecoveredCallsites << "/" |
647 | << (NumRecoveredCallsites + NumMismatchedCallsites) |
648 | << ") of callsites and (" << RecoveredCallsiteSamples << "/" |
649 | << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) |
650 | << ") of samples are recovered by stale profile matching.\n" ; |
651 | } |
652 | |
653 | if (PersistProfileStaleness) { |
654 | LLVMContext &Ctx = M.getContext(); |
655 | MDBuilder MDB(Ctx); |
656 | |
657 | SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec; |
658 | if (FunctionSamples::ProfileIsProbeBased) { |
659 | ProfStatsVec.emplace_back(Args: "NumStaleProfileFunc" , Args&: NumStaleProfileFunc); |
660 | ProfStatsVec.emplace_back(Args: "TotalProfiledFunc" , Args&: TotalProfiledFunc); |
661 | ProfStatsVec.emplace_back(Args: "MismatchedFunctionSamples" , |
662 | Args&: MismatchedFunctionSamples); |
663 | ProfStatsVec.emplace_back(Args: "TotalFunctionSamples" , Args&: TotalFunctionSamples); |
664 | } |
665 | |
666 | if (SalvageUnusedProfile) { |
667 | ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredProfiledFunc" , |
668 | Args&: NumCallGraphRecoveredProfiledFunc); |
669 | ProfStatsVec.emplace_back(Args: "NumCallGraphRecoveredFuncSamples" , |
670 | Args&: NumCallGraphRecoveredFuncSamples); |
671 | } |
672 | |
673 | ProfStatsVec.emplace_back(Args: "NumMismatchedCallsites" , Args&: NumMismatchedCallsites); |
674 | ProfStatsVec.emplace_back(Args: "NumRecoveredCallsites" , Args&: NumRecoveredCallsites); |
675 | ProfStatsVec.emplace_back(Args: "TotalProfiledCallsites" , Args&: TotalProfiledCallsites); |
676 | ProfStatsVec.emplace_back(Args: "MismatchedCallsiteSamples" , |
677 | Args&: MismatchedCallsiteSamples); |
678 | ProfStatsVec.emplace_back(Args: "RecoveredCallsiteSamples" , |
679 | Args&: RecoveredCallsiteSamples); |
680 | |
681 | auto *MD = MDB.createLLVMStats(LLVMStatsVec: ProfStatsVec); |
682 | auto *NMD = M.getOrInsertNamedMetadata(Name: "llvm.stats" ); |
683 | NMD->addOperand(M: MD); |
684 | } |
685 | } |
686 | |
687 | void SampleProfileMatcher::findFunctionsWithoutProfile() { |
688 | // TODO: Support MD5 profile. |
689 | if (FunctionSamples::UseMD5) |
690 | return; |
691 | StringSet<> NamesInProfile; |
692 | if (auto NameTable = Reader.getNameTable()) { |
693 | for (auto Name : *NameTable) |
694 | NamesInProfile.insert(key: Name.stringRef()); |
695 | } |
696 | |
697 | for (auto &F : M) { |
698 | // Skip declarations, as even if the function can be matched, we have |
699 | // nothing to do with it. |
700 | if (F.isDeclaration()) |
701 | continue; |
702 | |
703 | StringRef CanonFName = FunctionSamples::getCanonicalFnName(FnName: F.getName()); |
704 | const auto *FS = getFlattenedSamplesFor(F); |
705 | if (FS) |
706 | continue; |
707 | |
708 | // For extended binary, functions fully inlined may not be loaded in the |
709 | // top-level profile, so check the NameTable which has the all symbol names |
710 | // in profile. |
711 | if (NamesInProfile.count(Key: CanonFName)) |
712 | continue; |
713 | |
714 | // For extended binary, non-profiled function symbols are in the profile |
715 | // symbol list table. |
716 | if (PSL && PSL->contains(Name: CanonFName)) |
717 | continue; |
718 | |
719 | LLVM_DEBUG(dbgs() << "Function " << CanonFName |
720 | << " is not in profile or profile symbol list.\n" ); |
721 | FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; |
722 | } |
723 | } |
724 | |
725 | bool SampleProfileMatcher::functionMatchesProfileHelper( |
726 | const Function &IRFunc, const FunctionId &ProfFunc) { |
727 | // The value is in the range [0, 1]. The bigger the value is, the more similar |
728 | // two sequences are. |
729 | float Similarity = 0.0; |
730 | |
731 | // Match the functions if they have the same base name(after demangling) and |
732 | // skip the similarity check. |
733 | ItaniumPartialDemangler Demangler; |
734 | // Helper lambda to demangle and get the base name. If the demangling failed, |
735 | // return an empty string. |
736 | auto GetBaseName = [&](StringRef FName) { |
737 | auto FunctionName = FName.str(); |
738 | if (Demangler.partialDemangle(MangledName: FunctionName.c_str())) |
739 | return std::string(); |
740 | size_t BaseNameSize = 0; |
741 | // The demangler API follows the __cxa_demangle one, and thus needs a |
742 | // pointer that originates from malloc (or nullptr) and the caller is |
743 | // responsible for free()-ing the buffer. |
744 | char *BaseNamePtr = Demangler.getFunctionBaseName(Buf: nullptr, N: &BaseNameSize); |
745 | std::string Result = (BaseNamePtr && BaseNameSize) |
746 | ? std::string(BaseNamePtr, BaseNameSize) |
747 | : std::string(); |
748 | free(ptr: BaseNamePtr); |
749 | return Result; |
750 | }; |
751 | auto IRBaseName = GetBaseName(IRFunc.getName()); |
752 | auto ProfBaseName = GetBaseName(ProfFunc.stringRef()); |
753 | if (!IRBaseName.empty() && IRBaseName == ProfBaseName) { |
754 | LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and " |
755 | << ProfFunc << "(Profile) share the same base name: " |
756 | << IRBaseName << ".\n" ); |
757 | return true; |
758 | } |
759 | |
760 | const auto *FSForMatching = getFlattenedSamplesFor(Fname: ProfFunc); |
761 | // With extbinary profile format, initial profile loading only reads profile |
762 | // based on current function names in the module. |
763 | // However, if a function is renamed, sample loader skips to load its original |
764 | // profile(which has a different name), we will miss this case. To address |
765 | // this, we load the top-level profile candidate explicitly for the matching. |
766 | if (!FSForMatching && LoadFuncProfileforCGMatching) { |
767 | DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()}); |
768 | if (std::error_code EC = Reader.read(FuncsToUse: TopLevelFunc)) |
769 | return false; |
770 | FSForMatching = Reader.getSamplesFor(Fname: ProfFunc.stringRef()); |
771 | LLVM_DEBUG({ |
772 | if (FSForMatching) |
773 | dbgs() << "Read top-level function " << ProfFunc |
774 | << " for call-graph matching\n" ; |
775 | }); |
776 | } |
777 | if (!FSForMatching) |
778 | return false; |
779 | // The check for similarity or checksum may not be reliable if the function is |
780 | // tiny, we use the number of basic block as a proxy for the function |
781 | // complexity and skip the matching if it's too small. |
782 | if (IRFunc.size() < MinFuncCountForCGMatching || |
783 | FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching) |
784 | return false; |
785 | |
786 | // For probe-based function, we first trust the checksum info. If the checksum |
787 | // doesn't match, we continue checking for similarity. |
788 | if (FunctionSamples::ProfileIsProbeBased) { |
789 | const auto *FuncDesc = ProbeManager->getDesc(F: IRFunc); |
790 | if (FuncDesc && |
791 | !ProbeManager->profileIsHashMismatched(FuncDesc: *FuncDesc, Samples: *FSForMatching)) { |
792 | LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() |
793 | << "(IR) and " << ProfFunc << "(Profile) match.\n" ); |
794 | |
795 | return true; |
796 | } |
797 | } |
798 | |
799 | AnchorMap IRAnchors; |
800 | findIRAnchors(F: IRFunc, IRAnchors); |
801 | AnchorMap ProfileAnchors; |
802 | findProfileAnchors(FS: *FSForMatching, ProfileAnchors); |
803 | |
804 | AnchorList FilteredIRAnchorsList; |
805 | AnchorList FilteredProfileAnchorList; |
806 | getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, |
807 | FilteredProfileAnchorList); |
808 | |
809 | // Similarly skip the matching if the num of anchors is not enough. |
810 | if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || |
811 | FilteredProfileAnchorList.size() < MinCallCountForCGMatching) |
812 | return false; |
813 | |
814 | // Use the diff algorithm to find the LCS between IR and profile. |
815 | |
816 | // Don't recursively match the callee function to avoid infinite matching, |
817 | // callee functions will be handled later since it's processed in top-down |
818 | // order . |
819 | LocToLocMap MatchedAnchors = |
820 | longestCommonSequence(AnchorList1: FilteredIRAnchorsList, AnchorList2: FilteredProfileAnchorList, |
821 | MatchUnusedFunction: false /* Match unused functions */); |
822 | |
823 | Similarity = static_cast<float>(MatchedAnchors.size()) / |
824 | FilteredProfileAnchorList.size(); |
825 | |
826 | LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() |
827 | << "(IR) and " << ProfFunc << "(profile) is " |
828 | << format("%.2f" , Similarity) << "\n" ); |
829 | assert((Similarity >= 0 && Similarity <= 1.0) && |
830 | "Similarity value should be in [0, 1]" ); |
831 | return Similarity * 100 > FuncProfileSimilarityThreshold; |
832 | } |
833 | |
834 | // If FindMatchedProfileOnly is set to true, only use the processed function |
835 | // results. This is used for skipping the repeated recursive matching. |
836 | bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, |
837 | const FunctionId &ProfFunc, |
838 | bool FindMatchedProfileOnly) { |
839 | auto R = FuncProfileMatchCache.find(x: {&IRFunc, ProfFunc}); |
840 | if (R != FuncProfileMatchCache.end()) |
841 | return R->second; |
842 | |
843 | if (FindMatchedProfileOnly) |
844 | return false; |
845 | |
846 | bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); |
847 | FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; |
848 | if (Matched) { |
849 | FuncToProfileNameMap[&IRFunc] = ProfFunc; |
850 | LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() |
851 | << " matches profile:" << ProfFunc << "\n" ); |
852 | } |
853 | |
854 | return Matched; |
855 | } |
856 | |
857 | void SampleProfileMatcher::UpdateWithSalvagedProfiles() { |
858 | DenseSet<StringRef> ProfileSalvagedFuncs; |
859 | // Update FuncNameToProfNameMap and SymbolMap. |
860 | for (auto &I : FuncToProfileNameMap) { |
861 | assert(I.first && "New function is null" ); |
862 | FunctionId FuncName(I.first->getName()); |
863 | ProfileSalvagedFuncs.insert(V: I.second.stringRef()); |
864 | FuncNameToProfNameMap->emplace(Args&: FuncName, Args&: I.second); |
865 | |
866 | // We need to remove the old entry to avoid duplicating the function |
867 | // processing. |
868 | SymbolMap->erase(Ctx: FuncName); |
869 | SymbolMap->emplace(Args&: I.second, Args: I.first); |
870 | } |
871 | |
872 | // With extbinary profile format, initial profile loading only reads profile |
873 | // based on current function names in the module, so we need to load top-level |
874 | // profiles for functions with different profile name explicitly after |
875 | // function-profile name map is established with stale profile matching. |
876 | Reader.read(FuncsToUse: ProfileSalvagedFuncs); |
877 | Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap); |
878 | } |
879 | |
880 | void SampleProfileMatcher::runOnModule() { |
881 | ProfileConverter::flattenProfile(InputProfiles: Reader.getProfiles(), OutputProfiles&: FlattenedProfiles, |
882 | ProfileIsCS: FunctionSamples::ProfileIsCS); |
883 | if (SalvageUnusedProfile) |
884 | findFunctionsWithoutProfile(); |
885 | |
886 | // Process the matching in top-down order so that the caller matching result |
887 | // can be used to the callee matching. |
888 | std::vector<Function *> TopDownFunctionList; |
889 | TopDownFunctionList.reserve(n: M.size()); |
890 | buildTopDownFuncOrder(CG, FunctionOrderList&: TopDownFunctionList); |
891 | for (auto *F : TopDownFunctionList) { |
892 | if (skipProfileForFunction(F: *F)) |
893 | continue; |
894 | runOnFunction(F&: *F); |
895 | } |
896 | |
897 | if (SalvageUnusedProfile) |
898 | UpdateWithSalvagedProfiles(); |
899 | |
900 | if (SalvageStaleProfile) |
901 | distributeIRToProfileLocationMap(); |
902 | |
903 | computeAndReportProfileStaleness(); |
904 | } |
905 | |
906 | void SampleProfileMatcher::distributeIRToProfileLocationMap( |
907 | FunctionSamples &FS) { |
908 | const auto ProfileMappings = FuncMappings.find(Key: FS.getFuncName()); |
909 | if (ProfileMappings != FuncMappings.end()) { |
910 | FS.setIRToProfileLocationMap(&(ProfileMappings->second)); |
911 | } |
912 | |
913 | for (auto &Callees : |
914 | const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { |
915 | for (auto &FS : Callees.second) { |
916 | distributeIRToProfileLocationMap(FS&: FS.second); |
917 | } |
918 | } |
919 | } |
920 | |
921 | // Use a central place to distribute the matching results. Outlined and inlined |
922 | // profile with the function name will be set to the same pointer. |
923 | void SampleProfileMatcher::distributeIRToProfileLocationMap() { |
924 | for (auto &I : Reader.getProfiles()) { |
925 | distributeIRToProfileLocationMap(FS&: I.second); |
926 | } |
927 | } |
928 | |