| 1 | //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | #include "ProfileGenerator.h" |
| 9 | #include "ErrorHandling.h" |
| 10 | #include "MissingFrameInferrer.h" |
| 11 | #include "PerfReader.h" |
| 12 | #include "ProfiledBinary.h" |
| 13 | #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" |
| 14 | #include "llvm/ProfileData/ProfileCommon.h" |
| 15 | #include <algorithm> |
| 16 | #include <float.h> |
| 17 | #include <unordered_set> |
| 18 | #include <utility> |
| 19 | |
| 20 | cl::opt<std::string> OutputFilename("output" , cl::value_desc("output" ), |
| 21 | cl::Required, |
| 22 | cl::desc("Output profile file" )); |
| 23 | static cl::alias OutputA("o" , cl::desc("Alias for --output" ), |
| 24 | cl::aliasopt(OutputFilename)); |
| 25 | |
| 26 | static cl::opt<SampleProfileFormat> OutputFormat( |
| 27 | "format" , cl::desc("Format of output profile" ), cl::init(Val: SPF_Ext_Binary), |
| 28 | cl::values( |
| 29 | clEnumValN(SPF_Binary, "binary" , "Binary encoding (default)" ), |
| 30 | clEnumValN(SPF_Ext_Binary, "extbinary" , "Extensible binary encoding" ), |
| 31 | clEnumValN(SPF_Text, "text" , "Text encoding" ), |
| 32 | clEnumValN(SPF_GCC, "gcc" , |
| 33 | "GCC encoding (only meaningful for -sample)" ))); |
| 34 | |
| 35 | static cl::opt<bool> UseMD5( |
| 36 | "use-md5" , cl::Hidden, |
| 37 | cl::desc("Use md5 to represent function names in the output profile (only " |
| 38 | "meaningful for -extbinary)" )); |
| 39 | |
| 40 | static cl::opt<bool> PopulateProfileSymbolList( |
| 41 | "populate-profile-symbol-list" , cl::init(Val: false), cl::Hidden, |
| 42 | cl::desc("Populate profile symbol list (only meaningful for -extbinary)" )); |
| 43 | |
| 44 | static cl::opt<bool> FillZeroForAllFuncs( |
| 45 | "fill-zero-for-all-funcs" , cl::init(Val: false), cl::Hidden, |
| 46 |     cl::desc("Attribute all functions' ranges with zero count " |
| 47 |              "even if they are not hit by any samples." )); |
| 48 | |
| 49 | static cl::opt<int32_t, true> RecursionCompression( |
| 50 | "compress-recursion" , |
| 51 | cl::desc("Compressing recursion by deduplicating adjacent frame " |
| 52 | "sequences up to the specified size. -1 means no size limit." ), |
| 53 | cl::Hidden, |
| 54 | cl::location(L&: llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); |
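|  | // For example (illustrative): with compression enabled, a recursive context |
|  | // such as [main, foo, bar, foo, bar, baz] has the adjacent duplicate sequence |
|  | // [foo, bar] deduplicated, yielding [main, foo, bar, baz]. |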
| 55 | |
| 56 | static cl::opt<bool> |
| 57 | TrimColdProfile("trim-cold-profile" , |
| 58 | cl::desc("If the total count of the profile is smaller " |
| 59 |                          "than the threshold, it will be trimmed." )); |
| 60 | |
| 61 | static cl::opt<bool> CSProfMergeColdContext( |
| 62 | "csprof-merge-cold-context" , cl::init(Val: true), |
| 63 | cl::desc("If the total count of context profile is smaller than " |
| 64 | "the threshold, it will be merged into context-less base " |
| 65 | "profile." )); |
| 66 | |
| 67 | static cl::opt<uint32_t> CSProfMaxColdContextDepth( |
| 68 | "csprof-max-cold-context-depth" , cl::init(Val: 1), |
| 69 | cl::desc("Keep the last K contexts while merging cold profile. 1 means the " |
| 70 | "context-less base profile" )); |
| 71 | |
| 72 | static cl::opt<int, true> CSProfMaxContextDepth( |
| 73 | "csprof-max-context-depth" , |
| 74 | cl::desc("Keep the last K contexts while merging profile. -1 means no " |
| 75 | "depth limit." ), |
| 76 | cl::location(L&: llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); |
| 77 | |
| 78 | static cl::opt<double> ProfileDensityThreshold( |
| 79 | "profile-density-threshold" , llvm::cl::init(Val: 50), |
| 80 | llvm::cl::desc("If the profile density is below the given threshold, it " |
| 81 | "will be suggested to increase the sampling rate." ), |
| 82 | llvm::cl::Optional); |
| 83 | static cl::opt<bool> ShowDensity("show-density" , llvm::cl::init(Val: false), |
| 84 | llvm::cl::desc("show profile density details" ), |
| 85 | llvm::cl::Optional); |
| 86 | static cl::opt<int> ProfileDensityCutOffHot( |
| 87 | "profile-density-cutoff-hot" , llvm::cl::init(Val: 990000), |
| 88 | llvm::cl::desc("Total samples cutoff for functions used to calculate " |
| 89 | "profile density." )); |
| 90 | |
| 91 | static cl::opt<bool> UpdateTotalSamples( |
| 92 | "update-total-samples" , llvm::cl::init(Val: false), |
| 93 | llvm::cl::desc( |
| 94 | "Update total samples by accumulating all its body samples." ), |
| 95 | llvm::cl::Optional); |
| 96 | |
| 97 | static cl::opt<bool> GenCSNestedProfile( |
| 98 | "gen-cs-nested-profile" , cl::Hidden, cl::init(Val: true), |
| 99 | cl::desc("Generate nested function profiles for CSSPGO" )); |
| 100 | |
| 101 | cl::opt<bool> InferMissingFrames( |
| 102 | "infer-missing-frames" , llvm::cl::init(Val: true), |
| 103 | llvm::cl::desc( |
| 104 | "Infer missing call frames due to compiler tail call elimination." ), |
| 105 | llvm::cl::Optional); |
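|  | |
|  | // Illustrative usage of the output-related options defined above. The input |
|  | // flags (e.g. --binary, --perfscript) are defined elsewhere in llvm-profgen; |
|  | // treat this as a sketch rather than the exact command line: |
|  | //   llvm-profgen --binary=./a.out --perfscript=perf.script \ |
|  | //     --output=a.prof --format=extbinary --use-md5 |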
| 106 | |
| 107 | using namespace llvm; |
| 108 | using namespace sampleprof; |
| 109 | |
| 110 | namespace llvm { |
| 111 | |
| 112 | namespace sampleprof { |
| 113 | |
| 114 | // Initialize the MaxCompressionSize to -1 which means no size limit |
| 115 | int32_t CSProfileGenerator::MaxCompressionSize = -1; |
| 116 | |
| 117 | int CSProfileGenerator::MaxContextDepth = -1; |
| 118 | |
| 119 | bool ProfileGeneratorBase::UseFSDiscriminator = false; |
| 120 | |
| 121 | std::unique_ptr<ProfileGeneratorBase> |
| 122 | ProfileGeneratorBase::create(ProfiledBinary *Binary, |
| 123 | const ContextSampleCounterMap *SampleCounters, |
| 124 | bool ProfileIsCS) { |
| 125 | std::unique_ptr<ProfileGeneratorBase> Generator; |
| 126 | if (ProfileIsCS) { |
| 127 | Generator.reset(p: new CSProfileGenerator(Binary, SampleCounters)); |
| 128 | } else { |
| 129 | Generator.reset(p: new ProfileGenerator(Binary, SampleCounters)); |
| 130 | } |
| 131 | ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); |
| 132 | FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); |
| 133 | |
| 134 | return Generator; |
| 135 | } |
| 136 | |
| 137 | std::unique_ptr<ProfileGeneratorBase> |
| 138 | ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles, |
| 139 | bool ProfileIsCS) { |
| 140 | std::unique_ptr<ProfileGeneratorBase> Generator; |
| 141 | if (ProfileIsCS) { |
| 142 | Generator.reset(p: new CSProfileGenerator(Binary, Profiles)); |
| 143 | } else { |
| 144 | Generator.reset(p: new ProfileGenerator(Binary, std::move(Profiles))); |
| 145 | } |
| 146 | ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); |
| 147 | FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); |
| 148 | |
| 149 | return Generator; |
| 150 | } |
| 151 | |
| 152 | void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, |
| 153 | SampleProfileMap &ProfileMap) { |
| 154 | // Populate profile symbol list if extended binary format is used. |
| 155 | ProfileSymbolList SymbolList; |
| 156 | |
| 157 | if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { |
| 158 | Binary->populateSymbolListFromDWARF(SymbolList); |
| 159 | Writer->setProfileSymbolList(&SymbolList); |
| 160 | } |
| 161 | |
| 162 | if (std::error_code EC = Writer->write(ProfileMap)) |
| 163 | exitWithError(EC: std::move(EC)); |
| 164 | } |
| 165 | |
| 166 | void ProfileGeneratorBase::write() { |
| 167 | auto WriterOrErr = SampleProfileWriter::create(Filename: OutputFilename, Format: OutputFormat); |
| 168 | if (std::error_code EC = WriterOrErr.getError()) |
| 169 | exitWithError(EC, Whence: OutputFilename); |
| 170 | |
| 171 | if (UseMD5) { |
| 172 | if (OutputFormat != SPF_Ext_Binary) |
| 173 | WithColor::warning() << "-use-md5 is ignored. Specify " |
| 174 | "--format=extbinary to enable it\n" ; |
| 175 | else |
| 176 | WriterOrErr.get()->setUseMD5(); |
| 177 | } |
| 178 | |
| 179 | write(Writer: std::move(WriterOrErr.get()), ProfileMap); |
| 180 | } |
| 181 | |
| 182 | void ProfileGeneratorBase::showDensitySuggestion(double Density) { |
| 183 | if (Density == 0.0) |
| 184 | WithColor::warning() << "The output profile is empty or the " |
| 185 | "--profile-density-cutoff-hot option is " |
| 186 | "set too low. Please check your command.\n" ; |
| 187 | else if (Density < ProfileDensityThreshold) |
| 188 | WithColor::warning() |
| 189 | << "Sample PGO is estimated to optimize better with " |
| 190 | << format(Fmt: "%.1f" , Vals: ProfileDensityThreshold / Density) |
| 191 | << "x more samples. Please consider increasing sampling rate or " |
| 192 | "profiling for longer duration to get more samples.\n" ; |
| 193 | |
| 194 | if (ShowDensity) |
| 195 | outs() << "Functions with density >= " << format(Fmt: "%.1f" , Vals: Density) |
| 196 | << " account for " |
| 197 | << format(Fmt: "%.2f" , |
| 198 | Vals: static_cast<double>(ProfileDensityCutOffHot) / 10000) |
| 199 | << "% total sample counts.\n" ; |
| 200 | } |
| 201 | |
| 202 | bool ProfileGeneratorBase::filterAmbiguousProfile(FunctionSamples &FS) { |
| 203 | for (const auto &Prefix : FuncPrefixsToFilter) { |
| 204 | if (FS.getFuncName().starts_with(Prefix)) |
| 205 | return true; |
| 206 | } |
| 207 | |
| 208 |   // Also filter the function profiles of the inlinees. This is useful for |
| 209 |   // fuzzy profile matching, which flattens the profile so that inlinees' |
| 210 |   // samples are merged into the top-level function. |
| 211 | for (auto &Callees : |
| 212 | const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { |
| 213 | auto &CalleesMap = Callees.second; |
| 214 | for (auto I = CalleesMap.begin(); I != CalleesMap.end();) { |
| 215 | auto FS = I++; |
| 216 | if (filterAmbiguousProfile(FS&: FS->second)) |
| 217 | CalleesMap.erase(position: FS); |
| 218 | } |
| 219 | } |
| 220 | return false; |
| 221 | } |
| 222 | |
| 223 | // For built-in local initialization functions such as __cxx_global_var_init and |
| 224 | // __tls_init prefix functions, there can be multiple versions of the function |
| 225 | // in the final binary. However, during profile generation we call |
| 226 | // getCanonicalFnName to canonicalize the names, which strips the suffixes. |
| 227 | // Therefore, samples from different functions query the same profile and the |
| 228 | // samples are merged. As the functions are essentially different, entries of |
| 229 | // the merged profile are ambiguous. In the sample loader, the IR from one |
| 230 | // version would be attributed to the merged entries, which is inaccurate. |
| 231 | // Especially for fuzzy profile matching, it gets multiple callsites (from |
| 232 | // different functions) that are used to match one callsite, which misleads the |
| 233 | // matching and causes a lot of false-positive reports. Hence, we filter them |
| 234 | // out from the profile map during profile generation. The filtered profiles are |
| 235 | // all for cold functions, so this has no perf impact. |
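|  | // For example (illustrative), two clones named __cxx_global_var_init and |
|  | // __cxx_global_var_init.1 both canonicalize to __cxx_global_var_init, so their |
|  | // samples would otherwise be merged into one ambiguous profile; such profiles |
|  | // are dropped here. |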
| 236 | void ProfileGeneratorBase::filterAmbiguousProfile(SampleProfileMap &Profiles) { |
| 237 | for (auto I = ProfileMap.begin(); I != ProfileMap.end();) { |
| 238 | auto FS = I++; |
| 239 | if (filterAmbiguousProfile(FS&: FS->second)) |
| 240 | ProfileMap.erase(It: FS); |
| 241 | } |
| 242 | } |
| 243 | |
| 244 | void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, |
| 245 | const RangeSample &Ranges) { |
| 246 | |
| 247 | /* |
| 248 | Regions may overlap with each other. Using the boundary info, find all |
| 249 |   disjoint ranges and their sample count. BoundaryPoint contains the total |
| 250 |   count of samples that begin/end at this point. |
| 251 | |
| 252 | |<--100-->| Sample1 |
| 253 | |<------200------>| Sample2 |
| 254 | A B C |
| 255 | |
| 256 | In the example above, |
| 257 | Sample1 begins at A, ends at B, its value is 100. |
| 258 |   Sample2 begins at A, ends at C, its value is 200. |
| 259 |   For A, BeginCount is the sum of the samples that begin at A, which is 300, |
| 260 |   and no sample ends at A, so EndCount is 0. |
| 261 | Then boundary points A, B, and C with begin/end counts are: |
| 262 | A: (300, 0) |
| 263 | B: (0, 100) |
| 264 | C: (0, 200) |
| 265 | */ |
| 266 | struct BoundaryPoint { |
| 267 | // Sum of sample counts beginning at this point |
| 268 | uint64_t BeginCount = UINT64_MAX; |
| 269 | // Sum of sample counts ending at this point |
| 270 | uint64_t EndCount = UINT64_MAX; |
| 271 | // Is the begin point of a zero range. |
| 272 | bool IsZeroRangeBegin = false; |
| 273 | // Is the end point of a zero range. |
| 274 | bool IsZeroRangeEnd = false; |
| 275 | |
| 276 | void addBeginCount(uint64_t Count) { |
| 277 | if (BeginCount == UINT64_MAX) |
| 278 | BeginCount = 0; |
| 279 | BeginCount += Count; |
| 280 | } |
| 281 | |
| 282 | void addEndCount(uint64_t Count) { |
| 283 | if (EndCount == UINT64_MAX) |
| 284 | EndCount = 0; |
| 285 | EndCount += Count; |
| 286 | } |
| 287 | }; |
| 288 | |
| 289 | /* |
| 290 |   For the above example, with the boundary points, the following logic finds |
| 291 |   two disjoint regions: |
| 292 | |
| 293 | [A,B]: 300 |
| 294 | [B+1,C]: 200 |
| 295 | |
| 296 |   If there is a boundary point where samples both begin and end, the point |
| 297 |   itself becomes a separate disjoint region. For example, if we have original |
| 298 | ranges of |
| 299 | |
| 300 | |<--- 100 --->| |
| 301 | |<--- 200 --->| |
| 302 | A B C |
| 303 | |
| 304 | there are three boundary points with their begin/end counts of |
| 305 | |
| 306 | A: (100, 0) |
| 307 | B: (200, 100) |
| 308 | C: (0, 200) |
| 309 | |
| 310 | the disjoint ranges would be |
| 311 | |
| 312 | [A, B-1]: 100 |
| 313 | [B, B]: 300 |
| 314 | [B+1, C]: 200. |
| 315 | |
| 316 | Example for zero value range: |
| 317 | |
| 318 | |<--- 100 --->| |
| 319 | |<--- 200 --->| |
| 320 | |<--------------- 0 ----------------->| |
| 321 | A B C D E F |
| 322 | |
| 323 | [A, B-1] : 0 |
| 324 | [B, C] : 100 |
| 325 | [C+1, D-1]: 0 |
| 326 | [D, E] : 200 |
| 327 | [E+1, F] : 0 |
| 328 | */ |
| 329 | std::map<uint64_t, BoundaryPoint> Boundaries; |
| 330 | |
| 331 | for (const auto &Item : Ranges) { |
| 332 | assert(Item.first.first <= Item.first.second && |
| 333 | "Invalid instruction range" ); |
| 334 | auto &BeginPoint = Boundaries[Item.first.first]; |
| 335 | auto &EndPoint = Boundaries[Item.first.second]; |
| 336 | uint64_t Count = Item.second; |
| 337 | |
| 338 | BeginPoint.addBeginCount(Count); |
| 339 | EndPoint.addEndCount(Count); |
| 340 | if (Count == 0) { |
| 341 | BeginPoint.IsZeroRangeBegin = true; |
| 342 | EndPoint.IsZeroRangeEnd = true; |
| 343 | } |
| 344 | } |
| 345 | |
| 346 | // Use UINT64_MAX to indicate there is no existing range between BeginAddress |
| 347 | // and the next valid address |
| 348 | uint64_t BeginAddress = UINT64_MAX; |
| 349 | int ZeroRangeDepth = 0; |
| 350 | uint64_t Count = 0; |
| 351 | for (const auto &Item : Boundaries) { |
| 352 | uint64_t Address = Item.first; |
| 353 | const BoundaryPoint &Point = Item.second; |
| 354 | if (Point.BeginCount != UINT64_MAX) { |
| 355 | if (BeginAddress != UINT64_MAX) |
| 356 | DisjointRanges[{BeginAddress, Address - 1}] = Count; |
| 357 | Count += Point.BeginCount; |
| 358 | BeginAddress = Address; |
| 359 | ZeroRangeDepth += Point.IsZeroRangeBegin; |
| 360 | } |
| 361 | if (Point.EndCount != UINT64_MAX) { |
| 362 | assert((BeginAddress != UINT64_MAX) && |
| 363 | "First boundary point cannot be 'end' point" ); |
| 364 | DisjointRanges[{BeginAddress, Address}] = Count; |
| 365 | assert(Count >= Point.EndCount && "Mismatched live ranges" ); |
| 366 | Count -= Point.EndCount; |
| 367 | BeginAddress = Address + 1; |
| 368 | ZeroRangeDepth -= Point.IsZeroRangeEnd; |
| 369 |       // If the remaining count is zero and we are no longer in a zero range, |
| 370 |       // this means all the preceding ranges have been consumed, thus mark |
| 371 |       // BeginAddress as UINT64_MAX. e.g. supposing we have two non-overlapping ranges: |
| 372 | // [<---- 10 ---->] |
| 373 | // [<---- 20 ---->] |
| 374 | // A B C D |
| 375 | // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't |
| 376 | // have the [B+1, C-1] zero range. |
| 377 | if (Count == 0 && ZeroRangeDepth == 0) |
| 378 | BeginAddress = UINT64_MAX; |
| 379 | } |
| 380 | } |
| 381 | } |
| 382 | |
| 383 | void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( |
| 384 | FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, |
| 385 | uint64_t Count) { |
| 386 |   // Use the maximum count of samples with the same line location. |
| 387 | uint32_t Discriminator = getBaseDiscriminator(Discriminator: LeafLoc.Location.Discriminator); |
| 388 | |
| 389 |   // Use the duplication factor to compensate for loop unroll/vectorization. |
| 390 | // Note that this is only needed when we're taking MAX of the counts at |
| 391 | // the location instead of SUM. |
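|  |   // Illustrative example (hypothetical numbers): if a source line is unrolled |
|  |   // 4x and each unrolled copy gets ~10 samples, taking MAX per location would |
|  |   // report ~10 for the line; multiplying by the duplication factor (4) yields |
|  |   // ~40, approximating the sum across the copies. |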
| 392 | Count *= getDuplicationFactor(Discriminator: LeafLoc.Location.Discriminator); |
| 393 | |
| 394 | ErrorOr<uint64_t> R = |
| 395 | FunctionProfile.findSamplesAt(LineOffset: LeafLoc.Location.LineOffset, Discriminator); |
| 396 | |
| 397 | uint64_t PreviousCount = R ? R.get() : 0; |
| 398 | if (PreviousCount <= Count) { |
| 399 | FunctionProfile.addBodySamples(LineOffset: LeafLoc.Location.LineOffset, Discriminator, |
| 400 | Num: Count - PreviousCount); |
| 401 | } |
| 402 | } |
| 403 | |
| 404 | void ProfileGeneratorBase::updateTotalSamples() { |
| 405 | for (auto &Item : ProfileMap) { |
| 406 | FunctionSamples &FunctionProfile = Item.second; |
| 407 | FunctionProfile.updateTotalSamples(); |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | void ProfileGeneratorBase::updateCallsiteSamples() { |
| 412 | for (auto &Item : ProfileMap) { |
| 413 | FunctionSamples &FunctionProfile = Item.second; |
| 414 | FunctionProfile.updateCallsiteSamples(); |
| 415 | } |
| 416 | } |
| 417 | |
| 418 | void ProfileGeneratorBase::updateFunctionSamples() { |
| 419 | updateCallsiteSamples(); |
| 420 | |
| 421 | if (UpdateTotalSamples) |
| 422 | updateTotalSamples(); |
| 423 | } |
| 424 | |
| 425 | void ProfileGeneratorBase::collectProfiledFunctions() { |
| 426 | std::unordered_set<const BinaryFunction *> ProfiledFunctions; |
| 427 | if (collectFunctionsFromRawProfile(ProfiledFunctions)) |
| 428 | Binary->setProfiledFunctions(ProfiledFunctions); |
| 429 | else if (collectFunctionsFromLLVMProfile(ProfiledFunctions)) |
| 430 | Binary->setProfiledFunctions(ProfiledFunctions); |
| 431 | else |
| 432 | llvm_unreachable("Unsupported input profile" ); |
| 433 | } |
| 434 | |
| 435 | bool ProfileGeneratorBase::collectFunctionsFromRawProfile( |
| 436 | std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { |
| 437 | if (!SampleCounters) |
| 438 | return false; |
| 439 | // Go through all the stacks, ranges and branches in sample counters, use |
| 440 |   // the start of the range to look up the function it belongs to and record |
| 441 |   // that function. |
| 442 | for (const auto &CI : *SampleCounters) { |
| 443 | if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(Val: CI.first.getPtr())) { |
| 444 | for (auto StackAddr : CtxKey->Context) { |
| 445 | if (FuncRange *FRange = Binary->findFuncRange(Address: StackAddr)) |
| 446 | ProfiledFunctions.insert(x: FRange->Func); |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | for (auto Item : CI.second.RangeCounter) { |
| 451 | uint64_t StartAddress = Item.first.first; |
| 452 | if (FuncRange *FRange = Binary->findFuncRange(Address: StartAddress)) |
| 453 | ProfiledFunctions.insert(x: FRange->Func); |
| 454 | } |
| 455 | |
| 456 | for (auto Item : CI.second.BranchCounter) { |
| 457 | uint64_t SourceAddress = Item.first.first; |
| 458 | uint64_t TargetAddress = Item.first.second; |
| 459 | if (FuncRange *FRange = Binary->findFuncRange(Address: SourceAddress)) |
| 460 | ProfiledFunctions.insert(x: FRange->Func); |
| 461 | if (FuncRange *FRange = Binary->findFuncRange(Address: TargetAddress)) |
| 462 | ProfiledFunctions.insert(x: FRange->Func); |
| 463 | } |
| 464 | } |
| 465 | return true; |
| 466 | } |
| 467 | |
| 468 | bool ProfileGenerator::collectFunctionsFromLLVMProfile( |
| 469 | std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { |
| 470 | for (const auto &FS : ProfileMap) { |
| 471 | if (auto *Func = Binary->getBinaryFunction(FName: FS.second.getFunction())) |
| 472 | ProfiledFunctions.insert(x: Func); |
| 473 | } |
| 474 | return true; |
| 475 | } |
| 476 | |
| 477 | bool CSProfileGenerator::collectFunctionsFromLLVMProfile( |
| 478 | std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { |
| 479 | for (auto *Node : ContextTracker) { |
| 480 | if (!Node->getFuncName().empty()) |
| 481 | if (auto *Func = Binary->getBinaryFunction(FName: Node->getFuncName())) |
| 482 | ProfiledFunctions.insert(x: Func); |
| 483 | } |
| 484 | return true; |
| 485 | } |
| 486 | |
| 487 | FunctionSamples & |
| 488 | ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) { |
| 489 | SampleContext Context(FuncName); |
| 490 | return ProfileMap.create(Ctx: Context); |
| 491 | } |
| 492 | |
| 493 | void ProfileGenerator::generateProfile() { |
| 494 | collectProfiledFunctions(); |
| 495 | |
| 496 | if (Binary->usePseudoProbes()) |
| 497 | Binary->decodePseudoProbe(); |
| 498 | |
| 499 | if (SampleCounters) { |
| 500 | if (Binary->usePseudoProbes()) { |
| 501 | generateProbeBasedProfile(); |
| 502 | } else { |
| 503 | generateLineNumBasedProfile(); |
| 504 | } |
| 505 | } |
| 506 | |
| 507 | postProcessProfiles(); |
| 508 | } |
| 509 | |
| 510 | void ProfileGenerator::postProcessProfiles() { |
| 511 | computeSummaryAndThreshold(ProfileMap); |
| 512 | trimColdProfiles(Profiles: ProfileMap, ColdCntThreshold: ColdCountThreshold); |
| 513 | filterAmbiguousProfile(Profiles&: ProfileMap); |
| 514 | calculateAndShowDensity(Profiles: ProfileMap); |
| 515 | } |
| 516 | |
| 517 | void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, |
| 518 | uint64_t ColdCntThreshold) { |
| 519 | if (!TrimColdProfile) |
| 520 | return; |
| 521 | |
| 522 | // Move cold profiles into a tmp container. |
| 523 | std::vector<hash_code> ColdProfileHashes; |
| 524 | for (const auto &I : ProfileMap) { |
| 525 | if (I.second.getTotalSamples() < ColdCntThreshold) |
| 526 | ColdProfileHashes.emplace_back(args: I.first); |
| 527 | } |
| 528 | |
| 529 | // Remove the cold profile from ProfileMap. |
| 530 | for (const auto &I : ColdProfileHashes) |
| 531 | ProfileMap.erase(Key: I); |
| 532 | } |
| 533 | |
| 534 | void ProfileGenerator::generateLineNumBasedProfile() { |
| 535 | assert(SampleCounters->size() == 1 && |
| 536 | "Must have one entry for profile generation." ); |
| 537 | const SampleCounter &SC = SampleCounters->begin()->second; |
| 538 | // Fill in function body samples |
| 539 | populateBodySamplesForAllFunctions(RangeCounter: SC.RangeCounter); |
| 540 | // Fill in boundary sample counts as well as call site samples for calls |
| 541 | populateBoundarySamplesForAllFunctions(BranchCounters: SC.BranchCounter); |
| 542 | |
| 543 | updateFunctionSamples(); |
| 544 | } |
| 545 | |
| 546 | void ProfileGenerator::generateProbeBasedProfile() { |
| 547 | assert(SampleCounters->size() == 1 && |
| 548 | "Must have one entry for profile generation." ); |
| 549 | // Enable pseudo probe functionalities in SampleProf |
| 550 | FunctionSamples::ProfileIsProbeBased = true; |
| 551 | const SampleCounter &SC = SampleCounters->begin()->second; |
| 552 | // Fill in function body samples |
| 553 | populateBodySamplesWithProbesForAllFunctions(RangeCounter: SC.RangeCounter); |
| 554 | // Fill in boundary sample counts as well as call site samples for calls |
| 555 | populateBoundarySamplesWithProbesForAllFunctions(BranchCounters: SC.BranchCounter); |
| 556 | |
| 557 | updateFunctionSamples(); |
| 558 | } |
| 559 | |
| 560 | void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( |
| 561 | const RangeSample &RangeCounter) { |
| 562 | ProbeCounterMap ProbeCounter; |
| 563 |   // preprocessRangeCounter returns disjoint ranges, so there is no need to |
| 564 |   // redo that inside extractProbesFromRange. |
| 565 | extractProbesFromRange(RangeCounter: preprocessRangeCounter(RangeCounter), ProbeCounter, |
| 566 | FindDisjointRanges: false); |
| 567 | |
| 568 | for (const auto &PI : ProbeCounter) { |
| 569 | const MCDecodedPseudoProbe *Probe = PI.first; |
| 570 | uint64_t Count = PI.second; |
| 571 | SampleContextFrameVector FrameVec; |
| 572 | Binary->getInlineContextForProbe(Probe, InlineContextStack&: FrameVec, IncludeLeaf: true); |
| 573 | FunctionSamples &FunctionProfile = |
| 574 | getLeafProfileAndAddTotalSamples(FrameVec, Count); |
| 575 | FunctionProfile.addBodySamples(LineOffset: Probe->getIndex(), Discriminator: Probe->getDiscriminator(), |
| 576 | Num: Count); |
| 577 | if (Probe->isEntry()) |
| 578 | FunctionProfile.addHeadSamples(Num: Count); |
| 579 | } |
| 580 | } |
| 581 | |
| 582 | void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( |
| 583 | const BranchSample &BranchCounters) { |
| 584 | for (const auto &Entry : BranchCounters) { |
| 585 | uint64_t SourceAddress = Entry.first.first; |
| 586 | uint64_t TargetAddress = Entry.first.second; |
| 587 | uint64_t Count = Entry.second; |
| 588 | assert(Count != 0 && "Unexpected zero weight branch" ); |
| 589 | |
| 590 | StringRef CalleeName = getCalleeNameForAddress(TargetAddress); |
| 591 | if (CalleeName.size() == 0) |
| 592 | continue; |
| 593 | |
| 594 | const MCDecodedPseudoProbe *CallProbe = |
| 595 | Binary->getCallProbeForAddr(Address: SourceAddress); |
| 596 | if (CallProbe == nullptr) |
| 597 | continue; |
| 598 | |
| 599 | // Record called target sample and its count. |
| 600 | SampleContextFrameVector FrameVec; |
| 601 | Binary->getInlineContextForProbe(Probe: CallProbe, InlineContextStack&: FrameVec, IncludeLeaf: true); |
| 602 | |
| 603 | if (!FrameVec.empty()) { |
| 604 | FunctionSamples &FunctionProfile = |
| 605 | getLeafProfileAndAddTotalSamples(FrameVec, Count: 0); |
| 606 | FunctionProfile.addCalledTargetSamples( |
| 607 | LineOffset: FrameVec.back().Location.LineOffset, |
| 608 | Discriminator: FrameVec.back().Location.Discriminator, |
| 609 | Func: FunctionId(CalleeName), Num: Count); |
| 610 | } |
| 611 | } |
| 612 | } |
| 613 | |
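|  | // Illustrative note: FrameVec is ordered from the outermost frame to the leaf. |
|  | // For a (hypothetical) instruction of bar inlined into foo at line offset 3, |
|  | // FrameVec would be [foo:3, bar:<leaf offset>]; the function below looks up |
|  | // foo's top-level profile, nests bar's profile under foo's callsite at offset |
|  | // 3, and adds Count to the total samples of every frame along the way. |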
| 614 | FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( |
| 615 | const SampleContextFrameVector &FrameVec, uint64_t Count) { |
| 616 | // Get top level profile |
| 617 | FunctionSamples *FunctionProfile = |
| 618 | &getTopLevelFunctionProfile(FuncName: FrameVec[0].Func); |
| 619 | FunctionProfile->addTotalSamples(Num: Count); |
| 620 | if (Binary->usePseudoProbes()) { |
| 621 | const auto *FuncDesc = Binary->getFuncDescForGUID( |
| 622 | GUID: FunctionProfile->getFunction().getHashCode()); |
| 623 | FunctionProfile->setFunctionHash(FuncDesc->FuncHash); |
| 624 | } |
| 625 | |
| 626 | for (size_t I = 1; I < FrameVec.size(); I++) { |
| 627 | LineLocation Callsite( |
| 628 | FrameVec[I - 1].Location.LineOffset, |
| 629 | getBaseDiscriminator(Discriminator: FrameVec[I - 1].Location.Discriminator)); |
| 630 | FunctionSamplesMap &SamplesMap = |
| 631 | FunctionProfile->functionSamplesAt(Loc: Callsite); |
| 632 | auto Ret = |
| 633 | SamplesMap.emplace(args: FrameVec[I].Func, args: FunctionSamples()); |
| 634 | if (Ret.second) { |
| 635 | SampleContext Context(FrameVec[I].Func); |
| 636 | Ret.first->second.setContext(Context); |
| 637 | } |
| 638 | FunctionProfile = &Ret.first->second; |
| 639 | FunctionProfile->addTotalSamples(Num: Count); |
| 640 | if (Binary->usePseudoProbes()) { |
| 641 | const auto *FuncDesc = Binary->getFuncDescForGUID( |
| 642 | GUID: FunctionProfile->getFunction().getHashCode()); |
| 643 | FunctionProfile->setFunctionHash(FuncDesc->FuncHash); |
| 644 | } |
| 645 | } |
| 646 | |
| 647 | return *FunctionProfile; |
| 648 | } |
| 649 | |
| 650 | RangeSample |
| 651 | ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { |
| 652 | RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); |
| 653 | if (FillZeroForAllFuncs) { |
| 654 | for (auto &FuncI : Binary->getAllBinaryFunctions()) { |
| 655 | for (auto &R : FuncI.second.Ranges) { |
| 656 | Ranges[{R.first, R.second - 1}] += 0; |
| 657 | } |
| 658 | } |
| 659 | } else { |
| 660 | // For each range, we search for all ranges of the function it belongs to |
| 661 |     // and initialize it with a zero count, so it remains zero if it doesn't hit |
| 662 |     // any samples. This is consistent with the compiler, which interprets a |
| 663 |     // zero count as unexecuted (cold). |
| 664 | for (const auto &I : RangeCounter) { |
| 665 | uint64_t StartAddress = I.first.first; |
| 666 | for (const auto &Range : Binary->getRanges(Address: StartAddress)) |
| 667 | Ranges[{Range.first, Range.second - 1}] += 0; |
| 668 | } |
| 669 | } |
| 670 | RangeSample DisjointRanges; |
| 671 | findDisjointRanges(DisjointRanges, Ranges); |
| 672 | return DisjointRanges; |
| 673 | } |
| 674 | |
| 675 | void ProfileGenerator::populateBodySamplesForAllFunctions( |
| 676 | const RangeSample &RangeCounter) { |
| 677 | for (const auto &Range : preprocessRangeCounter(RangeCounter)) { |
| 678 | uint64_t RangeBegin = Range.first.first; |
| 679 | uint64_t RangeEnd = Range.first.second; |
| 680 | uint64_t Count = Range.second; |
| 681 | |
| 682 | InstructionPointer IP(Binary, RangeBegin, true); |
| 683 |     // A disjoint range may fall between two instructions, e.g. if Instr1 is at |
| 684 |     // Addr1 and Instr2 is at Addr2, a disjoint range can be [Addr1+1, Addr2-1]. |
| 685 |     // We should ignore such a range. |
| 686 | if (IP.Address > RangeEnd) |
| 687 | continue; |
| 688 | |
| 689 | do { |
| 690 | const SampleContextFrameVector FrameVec = |
| 691 | Binary->getFrameLocationStack(Address: IP.Address); |
| 692 | if (!FrameVec.empty()) { |
| 693 | // FIXME: As accumulating total count per instruction caused some |
| 694 | // regression, we changed to accumulate total count per byte as a |
| 695 | // workaround. Tuning hotness threshold on the compiler side might be |
| 696 | // necessary in the future. |
| 697 | FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( |
| 698 | FrameVec, Count: Count * Binary->getInstSize(Address: IP.Address)); |
| 699 | updateBodySamplesforFunctionProfile(FunctionProfile, LeafLoc: FrameVec.back(), |
| 700 | Count); |
| 701 | } |
| 702 | } while (IP.advance() && IP.Address <= RangeEnd); |
| 703 | } |
| 704 | } |
| 705 | |
| 706 | StringRef |
| 707 | ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) { |
| 708 | // Get the function range by branch target if it's a call branch. |
| 709 | auto *FRange = Binary->findFuncRangeForStartAddr(Address: TargetAddress); |
| 710 | |
| 711 | // We won't accumulate sample count for a range whose start is not the real |
| 712 |   // function entry, such as an outlined function or an inner label. |
| 713 | if (!FRange || !FRange->IsFuncEntry) |
| 714 | return StringRef(); |
| 715 | |
| 716 | return FunctionSamples::getCanonicalFnName(FnName: FRange->getFuncName()); |
| 717 | } |
| 718 | |
| 719 | void ProfileGenerator::populateBoundarySamplesForAllFunctions( |
| 720 | const BranchSample &BranchCounters) { |
| 721 | for (const auto &Entry : BranchCounters) { |
| 722 | uint64_t SourceAddress = Entry.first.first; |
| 723 | uint64_t TargetAddress = Entry.first.second; |
| 724 | uint64_t Count = Entry.second; |
| 725 | assert(Count != 0 && "Unexpected zero weight branch" ); |
| 726 | |
| 727 | StringRef CalleeName = getCalleeNameForAddress(TargetAddress); |
| 728 | if (CalleeName.size() == 0) |
| 729 | continue; |
| 730 | // Record called target sample and its count. |
| 731 | const SampleContextFrameVector &FrameVec = |
| 732 | Binary->getCachedFrameLocationStack(Address: SourceAddress); |
| 733 | if (!FrameVec.empty()) { |
| 734 | FunctionSamples &FunctionProfile = |
| 735 | getLeafProfileAndAddTotalSamples(FrameVec, Count: 0); |
| 736 | FunctionProfile.addCalledTargetSamples( |
| 737 | LineOffset: FrameVec.back().Location.LineOffset, |
| 738 | Discriminator: getBaseDiscriminator(Discriminator: FrameVec.back().Location.Discriminator), |
| 739 | Func: FunctionId(CalleeName), Num: Count); |
| 740 | } |
| 741 | // Add head samples for callee. |
| 742 | FunctionSamples &CalleeProfile = |
| 743 | getTopLevelFunctionProfile(FuncName: FunctionId(CalleeName)); |
| 744 | CalleeProfile.addHeadSamples(Num: Count); |
| 745 | } |
| 746 | } |
| 747 | |
| 748 | void ProfileGeneratorBase::calculateBodySamplesAndSize( |
| 749 | const FunctionSamples &FSamples, uint64_t &TotalBodySamples, |
| 750 | uint64_t &FuncBodySize) { |
| 751 |   // Note that ideally the size should be the number of function instructions. |
| 752 |   // However, for a probe-based profile, we don't have an accurate instruction |
| 753 |   // count for each probe; instead, the probe sample is the sample count for |
| 754 |   // the block, which is equivalent to |
| 755 |   // total_instruction_samples/num_of_instructions in one block. Hence, we use |
| 756 |   // the number of probes as a proxy for the function's size. |
| 757 | FuncBodySize += FSamples.getBodySamples().size(); |
| 758 | |
| 759 |   // The accumulated body samples re-calculated here could differ from the |
| 760 |   // TotalSamples (getTotalSamples) field of FunctionSamples for a line-number |
| 761 |   // based profile. The reason is that TotalSamples is the sum of all the |
| 762 |   // samples of the machine instructions in one source-code line, whereas the |
| 763 |   // entry in BodySamples is only the max of them, so TotalSamples is usually |
| 764 |   // much bigger than the accumulated body samples, as one source-code line can |
| 765 |   // emit many machine instructions. We observed a regression when we switched |
| 766 |   // to use the accumulated body samples (by using -update-total-samples). |
| 767 |   // Hence, it's safer to re-calculate here to avoid such a discrepancy. There |
| 768 |   // is no problem for a probe-based profile, as its TotalSamples is exactly the |
| 769 |   // same as the accumulated body samples. |
| 770 | for (const auto &I : FSamples.getBodySamples()) |
| 771 | TotalBodySamples += I.second.getSamples(); |
| 772 | |
| 773 | for (const auto &CallsiteSamples : FSamples.getCallsiteSamples()) |
| 774 | for (const auto &Callee : CallsiteSamples.second) { |
| 775 | // For binary-level density, the inlinees' samples and size should be |
| 776 | // included in the calculation. |
| 777 | calculateBodySamplesAndSize(FSamples: Callee.second, TotalBodySamples, |
| 778 | FuncBodySize); |
| 779 | } |
| 780 | } |
| 781 | |
| 782 | // Calculate profile density: |
| 783 | // Calculate the density for each function and sort the functions in descending |
| 784 | // order of density, then keep accumulating their total samples until the sum |
| 785 | // exceeds the percentage threshold (cut-off) of the total profile samples. The |
| 786 | // profile density is the last (minimum) function density of the processed |
| 787 | // functions, which means all the functions hot to perf have a good density if |
| 788 | // the profile density is good. The percentage threshold |
| 789 | // (--profile-density-cutoff-hot) is configurable depending on how much regression the system wants to tolerate. |
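|  | // Illustrative example (hypothetical numbers): with a 99% cutoff and three |
|  | // functions whose (density, total samples) are (50, 900), (10, 90) and (2, 10), |
|  | // the first two functions cover 990 of the 1000 total samples (99%), so the |
|  | // reported profile density is 10, the density of the last function included. |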
| 790 | double |
| 791 | ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles) { |
| 792 | double ProfileDensity = 0.0; |
| 793 | |
| 794 | uint64_t TotalProfileSamples = 0; |
| 795 | // A list of the function profile density and its total samples. |
| 796 | std::vector<std::pair<double, uint64_t>> FuncDensityList; |
| 797 | for (const auto &I : Profiles) { |
| 798 | uint64_t TotalBodySamples = 0; |
| 799 | uint64_t FuncBodySize = 0; |
| 800 | calculateBodySamplesAndSize(FSamples: I.second, TotalBodySamples, FuncBodySize); |
| 801 | |
| 802 | if (FuncBodySize == 0) |
| 803 | continue; |
| 804 | |
| 805 | double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize; |
| 806 | TotalProfileSamples += TotalBodySamples; |
| 807 | FuncDensityList.emplace_back(args&: FuncDensity, args&: TotalBodySamples); |
| 808 | } |
| 809 | |
| 810 | // Sorted by the density in descending order. |
| 811 | llvm::stable_sort(Range&: FuncDensityList, C: [&](const std::pair<double, uint64_t> &A, |
| 812 | const std::pair<double, uint64_t> &B) { |
| 813 | if (A.first != B.first) |
| 814 | return A.first > B.first; |
| 815 | return A.second < B.second; |
| 816 | }); |
| 817 | |
| 818 | uint64_t AccumulatedSamples = 0; |
| 819 | uint32_t I = 0; |
| 820 | assert(ProfileDensityCutOffHot <= 1000000 && |
| 821 | "The cutoff value is greater than 1000000(100%)" ); |
| 822 | while (AccumulatedSamples < TotalProfileSamples * |
| 823 | static_cast<float>(ProfileDensityCutOffHot) / |
| 824 | 1000000 && |
| 825 | I < FuncDensityList.size()) { |
| 826 | AccumulatedSamples += FuncDensityList[I].second; |
| 827 | ProfileDensity = FuncDensityList[I].first; |
| 828 | I++; |
| 829 | } |
| 830 | |
| 831 | return ProfileDensity; |
| 832 | } |
| 833 | |
| 834 | void ProfileGeneratorBase::calculateAndShowDensity( |
| 835 | const SampleProfileMap &Profiles) { |
| 836 | double Density = calculateDensity(Profiles); |
| 837 | showDensitySuggestion(Density); |
| 838 | } |
| 839 | |
| 840 | FunctionSamples * |
| 841 | CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode, |
| 842 | bool WasLeafInlined) { |
| 843 | FunctionSamples *FProfile = ContextNode->getFunctionSamples(); |
| 844 | if (!FProfile) { |
| 845 | FSamplesList.emplace_back(); |
| 846 | FProfile = &FSamplesList.back(); |
| 847 | FProfile->setFunction(ContextNode->getFuncName()); |
| 848 | ContextNode->setFunctionSamples(FProfile); |
| 849 | } |
| 850 | // Update ContextWasInlined attribute for existing contexts. |
| 851 | // The current function can be called in two ways: |
| 852 | // - when processing a probe of the current frame |
| 853 | // - when processing the entry probe of an inlinee's frame, which |
| 854 | // is then used to update the callsite count of the current frame. |
| 855 | // The two can happen in any order, hence here we are making sure |
| 856 | // `ContextWasInlined` is always set as expected. |
| 857 | // TODO: Note that the former does not always happen if no probes of the |
| 858 |   // current frame have samples, and if the latter happens, we could lose the |
| 859 | // attribute. This should be fixed. |
| 860 | if (WasLeafInlined) |
| 861 | FProfile->getContext().setAttribute(ContextWasInlined); |
| 862 | return FProfile; |
| 863 | } |
| 864 | |
| 865 | ContextTrieNode * |
| 866 | CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context, |
| 867 | bool WasLeafInlined) { |
| 868 | ContextTrieNode *ContextNode = |
| 869 | ContextTracker.getOrCreateContextPath(Context, AllowCreate: true); |
| 870 | getOrCreateFunctionSamples(ContextNode, WasLeafInlined); |
| 871 | return ContextNode; |
| 872 | } |
| 873 | |
| 874 | void CSProfileGenerator::generateProfile() { |
| 875 | FunctionSamples::ProfileIsCS = true; |
| 876 | |
| 877 | collectProfiledFunctions(); |
| 878 | |
| 879 | if (Binary->usePseudoProbes()) { |
| 880 | Binary->decodePseudoProbe(); |
| 881 | if (InferMissingFrames) |
| 882 | initializeMissingFrameInferrer(); |
| 883 | } |
| 884 | |
| 885 | if (SampleCounters) { |
| 886 | if (Binary->usePseudoProbes()) { |
| 887 | generateProbeBasedProfile(); |
| 888 | } else { |
| 889 | generateLineNumBasedProfile(); |
| 890 | } |
| 891 | } |
| 892 | |
| 893 | if (Binary->getTrackFuncContextSize()) |
| 894 | computeSizeForProfiledFunctions(); |
| 895 | |
| 896 | postProcessProfiles(); |
| 897 | } |
| 898 | |
| 899 | void CSProfileGenerator::initializeMissingFrameInferrer() { |
| 900 | Binary->getMissingContextInferrer()->initialize(SampleCounters); |
| 901 | } |
| 902 | |
| 903 | void CSProfileGenerator::inferMissingFrames( |
| 904 | const SmallVectorImpl<uint64_t> &Context, |
| 905 | SmallVectorImpl<uint64_t> &NewContext) { |
| 906 | Binary->inferMissingFrames(Context, NewContext); |
| 907 | } |
| 908 | |
| 909 | void CSProfileGenerator::computeSizeForProfiledFunctions() { |
| 910 | for (auto *Func : Binary->getProfiledFunctions()) |
| 911 | Binary->computeInlinedContextSizeForFunc(Func); |
| 912 | |
| 913 | // Flush the symbolizer to save memory. |
| 914 | Binary->flushSymbolizer(); |
| 915 | } |
| 916 | |
| 917 | void CSProfileGenerator::updateFunctionSamples() { |
| 918 | for (auto *Node : ContextTracker) { |
| 919 | FunctionSamples *FSamples = Node->getFunctionSamples(); |
| 920 | if (FSamples) { |
| 921 | if (UpdateTotalSamples) |
| 922 | FSamples->updateTotalSamples(); |
| 923 | FSamples->updateCallsiteSamples(); |
| 924 | } |
| 925 | } |
| 926 | } |
| 927 | |
| 928 | void CSProfileGenerator::generateLineNumBasedProfile() { |
| 929 | for (const auto &CI : *SampleCounters) { |
| 930 | const auto *CtxKey = cast<StringBasedCtxKey>(Val: CI.first.getPtr()); |
| 931 | |
| 932 | ContextTrieNode *ContextNode = &getRootContext(); |
| 933 |     // The sample context will be empty if the jump is an external-to-internal |
| 934 |     // call; in that case the head samples should still be added for the internal function. |
| 935 | if (!CtxKey->Context.empty()) { |
| 936 | // Get or create function profile for the range |
| 937 | ContextNode = |
| 938 | getOrCreateContextNode(Context: CtxKey->Context, WasLeafInlined: CtxKey->WasLeafInlined); |
| 939 | // Fill in function body samples |
| 940 | populateBodySamplesForFunction(FunctionProfile&: *ContextNode->getFunctionSamples(), |
| 941 | RangeCounters: CI.second.RangeCounter); |
| 942 | } |
| 943 | // Fill in boundary sample counts as well as call site samples for calls |
| 944 | populateBoundarySamplesForFunction(CallerNode: ContextNode, BranchCounters: CI.second.BranchCounter); |
| 945 | } |
| 946 |   // Fill in call site value samples for inlined calls and also use the context |
| 947 |   // to infer missing samples. Since we don't have call counts for inlined |
| 948 |   // functions, we estimate them from the inlinee's profile using its entry |
| 949 |   // body sample. |
| 950 | populateInferredFunctionSamples(Node&: getRootContext()); |
| 951 | |
| 952 | updateFunctionSamples(); |
| 953 | } |
| 954 | |
| 955 | void CSProfileGenerator::populateBodySamplesForFunction( |
| 956 | FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { |
| 957 | // Compute disjoint ranges first, so we can use MAX |
| 958 | // for calculating count for each location. |
| 959 | RangeSample Ranges; |
| 960 | findDisjointRanges(DisjointRanges&: Ranges, Ranges: RangeCounter); |
| 961 | for (const auto &Range : Ranges) { |
| 962 | uint64_t RangeBegin = Range.first.first; |
| 963 | uint64_t RangeEnd = Range.first.second; |
| 964 | uint64_t Count = Range.second; |
| 965 |     // Disjoint ranges may introduce zero-filled gaps that |
| 966 |     // don't belong to the current context; filter them out. |
| 967 | if (Count == 0) |
| 968 | continue; |
| 969 | |
| 970 | InstructionPointer IP(Binary, RangeBegin, true); |
| 971 |     // A disjoint range may fall between two instructions, e.g. if Instr1 is at |
| 972 |     // Addr1 and Instr2 is at Addr2, a disjoint range can be [Addr1+1, Addr2-1]. |
| 973 |     // We should ignore such a range. |
| 974 | if (IP.Address > RangeEnd) |
| 975 | continue; |
| 976 | |
| 977 | do { |
| 978 | auto LeafLoc = Binary->getInlineLeafFrameLoc(Address: IP.Address); |
| 979 | if (LeafLoc) { |
| 980 | // Recording body sample for this specific context |
| 981 | updateBodySamplesforFunctionProfile(FunctionProfile, LeafLoc: *LeafLoc, Count); |
| 982 | FunctionProfile.addTotalSamples(Num: Count); |
| 983 | } |
| 984 | } while (IP.advance() && IP.Address <= RangeEnd); |
| 985 | } |
| 986 | } |
| 987 | |
| 988 | void CSProfileGenerator::populateBoundarySamplesForFunction( |
| 989 | ContextTrieNode *Node, const BranchSample &BranchCounters) { |
| 990 | |
| 991 | for (const auto &Entry : BranchCounters) { |
| 992 | uint64_t SourceAddress = Entry.first.first; |
| 993 | uint64_t TargetAddress = Entry.first.second; |
| 994 | uint64_t Count = Entry.second; |
| 995 | assert(Count != 0 && "Unexpected zero weight branch" ); |
| 996 | |
| 997 | StringRef CalleeName = getCalleeNameForAddress(TargetAddress); |
| 998 | if (CalleeName.size() == 0) |
| 999 | continue; |
| 1000 | |
| 1001 | ContextTrieNode *CallerNode = Node; |
| 1002 | LineLocation CalleeCallSite(0, 0); |
| 1003 | if (CallerNode != &getRootContext()) { |
| 1004 | // Record called target sample and its count |
| 1005 | auto LeafLoc = Binary->getInlineLeafFrameLoc(Address: SourceAddress); |
| 1006 | if (LeafLoc) { |
| 1007 | CallerNode->getFunctionSamples()->addCalledTargetSamples( |
| 1008 | LineOffset: LeafLoc->Location.LineOffset, |
| 1009 | Discriminator: getBaseDiscriminator(Discriminator: LeafLoc->Location.Discriminator), |
| 1010 | Func: FunctionId(CalleeName), |
| 1011 | Num: Count); |
| 1012 | // Record head sample for called target(callee) |
| 1013 | CalleeCallSite = LeafLoc->Location; |
| 1014 | } |
| 1015 | } |
| 1016 | |
| 1017 | ContextTrieNode *CalleeNode = |
| 1018 | CallerNode->getOrCreateChildContext(CallSite: CalleeCallSite, |
| 1019 | ChildName: FunctionId(CalleeName)); |
| 1020 | FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(ContextNode: CalleeNode); |
| 1021 | CalleeProfile->addHeadSamples(Num: Count); |
| 1022 | } |
| 1023 | } |
| 1024 | |
| 1025 | void CSProfileGenerator::populateInferredFunctionSamples( |
| 1026 | ContextTrieNode &Node) { |
| 1027 |   // There is no call/jmp sample between the inliner and the inlinee, so we need |
| 1028 |   // to use the inlinee's context to infer the inliner's context, i.e. the |
| 1029 |   // parent (inliner)'s sample depends on the child (inlinee)'s sample, so |
| 1030 |   // traverse the tree in post-order. |
| 1031 | for (auto &It : Node.getAllChildContext()) |
| 1032 | populateInferredFunctionSamples(Node&: It.second); |
| 1033 | |
| 1034 | FunctionSamples *CalleeProfile = Node.getFunctionSamples(); |
| 1035 | if (!CalleeProfile) |
| 1036 | return; |
| 1037 | // If we already have head sample counts, we must have value profile |
| 1038 | // for call sites added already. Skip to avoid double counting. |
| 1039 | if (CalleeProfile->getHeadSamples()) |
| 1040 | return; |
| 1041 | ContextTrieNode *CallerNode = Node.getParentContext(); |
| 1042 | // If we don't have context, nothing to do for caller's call site. |
| 1043 | // This could happen for entry point function. |
| 1044 | if (CallerNode == &getRootContext()) |
| 1045 | return; |
| 1046 | |
| 1047 | LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc(); |
| 1048 | FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(ContextNode: CallerNode); |
| 1049 | // Since we don't have call count for inlined functions, we |
| 1050 | // estimate it from inlinee's profile using entry body sample. |
| 1051 | uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate(); |
| 1052 | // If we don't have samples with location, use 1 to indicate live. |
| 1053 | if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) |
| 1054 | EstimatedCallCount = 1; |
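|  |   // Illustrative example (hypothetical): if bar was inlined into foo at the |
|  |   // callsite recorded in CallerLeafFrameLoc and bar's estimated entry count is |
|  |   // 100, then foo's profile below gets 100 added as a called-target sample, a |
|  |   // body sample and total samples at that callsite location. |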
| 1055 | CallerProfile.addCalledTargetSamples(LineOffset: CallerLeafFrameLoc.LineOffset, |
| 1056 | Discriminator: CallerLeafFrameLoc.Discriminator, |
| 1057 | Func: Node.getFuncName(), Num: EstimatedCallCount); |
| 1058 | CallerProfile.addBodySamples(LineOffset: CallerLeafFrameLoc.LineOffset, |
| 1059 | Discriminator: CallerLeafFrameLoc.Discriminator, |
| 1060 | Num: EstimatedCallCount); |
| 1061 | CallerProfile.addTotalSamples(Num: EstimatedCallCount); |
| 1062 | } |
| 1063 | |
| 1064 | void CSProfileGenerator::convertToProfileMap( |
| 1065 | ContextTrieNode &Node, SampleContextFrameVector &Context) { |
| 1066 | FunctionSamples *FProfile = Node.getFunctionSamples(); |
| 1067 | if (FProfile) { |
| 1068 | Context.emplace_back(Args: Node.getFuncName(), Args: LineLocation(0, 0)); |
| 1069 | // Save the new context for future references. |
| 1070 | SampleContextFrames NewContext = *Contexts.insert(x: Context).first; |
| 1071 | auto Ret = ProfileMap.emplace(Args&: NewContext, Args: std::move(*FProfile)); |
| 1072 | FunctionSamples &NewProfile = Ret.first->second; |
| 1073 | NewProfile.getContext().setContext(Context: NewContext); |
| 1074 | Context.pop_back(); |
| 1075 | } |
| 1076 | |
| 1077 | for (auto &It : Node.getAllChildContext()) { |
| 1078 | ContextTrieNode &ChildNode = It.second; |
| 1079 | Context.emplace_back(Args: Node.getFuncName(), Args: ChildNode.getCallSiteLoc()); |
| 1080 | convertToProfileMap(Node&: ChildNode, Context); |
| 1081 | Context.pop_back(); |
| 1082 | } |
| 1083 | } |
| 1084 | |
| 1085 | void CSProfileGenerator::convertToProfileMap() { |
| 1086 | assert(ProfileMap.empty() && |
| 1087 | "ProfileMap should be empty before converting from the trie" ); |
| 1088 | assert(IsProfileValidOnTrie && |
| 1089 | "Do not convert the trie twice, it's already destroyed" ); |
| 1090 | |
| 1091 | SampleContextFrameVector Context; |
| 1092 | for (auto &It : getRootContext().getAllChildContext()) |
| 1093 | convertToProfileMap(Node&: It.second, Context); |
| 1094 | |
| 1095 | IsProfileValidOnTrie = false; |
| 1096 | } |
| 1097 | |
| 1098 | void CSProfileGenerator::postProcessProfiles() { |
| 1099 | // Compute hot/cold threshold based on profile. This will be used for cold |
| 1100 | // context profile merging/trimming. |
| 1101 | computeSummaryAndThreshold(); |
| 1102 | |
| 1103 | // Run global pre-inliner to adjust/merge context profile based on estimated |
| 1104 | // inline decisions. |
| 1105 | if (EnableCSPreInliner) { |
| 1106 | ContextTracker.populateFuncToCtxtMap(); |
| 1107 | CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); |
| 1108 | // Turn off the profile merger by default unless it is explicitly enabled. |
| 1109 | if (!CSProfMergeColdContext.getNumOccurrences()) |
| 1110 | CSProfMergeColdContext = false; |
| 1111 | } |
| 1112 | |
| 1113 | convertToProfileMap(); |
| 1114 | |
| 1115 | // Trim and merge cold context profile using cold threshold above. |
| 1116 | if (TrimColdProfile || CSProfMergeColdContext) { |
| 1117 | SampleContextTrimmer(ProfileMap) |
| 1118 | .trimAndMergeColdContextProfiles( |
| 1119 | ColdCountThreshold: HotCountThreshold, TrimColdContext: TrimColdProfile, MergeColdContext: CSProfMergeColdContext, |
| 1120 | ColdContextFrameLength: CSProfMaxColdContextDepth, TrimBaseProfileOnly: EnableCSPreInliner); |
| 1121 | } |
| 1122 | |
| 1123 | if (GenCSNestedProfile) { |
| 1124 | ProfileConverter CSConverter(ProfileMap); |
| 1125 | CSConverter.convertCSProfiles(); |
| 1126 | FunctionSamples::ProfileIsCS = false; |
| 1127 | } |
| 1128 | filterAmbiguousProfile(Profiles&: ProfileMap); |
| 1129 | ProfileGeneratorBase::calculateAndShowDensity(Profiles: ProfileMap); |
| 1130 | } |
| 1131 | |
| 1132 | void ProfileGeneratorBase::computeSummaryAndThreshold( |
| 1133 | SampleProfileMap &Profiles) { |
| 1134 | SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); |
| 1135 | Summary = Builder.computeSummaryForProfiles(Profiles); |
| 1136 | HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( |
| 1137 | DS: (Summary->getDetailedSummary())); |
| 1138 | ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( |
| 1139 | DS: (Summary->getDetailedSummary())); |
| 1140 | } |
| 1141 | |
| 1142 | void CSProfileGenerator::computeSummaryAndThreshold() { |
| 1143 | // Always merge and use context-less profile map to compute summary. |
| 1144 | SampleProfileMap ContextLessProfiles; |
| 1145 | ContextTracker.createContextLessProfileMap(ContextLessProfiles); |
| 1146 | |
| 1147 | // Set the flag below to avoid merging the profile again in |
| 1148 | // computeSummaryAndThreshold |
| 1149 | FunctionSamples::ProfileIsCS = false; |
| 1150 | assert( |
| 1151 | (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && |
| 1152 | "Don't set --profile-summary-contextless to false for profile " |
| 1153 | "generation" ); |
| 1154 | ProfileGeneratorBase::computeSummaryAndThreshold(Profiles&: ContextLessProfiles); |
| 1155 | // Recover the old value. |
| 1156 | FunctionSamples::ProfileIsCS = true; |
| 1157 | } |
| 1158 | |
| 1159 | void ProfileGeneratorBase::extractProbesFromRange( |
| 1160 | const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, |
| 1161 | bool FindDisjointRanges) { |
| 1162 | const RangeSample *PRanges = &RangeCounter; |
| 1163 | RangeSample Ranges; |
| 1164 | if (FindDisjointRanges) { |
| 1165 | findDisjointRanges(DisjointRanges&: Ranges, Ranges: RangeCounter); |
| 1166 | PRanges = &Ranges; |
| 1167 | } |
| 1168 | |
| 1169 | for (const auto &Range : *PRanges) { |
| 1170 | uint64_t RangeBegin = Range.first.first; |
| 1171 | uint64_t RangeEnd = Range.first.second; |
| 1172 | uint64_t Count = Range.second; |
| 1173 | |
| 1174 | InstructionPointer IP(Binary, RangeBegin, true); |
| 1175 |     // A disjoint range may fall between two instructions, e.g. if Instr1 is at |
| 1176 |     // Addr1 and Instr2 is at Addr2, a disjoint range can be [Addr1+1, Addr2-1]. |
| 1177 |     // We should ignore such a range. |
| 1178 | if (IP.Address > RangeEnd) |
| 1179 | continue; |
| 1180 | |
| 1181 | do { |
| 1182 | const AddressProbesMap &Address2ProbesMap = |
| 1183 | Binary->getAddress2ProbesMap(); |
| 1184 | for (const MCDecodedPseudoProbe &Probe : |
| 1185 | Address2ProbesMap.find(Address: IP.Address)) { |
| 1186 | ProbeCounter[&Probe] += Count; |
| 1187 | } |
| 1188 | } while (IP.advance() && IP.Address <= RangeEnd); |
| 1189 | } |
| 1190 | } |
| 1191 | |
| 1192 | static void extractPrefixContextStack(SampleContextFrameVector &ContextStack, |
| 1193 | const SmallVectorImpl<uint64_t> &AddrVec, |
| 1194 | ProfiledBinary *Binary) { |
| 1195 | SmallVector<const MCDecodedPseudoProbe *, 16> Probes; |
| 1196 | for (auto Address : reverse(C: AddrVec)) { |
| 1197 | const MCDecodedPseudoProbe *CallProbe = |
| 1198 | Binary->getCallProbeForAddr(Address); |
| 1199 |     // These could be the cases where a probe is not found at a callsite. Cut |
| 1200 | // off the context from here since the inliner will not know how to consume |
| 1201 | // a context with unknown callsites. |
| 1202 | // 1. for functions that are not sampled when |
| 1203 | // --decode-probe-for-profiled-functions-only is on. |
| 1204 | // 2. for a merged callsite. Callsite merging may cause the loss of original |
| 1205 | // probe IDs. |
| 1206 | // 3. for an external callsite. |
| 1207 | if (!CallProbe) |
| 1208 | break; |
| 1209 | Probes.push_back(Elt: CallProbe); |
| 1210 | } |
| 1211 | |
| 1212 | std::reverse(first: Probes.begin(), last: Probes.end()); |
| 1213 | |
| 1214 |   // Extract the context stack for reuse; the leaf context stack will be added |
| 1215 |   // and compressed while looking up the function profile. |
| 1216 | for (const auto *P : Probes) { |
| 1217 | Binary->getInlineContextForProbe(Probe: P, InlineContextStack&: ContextStack, IncludeLeaf: true); |
| 1218 | } |
| 1219 | } |
| 1220 | |
| 1221 | void CSProfileGenerator::generateProbeBasedProfile() { |
| 1222 | // Enable pseudo probe functionalities in SampleProf |
| 1223 | FunctionSamples::ProfileIsProbeBased = true; |
| 1224 | for (const auto &CI : *SampleCounters) { |
| 1225 | const AddrBasedCtxKey *CtxKey = |
| 1226 | dyn_cast<AddrBasedCtxKey>(Val: CI.first.getPtr()); |
| 1227 | // Fill in function body samples from probes, also infer caller's samples |
| 1228 | // from callee's probe |
| 1229 | populateBodySamplesWithProbes(RangeCounter: CI.second.RangeCounter, CtxKey); |
| 1230 | // Fill in boundary samples for a call probe |
| 1231 | populateBoundarySamplesWithProbes(BranchCounter: CI.second.BranchCounter, CtxKey); |
| 1232 | } |
| 1233 | } |
| 1234 | |
| 1235 | void CSProfileGenerator::populateBodySamplesWithProbes( |
| 1236 | const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) { |
| 1237 | ProbeCounterMap ProbeCounter; |
| 1238 | // Extract the top frame probes by looking up each address among the range in |
| 1239 | // the Address2ProbeMap |
| 1240 | extractProbesFromRange(RangeCounter, ProbeCounter); |
| 1241 | std::unordered_map<MCDecodedPseudoProbeInlineTree *, |
| 1242 | std::unordered_set<FunctionSamples *>> |
| 1243 | FrameSamples; |
| 1244 | for (const auto &PI : ProbeCounter) { |
| 1245 | const MCDecodedPseudoProbe *Probe = PI.first; |
| 1246 | uint64_t Count = PI.second; |
| 1247 |     // Disjoint ranges may introduce zero-filled gaps that |
| 1248 |     // don't belong to the current context; filter them out. |
| 1249 | if (!Probe->isBlock() || Count == 0) |
| 1250 | continue; |
| 1251 | |
| 1252 | ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, LeafProbe: Probe); |
| 1253 | FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); |
| 1254 | // Record the current frame and FunctionProfile whenever samples are |
| 1255 |     // collected for non-dangling probes. This is for reporting all of the |
| 1256 | // zero count probes of the frame later. |
| 1257 | FrameSamples[Probe->getInlineTreeNode()].insert(x: &FunctionProfile); |
| 1258 | FunctionProfile.addBodySamples(LineOffset: Probe->getIndex(), Discriminator: Probe->getDiscriminator(), |
| 1259 | Num: Count); |
| 1260 | FunctionProfile.addTotalSamples(Num: Count); |
| 1261 | if (Probe->isEntry()) { |
| 1262 | FunctionProfile.addHeadSamples(Num: Count); |
| 1263 | // Look up for the caller's function profile |
| 1264 | const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); |
| 1265 | ContextTrieNode *CallerNode = ContextNode->getParentContext(); |
| 1266 | if (InlinerDesc != nullptr && CallerNode != &getRootContext()) { |
| 1267 | // Since the context id will be compressed, we have to use callee's |
| 1268 | // context id to infer caller's context id to ensure they share the |
| 1269 | // same context prefix. |
| 1270 | uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset; |
| 1271 | uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator; |
| 1272 | assert(CallerIndex && |
| 1273 | "Inferred caller's location index shouldn't be zero!" ); |
| 1274 | assert(!CallerDiscriminator && |
| 1275 | "Callsite probe should not have a discriminator!" ); |
| 1276 | FunctionSamples &CallerProfile = |
| 1277 | *getOrCreateFunctionSamples(ContextNode: CallerNode); |
| 1278 | CallerProfile.setFunctionHash(InlinerDesc->FuncHash); |
| 1279 | CallerProfile.addBodySamples(LineOffset: CallerIndex, Discriminator: CallerDiscriminator, Num: Count); |
| 1280 | CallerProfile.addTotalSamples(Num: Count); |
| 1281 | CallerProfile.addCalledTargetSamples(LineOffset: CallerIndex, Discriminator: CallerDiscriminator, |
| 1282 | Func: ContextNode->getFuncName(), Num: Count); |
| 1283 | } |
| 1284 | } |
| 1285 | } |
| 1286 | |
| 1287 | // Assign zero count for remaining probes without sample hits to |
| 1288 | // differentiate from probes optimized away, of which the counts are unknown |
| 1289 | // and will be inferred by the compiler. |
| 1290 | for (auto &I : FrameSamples) { |
| 1291 | for (auto *FunctionProfile : I.second) { |
| 1292 | for (const MCDecodedPseudoProbe &Probe : I.first->getProbes()) { |
| 1293 | FunctionProfile->addBodySamples(LineOffset: Probe.getIndex(), |
| 1294 | Discriminator: Probe.getDiscriminator(), Num: 0); |
| 1295 | } |
| 1296 | } |
| 1297 | } |
| 1298 | } |
| 1299 | |
| 1300 | void CSProfileGenerator::populateBoundarySamplesWithProbes( |
| 1301 | const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) { |
| 1302 | for (const auto &BI : BranchCounter) { |
| 1303 | uint64_t SourceAddress = BI.first.first; |
| 1304 | uint64_t TargetAddress = BI.first.second; |
| 1305 | uint64_t Count = BI.second; |
| 1306 | const MCDecodedPseudoProbe *CallProbe = |
| 1307 | Binary->getCallProbeForAddr(Address: SourceAddress); |
| 1308 | if (CallProbe == nullptr) |
| 1309 | continue; |
| 1310 | FunctionSamples &FunctionProfile = |
| 1311 | getFunctionProfileForLeafProbe(CtxKey, LeafProbe: CallProbe); |
| 1312 | FunctionProfile.addBodySamples(LineOffset: CallProbe->getIndex(), Discriminator: 0, Num: Count); |
| 1313 | FunctionProfile.addTotalSamples(Num: Count); |
| 1314 | StringRef CalleeName = getCalleeNameForAddress(TargetAddress); |
| 1315 | if (CalleeName.size() == 0) |
| 1316 | continue; |
| 1317 | FunctionProfile.addCalledTargetSamples(LineOffset: CallProbe->getIndex(), |
| 1318 | Discriminator: CallProbe->getDiscriminator(), |
| 1319 | Func: FunctionId(CalleeName), Num: Count); |
| 1320 | } |
| 1321 | } |
| 1322 | |
| 1323 | ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( |
| 1324 | const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { |
| 1325 | |
| 1326 | const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context; |
| 1327 | SmallVector<uint64_t, 16> NewContext; |
| 1328 | |
| 1329 | if (InferMissingFrames) { |
| 1330 | SmallVector<uint64_t, 16> Context = CtxKey->Context; |
| 1331 | // Append leaf frame for a complete inference. |
| 1332 | Context.push_back(Elt: LeafProbe->getAddress()); |
| 1333 | inferMissingFrames(Context, NewContext); |
| 1334 | // Pop out the leaf probe that was pushed in above. |
| 1335 | NewContext.pop_back(); |
| 1336 | PContext = &NewContext; |
| 1337 | } |
| 1338 | |
| 1339 | SampleContextFrameVector ContextStack; |
| 1340 | extractPrefixContextStack(ContextStack, AddrVec: *PContext, Binary); |
| 1341 | |
| 1342 | // Explicitly copy the context for appending the leaf context |
| 1343 | SampleContextFrameVector NewContextStack(ContextStack.begin(), |
| 1344 | ContextStack.end()); |
| 1345 | Binary->getInlineContextForProbe(Probe: LeafProbe, InlineContextStack&: NewContextStack, IncludeLeaf: true); |
| 1346 | // For leaf inlined context with the top frame, we should strip off the top |
| 1347 | // frame's probe id, like: |
| 1348 | // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" |
| 1349 | auto LeafFrame = NewContextStack.back(); |
| 1350 | LeafFrame.Location = LineLocation(0, 0); |
| 1351 | NewContextStack.pop_back(); |
| 1352 | // Compress the context string except for the leaf frame |
| 1353 | CSProfileGenerator::compressRecursionContext(Context&: NewContextStack); |
| 1354 | CSProfileGenerator::trimContext(S&: NewContextStack); |
| 1355 | NewContextStack.push_back(Elt: LeafFrame); |
| 1356 | |
| 1357 | const auto *FuncDesc = Binary->getFuncDescForGUID(GUID: LeafProbe->getGuid()); |
| 1358 | bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); |
| 1359 | ContextTrieNode *ContextNode = |
| 1360 | getOrCreateContextNode(Context: NewContextStack, WasLeafInlined); |
| 1361 | ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash); |
| 1362 | return ContextNode; |
| 1363 | } |
| 1364 | |
| 1365 | FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( |
| 1366 | const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { |
| 1367 | return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples(); |
| 1368 | } |
| 1369 | |
| 1370 | } // end namespace sampleprof |
| 1371 | } // end namespace llvm |
| 1372 | |