//=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for computing profile summary data.
//
//===----------------------------------------------------------------------===//
| 12 | |
| 13 | #include "llvm/IR/ProfileSummary.h" |
| 14 | #include "llvm/ProfileData/InstrProf.h" |
| 15 | #include "llvm/ProfileData/ProfileCommon.h" |
| 16 | #include "llvm/ProfileData/SampleProf.h" |
| 17 | #include "llvm/Support/CommandLine.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | namespace llvm { |
| 22 | cl::opt<bool> UseContextLessSummary( |
| 23 | "profile-summary-contextless" , cl::Hidden, |
| 24 | cl::desc("Merge context profiles before calculating thresholds." )); |
| 25 | |
| 26 | // The following two parameters determine the threshold for a count to be |
| 27 | // considered hot/cold. These two parameters are percentile values (multiplied |
| 28 | // by 10000). If the counts are sorted in descending order, the minimum count to |
| 29 | // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count. |
| 30 | // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the |
| 31 | // threshold for determining cold count (everything <= this threshold is |
| 32 | // considered cold). |
| 33 | cl::opt<int> ProfileSummaryCutoffHot( |
| 34 | "profile-summary-cutoff-hot" , cl::Hidden, cl::init(Val: 990000), |
| 35 | cl::desc("A count is hot if it exceeds the minimum count to" |
| 36 | " reach this percentile of total counts." )); |
| 37 | |
| 38 | cl::opt<int> ProfileSummaryCutoffCold( |
| 39 | "profile-summary-cutoff-cold" , cl::Hidden, cl::init(Val: 999999), |
| 40 | cl::desc("A count is cold if it is below the minimum count" |
| 41 | " to reach this percentile of total counts." )); |
| 42 | |
| 43 | cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( |
| 44 | "profile-summary-huge-working-set-size-threshold" , cl::Hidden, |
| 45 | cl::init(Val: 15000), |
| 46 | cl::desc("The code working set size is considered huge if the number of" |
| 47 | " blocks required to reach the -profile-summary-cutoff-hot" |
| 48 | " percentile exceeds this count." )); |
| 49 | |
| 50 | cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold( |
| 51 | "profile-summary-large-working-set-size-threshold" , cl::Hidden, |
| 52 | cl::init(Val: 12500), |
| 53 | cl::desc("The code working set size is considered large if the number of" |
| 54 | " blocks required to reach the -profile-summary-cutoff-hot" |
| 55 | " percentile exceeds this count." )); |
| 56 | |
| 57 | // The next two options override the counts derived from summary computation and |
| 58 | // are useful for debugging purposes. |
| 59 | cl::opt<uint64_t> ProfileSummaryHotCount( |
| 60 | "profile-summary-hot-count" , cl::ReallyHidden, |
| 61 | cl::desc("A fixed hot count that overrides the count derived from" |
| 62 | " profile-summary-cutoff-hot" )); |
| 63 | |
| 64 | cl::opt<uint64_t> ProfileSummaryColdCount( |
| 65 | "profile-summary-cold-count" , cl::ReallyHidden, |
| 66 | cl::desc("A fixed cold count that overrides the count derived from" |
| 67 | " profile-summary-cutoff-cold" )); |
| 68 | } // namespace llvm |
| 69 | |
| 70 | // A set of cutoff values. Each value, when divided by ProfileSummary::Scale |
| 71 | // (which is 1000000) is a desired percentile of total counts. |
| 72 | static const uint32_t DefaultCutoffsData[] = { |
| 73 | 10000, /* 1% */ |
| 74 | 100000, /* 10% */ |
| 75 | 200000, 300000, 400000, 500000, 600000, 700000, 800000, |
| 76 | 900000, 950000, 990000, 999000, 999900, 999990, 999999}; |
| 77 | const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs = |
| 78 | DefaultCutoffsData; |
| 79 | |
| 80 | // An entry for the 0th percentile to correctly calculate hot/cold count |
| 81 | // thresholds when -profile-summary-cutoff-hot/cold is 0. If the hot cutoff is |
| 82 | // 0, no sample counts are treated as hot. If the cold cutoff is 0, all sample |
| 83 | // counts are treated as cold. Assumes there is no UINT64_MAX sample counts. |
| 84 | static const ProfileSummaryEntry ZeroCutoffEntry = {0, UINT64_MAX, 0}; |
| 85 | |
| 86 | const ProfileSummaryEntry & |
| 87 | ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS, |
| 88 | uint64_t Percentile) { |
| 89 | if (Percentile == 0) |
| 90 | return ZeroCutoffEntry; |
| 91 | |
| 92 | auto It = partition_point(Range: DS, P: [=](const ProfileSummaryEntry &Entry) { |
| 93 | return Entry.Cutoff < Percentile; |
| 94 | }); |
| 95 | // The required percentile has to be <= one of the percentiles in the |
| 96 | // detailed summary. |
| 97 | if (It == DS.end()) |
| 98 | report_fatal_error(reason: "Desired percentile exceeds the maximum cutoff" ); |
| 99 | return *It; |
| 100 | } |
| 101 | |
| 102 | void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) { |
| 103 | // The first counter is not necessarily an entry count for IR |
| 104 | // instrumentation profiles. |
| 105 | // Eventually MaxFunctionCount will become obsolete and this can be |
| 106 | // removed. |
| 107 | |
| 108 | if (R.getCountPseudoKind() != InstrProfRecord::NotPseudo) |
| 109 | return; |
| 110 | |
| 111 | addEntryCount(Count: R.Counts[0]); |
| 112 | for (size_t I = 1, E = R.Counts.size(); I < E; ++I) |
| 113 | addInternalCount(Count: R.Counts[I]); |
| 114 | } |
| 115 | |
| 116 | // To compute the detailed summary, we consider each line containing samples as |
| 117 | // equivalent to a block with a count in the instrumented profile. |
| 118 | void SampleProfileSummaryBuilder::addRecord( |
| 119 | const sampleprof::FunctionSamples &FS, bool isCallsiteSample) { |
| 120 | if (!isCallsiteSample) { |
| 121 | NumFunctions++; |
| 122 | if (FS.getHeadSamples() > MaxFunctionCount) |
| 123 | MaxFunctionCount = FS.getHeadSamples(); |
| 124 | } else if (FS.getContext().hasAttribute( |
| 125 | A: sampleprof::ContextDuplicatedIntoBase)) { |
| 126 | // Do not recount callee samples if they are already merged into their base |
| 127 | // profiles. This can happen to CS nested profile. |
| 128 | return; |
| 129 | } |
| 130 | |
| 131 | for (const auto &I : FS.getBodySamples()) { |
| 132 | uint64_t Count = I.second.getSamples(); |
| 133 | addCount(Count); |
| 134 | } |
| 135 | for (const auto &I : FS.getCallsiteSamples()) |
| 136 | for (const auto &CS : I.second) |
| 137 | addRecord(FS: CS.second, isCallsiteSample: true); |
| 138 | } |
| 139 | |
| 140 | // The argument to this method is a vector of cutoff percentages and the return |
| 141 | // value is a vector of (Cutoff, MinCount, NumCounts) triplets. |
| 142 | void ProfileSummaryBuilder::computeDetailedSummary() { |
| 143 | if (DetailedSummaryCutoffs.empty()) |
| 144 | return; |
| 145 | llvm::sort(C&: DetailedSummaryCutoffs); |
| 146 | auto Iter = CountFrequencies.begin(); |
| 147 | const auto End = CountFrequencies.end(); |
| 148 | |
| 149 | uint32_t CountsSeen = 0; |
| 150 | uint64_t CurrSum = 0, Count = 0; |
| 151 | |
| 152 | for (const uint32_t Cutoff : DetailedSummaryCutoffs) { |
| 153 | assert(Cutoff <= 999999); |
| 154 | APInt Temp(128, TotalCount); |
| 155 | APInt N(128, Cutoff); |
| 156 | APInt D(128, ProfileSummary::Scale); |
| 157 | Temp *= N; |
| 158 | Temp = Temp.sdiv(RHS: D); |
| 159 | uint64_t DesiredCount = Temp.getZExtValue(); |
| 160 | assert(DesiredCount <= TotalCount); |
| 161 | while (CurrSum < DesiredCount && Iter != End) { |
| 162 | Count = Iter->first; |
| 163 | uint32_t Freq = Iter->second; |
| 164 | CurrSum += (Count * Freq); |
| 165 | CountsSeen += Freq; |
| 166 | Iter++; |
| 167 | } |
| 168 | assert(CurrSum >= DesiredCount); |
| 169 | ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen}; |
| 170 | DetailedSummary.push_back(x: PSE); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | uint64_t |
| 175 | ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) { |
| 176 | auto &HotEntry = |
| 177 | ProfileSummaryBuilder::getEntryForPercentile(DS, Percentile: ProfileSummaryCutoffHot); |
| 178 | uint64_t HotCountThreshold = HotEntry.MinCount; |
| 179 | if (ProfileSummaryHotCount.getNumOccurrences() > 0) |
| 180 | HotCountThreshold = ProfileSummaryHotCount; |
| 181 | return HotCountThreshold; |
| 182 | } |
| 183 | |
| 184 | uint64_t |
| 185 | ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) { |
| 186 | auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( |
| 187 | DS, Percentile: ProfileSummaryCutoffCold); |
| 188 | uint64_t ColdCountThreshold = ColdEntry.MinCount; |
| 189 | if (ProfileSummaryColdCount.getNumOccurrences() > 0) |
| 190 | ColdCountThreshold = ProfileSummaryColdCount; |
| 191 | return ColdCountThreshold; |
| 192 | } |
| 193 | |
| 194 | std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() { |
| 195 | computeDetailedSummary(); |
| 196 | return std::make_unique<ProfileSummary>( |
| 197 | args: ProfileSummary::PSK_Sample, args&: DetailedSummary, args&: TotalCount, args&: MaxCount, args: 0, |
| 198 | args&: MaxFunctionCount, args&: NumCounts, args&: NumFunctions); |
| 199 | } |
| 200 | |
| 201 | std::unique_ptr<ProfileSummary> |
| 202 | SampleProfileSummaryBuilder::computeSummaryForProfiles( |
| 203 | const SampleProfileMap &Profiles) { |
| 204 | assert(NumFunctions == 0 && |
| 205 | "This can only be called on an empty summary builder" ); |
| 206 | sampleprof::SampleProfileMap ContextLessProfiles; |
| 207 | const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles; |
| 208 | // For CSSPGO, context-sensitive profile effectively split a function profile |
| 209 | // into many copies each representing the CFG profile of a particular calling |
| 210 | // context. That makes the count distribution looks more flat as we now have |
| 211 | // more function profiles each with lower counts, which in turn leads to lower |
| 212 | // hot thresholds. To compensate for that, by default we merge context |
| 213 | // profiles before computing profile summary. |
| 214 | if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && |
| 215 | !UseContextLessSummary.getNumOccurrences())) { |
| 216 | ProfileConverter::flattenProfile(InputProfiles: Profiles, OutputProfiles&: ContextLessProfiles, ProfileIsCS: true); |
| 217 | ProfilesToUse = &ContextLessProfiles; |
| 218 | } |
| 219 | |
| 220 | for (const auto &I : *ProfilesToUse) { |
| 221 | const sampleprof::FunctionSamples &Profile = I.second; |
| 222 | addRecord(FS: Profile); |
| 223 | } |
| 224 | |
| 225 | return getSummary(); |
| 226 | } |
| 227 | |
| 228 | std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() { |
| 229 | computeDetailedSummary(); |
| 230 | return std::make_unique<ProfileSummary>( |
| 231 | args: ProfileSummary::PSK_Instr, args&: DetailedSummary, args&: TotalCount, args&: MaxCount, |
| 232 | args&: MaxInternalBlockCount, args&: MaxFunctionCount, args&: NumCounts, args&: NumFunctions); |
| 233 | } |
| 234 | |
| 235 | void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { |
| 236 | assert(Count <= getInstrMaxCountValue() && |
| 237 | "Count value should be less than the max count value." ); |
| 238 | NumFunctions++; |
| 239 | addCount(Count); |
| 240 | if (Count > MaxFunctionCount) |
| 241 | MaxFunctionCount = Count; |
| 242 | } |
| 243 | |
| 244 | void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { |
| 245 | assert(Count <= getInstrMaxCountValue() && |
| 246 | "Count value should be less than the max count value." ); |
| 247 | addCount(Count); |
| 248 | if (Count > MaxInternalBlockCount) |
| 249 | MaxInternalBlockCount = Count; |
| 250 | } |
| 251 | |