1 | //=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a transformation that synthesizes entry counts for |
10 | // functions and attaches !prof metadata to functions with the synthesized |
11 | // counts. The presence of !prof metadata with counter name set to |
12 | // 'synthesized_function_entry_count' indicate that the value of the counter is |
13 | // an estimation of the likely execution count of the function. This transform |
14 | // is applied only in non PGO mode as functions get 'real' profile-based |
15 | // function entry counts in the PGO mode. |
16 | // |
17 | // The transformation works by first assigning some initial values to the entry |
18 | // counts of all functions and then doing a top-down traversal of the |
19 | // callgraph-scc to propagate the counts. For each function the set of callsites |
20 | // and their relative block frequency is gathered. The relative block frequency |
21 | // multiplied by the entry count of the caller and added to the callee's entry |
22 | // count. For non-trivial SCCs, the new counts are computed from the previous |
23 | // counts and updated in one shot. |
24 | // |
25 | //===----------------------------------------------------------------------===// |
26 | |
27 | #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" |
28 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
29 | #include "llvm/Analysis/CallGraph.h" |
30 | #include "llvm/Analysis/SyntheticCountsUtils.h" |
31 | #include "llvm/IR/Function.h" |
32 | #include "llvm/IR/Instructions.h" |
33 | #include "llvm/IR/Module.h" |
34 | #include "llvm/Support/CommandLine.h" |
35 | |
36 | using namespace llvm; |
37 | using Scaled64 = ScaledNumber<uint64_t>; |
38 | using ProfileCount = Function::ProfileCount; |
39 | |
40 | #define DEBUG_TYPE "synthetic-counts-propagation" |
41 | |
42 | namespace llvm { |
43 | cl::opt<int> |
44 | InitialSyntheticCount("initial-synthetic-count" , cl::Hidden, cl::init(Val: 10), |
45 | cl::desc("Initial value of synthetic entry count" )); |
46 | } // namespace llvm |
47 | |
48 | /// Initial synthetic count assigned to inline functions. |
49 | static cl::opt<int> InlineSyntheticCount( |
50 | "inline-synthetic-count" , cl::Hidden, cl::init(Val: 15), |
51 | cl::desc("Initial synthetic entry count for inline functions." )); |
52 | |
53 | /// Initial synthetic count assigned to cold functions. |
54 | static cl::opt<int> ColdSyntheticCount( |
55 | "cold-synthetic-count" , cl::Hidden, cl::init(Val: 5), |
56 | cl::desc("Initial synthetic entry count for cold functions." )); |
57 | |
58 | // Assign initial synthetic entry counts to functions. |
59 | static void |
60 | initializeCounts(Module &M, function_ref<void(Function *, uint64_t)> SetCount) { |
61 | auto MayHaveIndirectCalls = [](Function &F) { |
62 | for (auto *U : F.users()) { |
63 | if (!isa<CallInst>(Val: U) && !isa<InvokeInst>(Val: U)) |
64 | return true; |
65 | } |
66 | return false; |
67 | }; |
68 | |
69 | for (Function &F : M) { |
70 | uint64_t InitialCount = InitialSyntheticCount; |
71 | if (F.isDeclaration()) |
72 | continue; |
73 | if (F.hasFnAttribute(Kind: Attribute::AlwaysInline) || |
74 | F.hasFnAttribute(Kind: Attribute::InlineHint)) { |
75 | // Use a higher value for inline functions to account for the fact that |
76 | // these are usually beneficial to inline. |
77 | InitialCount = InlineSyntheticCount; |
78 | } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) { |
79 | // Local functions without inline hints get counts only through |
80 | // propagation. |
81 | InitialCount = 0; |
82 | } else if (F.hasFnAttribute(Kind: Attribute::Cold) || |
83 | F.hasFnAttribute(Kind: Attribute::NoInline)) { |
84 | // Use a lower value for noinline and cold functions. |
85 | InitialCount = ColdSyntheticCount; |
86 | } |
87 | SetCount(&F, InitialCount); |
88 | } |
89 | } |
90 | |
91 | PreservedAnalyses SyntheticCountsPropagation::run(Module &M, |
92 | ModuleAnalysisManager &MAM) { |
93 | FunctionAnalysisManager &FAM = |
94 | MAM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager(); |
95 | DenseMap<Function *, Scaled64> Counts; |
96 | // Set initial entry counts. |
97 | initializeCounts( |
98 | M, SetCount: [&](Function *F, uint64_t Count) { Counts[F] = Scaled64(Count, 0); }); |
99 | |
100 | // Edge includes information about the source. Hence ignore the first |
101 | // parameter. |
102 | auto GetCallSiteProfCount = [&](const CallGraphNode *, |
103 | const CallGraphNode::CallRecord &Edge) { |
104 | std::optional<Scaled64> Res; |
105 | if (!Edge.first) |
106 | return Res; |
107 | CallBase &CB = *cast<CallBase>(Val: *Edge.first); |
108 | Function *Caller = CB.getCaller(); |
109 | auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(IR&: *Caller); |
110 | |
111 | // Now compute the callsite count from relative frequency and |
112 | // entry count: |
113 | BasicBlock *CSBB = CB.getParent(); |
114 | Scaled64 EntryFreq(BFI.getEntryFreq().getFrequency(), 0); |
115 | Scaled64 BBCount(BFI.getBlockFreq(BB: CSBB).getFrequency(), 0); |
116 | BBCount /= EntryFreq; |
117 | BBCount *= Counts[Caller]; |
118 | return std::optional<Scaled64>(BBCount); |
119 | }; |
120 | |
121 | CallGraph CG(M); |
122 | // Propgate the entry counts on the callgraph. |
123 | SyntheticCountsUtils<const CallGraph *>::propagate( |
124 | CG: &CG, GetProfCount: GetCallSiteProfCount, AddCount: [&](const CallGraphNode *N, Scaled64 New) { |
125 | auto F = N->getFunction(); |
126 | if (!F || F->isDeclaration()) |
127 | return; |
128 | |
129 | Counts[F] += New; |
130 | }); |
131 | |
132 | // Set the counts as metadata. |
133 | for (auto Entry : Counts) { |
134 | Entry.first->setEntryCount(Count: ProfileCount( |
135 | Entry.second.template toInt<uint64_t>(), Function::PCT_Synthetic)); |
136 | } |
137 | |
138 | return PreservedAnalyses::all(); |
139 | } |
140 | |