1 | //===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Our algorithm works by first identifying a subset of nodes that must always |
10 | // be instrumented. We call these nodes ambiguous because knowing the coverage |
11 | // of all remaining nodes is not enough to infer their coverage status. |
12 | // |
13 | // In general a node v is ambiguous if there exists two entry-to-terminal paths |
14 | // P_1 and P_2 such that: |
15 | // 1. v not in P_1 but P_1 visits a predecessor of v, and |
16 | // 2. v not in P_2 but P_2 visits a successor of v. |
17 | // |
18 | // If a node v is not ambiguous, then if condition 1 fails, we can infer v’s |
19 | // coverage from the coverage of its predecessors, or if condition 2 fails, we |
20 | // can infer v’s coverage from the coverage of its successors. |
21 | // |
22 | // Sadly, there are example CFGs where it is not possible to infer all nodes |
23 | // from the ambiguous nodes alone. Our algorithm selects a minimum number of |
24 | // extra nodes to add to the ambiguous nodes to form a valid instrumentation S. |
25 | // |
26 | // Details on this algorithm can be found in https://arxiv.org/abs/2208.13907 |
27 | // |
28 | //===----------------------------------------------------------------------===// |
29 | |
30 | #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" |
31 | #include "llvm/ADT/DepthFirstIterator.h" |
32 | #include "llvm/ADT/Statistic.h" |
33 | #include "llvm/Support/CRC.h" |
34 | #include "llvm/Support/Debug.h" |
35 | #include "llvm/Support/GraphWriter.h" |
36 | #include "llvm/Support/raw_ostream.h" |
37 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
38 | |
39 | using namespace llvm; |
40 | |
41 | #define DEBUG_TYPE "pgo-block-coverage" |
42 | |
43 | STATISTIC(NumFunctions, "Number of total functions that BCI has processed" ); |
44 | STATISTIC(NumIneligibleFunctions, |
45 | "Number of functions for which BCI cannot run on" ); |
46 | STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed" ); |
47 | STATISTIC(NumInstrumentedBlocks, |
48 | "Number of basic blocks instrumented for coverage" ); |
49 | |
50 | BlockCoverageInference::BlockCoverageInference(const Function &F, |
51 | bool ForceInstrumentEntry) |
52 | : F(F), ForceInstrumentEntry(ForceInstrumentEntry) { |
53 | findDependencies(); |
54 | assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock())); |
55 | |
56 | ++NumFunctions; |
57 | for (auto &BB : F) { |
58 | ++NumBlocks; |
59 | if (shouldInstrumentBlock(BB)) |
60 | ++NumInstrumentedBlocks; |
61 | } |
62 | } |
63 | |
64 | BlockCoverageInference::BlockSet |
65 | BlockCoverageInference::getDependencies(const BasicBlock &BB) const { |
66 | assert(BB.getParent() == &F); |
67 | BlockSet Dependencies; |
68 | auto It = PredecessorDependencies.find(Val: &BB); |
69 | if (It != PredecessorDependencies.end()) |
70 | Dependencies.set_union(It->second); |
71 | It = SuccessorDependencies.find(Val: &BB); |
72 | if (It != SuccessorDependencies.end()) |
73 | Dependencies.set_union(It->second); |
74 | return Dependencies; |
75 | } |
76 | |
77 | uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const { |
78 | JamCRC JC; |
79 | uint64_t Index = 0; |
80 | for (auto &BB : F) { |
81 | if (shouldInstrumentBlock(BB)) { |
82 | uint8_t Data[8]; |
83 | support::endian::write64le(P: Data, V: Index); |
84 | JC.update(Data); |
85 | } |
86 | Index++; |
87 | } |
88 | return JC.getCRC(); |
89 | } |
90 | |
91 | bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const { |
92 | assert(BB.getParent() == &F); |
93 | auto It = PredecessorDependencies.find(Val: &BB); |
94 | if (It != PredecessorDependencies.end() && It->second.size()) |
95 | return false; |
96 | It = SuccessorDependencies.find(Val: &BB); |
97 | if (It != SuccessorDependencies.end() && It->second.size()) |
98 | return false; |
99 | return true; |
100 | } |
101 | |
102 | void BlockCoverageInference::findDependencies() { |
103 | assert(PredecessorDependencies.empty() && SuccessorDependencies.empty()); |
104 | // Empirical analysis shows that this algorithm finishes within 5 seconds for |
105 | // functions with fewer than 1.5K blocks. |
106 | if (F.hasFnAttribute(Kind: Attribute::NoReturn) || F.size() > 1500) { |
107 | ++NumIneligibleFunctions; |
108 | return; |
109 | } |
110 | |
111 | SmallVector<const BasicBlock *, 4> TerminalBlocks; |
112 | for (auto &BB : F) |
113 | if (succ_empty(BB: &BB)) |
114 | TerminalBlocks.push_back(Elt: &BB); |
115 | |
116 | // Traverse the CFG backwards from the terminal blocks to make sure every |
117 | // block can reach some terminal block. Otherwise this algorithm will not work |
118 | // and we must fall back to instrumenting every block. |
119 | df_iterator_default_set<const BasicBlock *> Visited; |
120 | for (auto *BB : TerminalBlocks) |
121 | for (auto *N : inverse_depth_first_ext(G: BB, S&: Visited)) |
122 | (void)N; |
123 | if (F.size() != Visited.size()) { |
124 | ++NumIneligibleFunctions; |
125 | return; |
126 | } |
127 | |
128 | // The current implementation for computing `PredecessorDependencies` and |
129 | // `SuccessorDependencies` runs in quadratic time with respect to the number |
130 | // of basic blocks. While we do have a more complicated linear time algorithm |
131 | // in https://arxiv.org/abs/2208.13907 we do not know if it will give a |
132 | // significant speedup in practice given that most functions tend to be |
133 | // relatively small in size for intended use cases. |
134 | auto &EntryBlock = F.getEntryBlock(); |
135 | for (auto &BB : F) { |
136 | // The set of blocks that are reachable while avoiding BB. |
137 | BlockSet ReachableFromEntry, ReachableFromTerminal; |
138 | getReachableAvoiding(Start: EntryBlock, Avoid: BB, /*IsForward=*/true, |
139 | Reachable&: ReachableFromEntry); |
140 | for (auto *TerminalBlock : TerminalBlocks) |
141 | getReachableAvoiding(Start: *TerminalBlock, Avoid: BB, /*IsForward=*/false, |
142 | Reachable&: ReachableFromTerminal); |
143 | |
144 | auto Preds = predecessors(BB: &BB); |
145 | bool HasSuperReachablePred = llvm::any_of(Range&: Preds, P: [&](auto *Pred) { |
146 | return ReachableFromEntry.count(key: Pred) && |
147 | ReachableFromTerminal.count(key: Pred); |
148 | }); |
149 | if (!HasSuperReachablePred) |
150 | for (auto *Pred : Preds) |
151 | if (ReachableFromEntry.count(key: Pred)) |
152 | PredecessorDependencies[&BB].insert(X: Pred); |
153 | |
154 | auto Succs = successors(BB: &BB); |
155 | bool HasSuperReachableSucc = llvm::any_of(Range&: Succs, P: [&](auto *Succ) { |
156 | return ReachableFromEntry.count(key: Succ) && |
157 | ReachableFromTerminal.count(key: Succ); |
158 | }); |
159 | if (!HasSuperReachableSucc) |
160 | for (auto *Succ : Succs) |
161 | if (ReachableFromTerminal.count(key: Succ)) |
162 | SuccessorDependencies[&BB].insert(X: Succ); |
163 | } |
164 | |
165 | if (ForceInstrumentEntry) { |
166 | // Force the entry block to be instrumented by clearing the blocks it can |
167 | // infer coverage from. |
168 | PredecessorDependencies[&EntryBlock].clear(); |
169 | SuccessorDependencies[&EntryBlock].clear(); |
170 | } |
171 | |
172 | // Construct a graph where blocks are connected if there is a mutual |
173 | // dependency between them. This graph has a special property that it contains |
174 | // only paths. |
175 | DenseMap<const BasicBlock *, BlockSet> AdjacencyList; |
176 | for (auto &BB : F) { |
177 | for (auto *Succ : successors(BB: &BB)) { |
178 | if (SuccessorDependencies[&BB].count(key: Succ) && |
179 | PredecessorDependencies[Succ].count(key: &BB)) { |
180 | AdjacencyList[&BB].insert(X: Succ); |
181 | AdjacencyList[Succ].insert(X: &BB); |
182 | } |
183 | } |
184 | } |
185 | |
186 | // Given a path with at least one node, return the next node on the path. |
187 | auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * { |
188 | assert(Path.size()); |
189 | auto &Neighbors = AdjacencyList[Path.back()]; |
190 | if (Path.size() == 1) { |
191 | // This is the first node on the path, return its neighbor. |
192 | assert(Neighbors.size() == 1); |
193 | return Neighbors.front(); |
194 | } else if (Neighbors.size() == 2) { |
195 | // This is the middle of the path, find the neighbor that is not on the |
196 | // path already. |
197 | assert(Path.size() >= 2); |
198 | return Path.count(key: Neighbors[0]) ? Neighbors[1] : Neighbors[0]; |
199 | } |
200 | // This is the end of the path. |
201 | assert(Neighbors.size() == 1); |
202 | return nullptr; |
203 | }; |
204 | |
205 | // Remove all cycles in the inferencing graph. |
206 | for (auto &BB : F) { |
207 | if (AdjacencyList[&BB].size() == 1) { |
208 | // We found the head of some path. |
209 | BlockSet Path; |
210 | Path.insert(X: &BB); |
211 | while (const BasicBlock *Next = getNextOnPath(Path)) |
212 | Path.insert(X: Next); |
213 | LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n" ); |
214 | |
215 | // Remove these nodes from the graph so we don't discover this path again. |
216 | for (auto *BB : Path) |
217 | AdjacencyList[BB].clear(); |
218 | |
219 | // Finally, remove the cycles. |
220 | if (PredecessorDependencies[Path.front()].size()) { |
221 | for (auto *BB : Path) |
222 | if (BB != Path.back()) |
223 | SuccessorDependencies[BB].clear(); |
224 | } else { |
225 | for (auto *BB : Path) |
226 | if (BB != Path.front()) |
227 | PredecessorDependencies[BB].clear(); |
228 | } |
229 | } |
230 | } |
231 | LLVM_DEBUG(dump(dbgs())); |
232 | } |
233 | |
234 | void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start, |
235 | const BasicBlock &Avoid, |
236 | bool IsForward, |
237 | BlockSet &Reachable) const { |
238 | df_iterator_default_set<const BasicBlock *> Visited; |
239 | Visited.insert(N: &Avoid); |
240 | if (IsForward) { |
241 | auto Range = depth_first_ext(G: &Start, S&: Visited); |
242 | Reachable.insert(Start: Range.begin(), End: Range.end()); |
243 | } else { |
244 | auto Range = inverse_depth_first_ext(G: &Start, S&: Visited); |
245 | Reachable.insert(Start: Range.begin(), End: Range.end()); |
246 | } |
247 | } |
248 | |
249 | namespace llvm { |
250 | class DotFuncBCIInfo { |
251 | private: |
252 | const BlockCoverageInference *BCI; |
253 | const DenseMap<const BasicBlock *, bool> *Coverage; |
254 | |
255 | public: |
256 | DotFuncBCIInfo(const BlockCoverageInference *BCI, |
257 | const DenseMap<const BasicBlock *, bool> *Coverage) |
258 | : BCI(BCI), Coverage(Coverage) {} |
259 | |
260 | const Function &getFunction() { return BCI->F; } |
261 | |
262 | bool isInstrumented(const BasicBlock *BB) const { |
263 | return BCI->shouldInstrumentBlock(BB: *BB); |
264 | } |
265 | |
266 | bool isCovered(const BasicBlock *BB) const { |
267 | return Coverage && Coverage->lookup(Val: BB); |
268 | } |
269 | |
270 | bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const { |
271 | return BCI->getDependencies(BB: *Src).count(key: Dest); |
272 | } |
273 | }; |
274 | |
275 | template <> |
276 | struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> { |
277 | static NodeRef getEntryNode(DotFuncBCIInfo *Info) { |
278 | return &(Info->getFunction().getEntryBlock()); |
279 | } |
280 | |
281 | // nodes_iterator/begin/end - Allow iteration over all nodes in the graph |
282 | using nodes_iterator = pointer_iterator<Function::const_iterator>; |
283 | |
284 | static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) { |
285 | return nodes_iterator(Info->getFunction().begin()); |
286 | } |
287 | |
288 | static nodes_iterator nodes_end(DotFuncBCIInfo *Info) { |
289 | return nodes_iterator(Info->getFunction().end()); |
290 | } |
291 | |
292 | static size_t size(DotFuncBCIInfo *Info) { |
293 | return Info->getFunction().size(); |
294 | } |
295 | }; |
296 | |
297 | template <> |
298 | struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits { |
299 | |
300 | DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} |
301 | |
302 | static std::string getGraphName(DotFuncBCIInfo *Info) { |
303 | return "BCI CFG for " + Info->getFunction().getName().str(); |
304 | } |
305 | |
306 | std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) { |
307 | return Node->getName().str(); |
308 | } |
309 | |
310 | std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I, |
311 | DotFuncBCIInfo *Info) { |
312 | const BasicBlock *Dest = *I; |
313 | if (Info->isDependent(Src, Dest)) |
314 | return "color=red" ; |
315 | if (Info->isDependent(Src: Dest, Dest: Src)) |
316 | return "color=blue" ; |
317 | return "" ; |
318 | } |
319 | |
320 | std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) { |
321 | std::string Result; |
322 | if (Info->isInstrumented(BB: Node)) |
323 | Result += "style=filled,fillcolor=gray" ; |
324 | if (Info->isCovered(BB: Node)) |
325 | Result += std::string(Result.empty() ? "" : "," ) + "color=red" ; |
326 | return Result; |
327 | } |
328 | }; |
329 | |
330 | } // namespace llvm |
331 | |
332 | void BlockCoverageInference::viewBlockCoverageGraph( |
333 | const DenseMap<const BasicBlock *, bool> *Coverage) const { |
334 | DotFuncBCIInfo Info(this, Coverage); |
335 | WriteGraph(G: &Info, Name: "BCI" , ShortNames: false, |
336 | Title: "Block Coverage Inference for " + F.getName()); |
337 | } |
338 | |
339 | void BlockCoverageInference::dump(raw_ostream &OS) const { |
340 | OS << "Minimal block coverage for function \'" << F.getName() |
341 | << "\' (Instrumented=*)\n" ; |
342 | for (auto &BB : F) { |
343 | OS << (shouldInstrumentBlock(BB) ? "* " : " " ) << BB.getName() << "\n" ; |
344 | auto It = PredecessorDependencies.find(Val: &BB); |
345 | if (It != PredecessorDependencies.end() && It->second.size()) |
346 | OS << " PredDeps = " << getBlockNames(BBs: It->second) << "\n" ; |
347 | It = SuccessorDependencies.find(Val: &BB); |
348 | if (It != SuccessorDependencies.end() && It->second.size()) |
349 | OS << " SuccDeps = " << getBlockNames(BBs: It->second) << "\n" ; |
350 | } |
351 | OS << " Instrumented Blocks Hash = 0x" |
352 | << Twine::utohexstr(Val: getInstrumentedBlocksHash()) << "\n" ; |
353 | } |
354 | |
355 | std::string |
356 | BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) { |
357 | std::string Result; |
358 | raw_string_ostream OS(Result); |
359 | OS << "[" ; |
360 | if (!BBs.empty()) { |
361 | OS << BBs.front()->getName(); |
362 | BBs = BBs.drop_front(); |
363 | } |
364 | for (auto *BB : BBs) |
365 | OS << ", " << BB->getName(); |
366 | OS << "]" ; |
367 | return OS.str(); |
368 | } |
369 | |