1 | //===- FunctionPropertiesAnalysis.cpp - Function Properties Analysis ------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis |
10 | // classes used to extract function properties. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Analysis/FunctionPropertiesAnalysis.h" |
15 | #include "llvm/ADT/STLExtras.h" |
16 | #include "llvm/ADT/SetVector.h" |
17 | #include "llvm/Analysis/LoopInfo.h" |
18 | #include "llvm/IR/CFG.h" |
19 | #include "llvm/IR/Constants.h" |
20 | #include "llvm/IR/Dominators.h" |
21 | #include "llvm/IR/Instructions.h" |
22 | #include "llvm/IR/IntrinsicInst.h" |
23 | #include "llvm/Support/CommandLine.h" |
24 | #include "llvm/Support/Compiler.h" |
25 | #include <deque> |
26 | |
27 | using namespace llvm; |
28 | |
29 | namespace llvm { |
30 | LLVM_ABI cl::opt<bool> EnableDetailedFunctionProperties( |
31 | "enable-detailed-function-properties" , cl::Hidden, cl::init(Val: false), |
32 | cl::desc("Whether or not to compute detailed function properties." )); |
33 | |
34 | static cl::opt<unsigned> BigBasicBlockInstructionThreshold( |
35 | "big-basic-block-instruction-threshold" , cl::Hidden, cl::init(Val: 500), |
36 | cl::desc("The minimum number of instructions a basic block should contain " |
37 | "before being considered big." )); |
38 | |
39 | static cl::opt<unsigned> MediumBasicBlockInstructionThreshold( |
40 | "medium-basic-block-instruction-threshold" , cl::Hidden, cl::init(Val: 15), |
41 | cl::desc("The minimum number of instructions a basic block should contain " |
42 | "before being considered medium-sized." )); |
43 | } // namespace llvm |
44 | |
45 | static cl::opt<unsigned> CallWithManyArgumentsThreshold( |
46 | "call-with-many-arguments-threshold" , cl::Hidden, cl::init(Val: 4), |
47 | cl::desc("The minimum number of arguments a function call must have before " |
48 | "it is considered having many arguments." )); |
49 | |
50 | namespace { |
51 | int64_t getNumBlocksFromCond(const BasicBlock &BB) { |
52 | int64_t Ret = 0; |
53 | if (const auto *BI = dyn_cast<BranchInst>(Val: BB.getTerminator())) { |
54 | if (BI->isConditional()) |
55 | Ret += BI->getNumSuccessors(); |
56 | } else if (const auto *SI = dyn_cast<SwitchInst>(Val: BB.getTerminator())) { |
57 | Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest())); |
58 | } |
59 | return Ret; |
60 | } |
61 | |
62 | int64_t getUses(const Function &F) { |
63 | return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); |
64 | } |
65 | } // namespace |
66 | |
67 | void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB) { |
68 | updateForBB(BB, Direction: +1); |
69 | } |
70 | |
71 | void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB, |
72 | int64_t Direction) { |
73 | assert(Direction == 1 || Direction == -1); |
74 | BasicBlockCount += Direction; |
75 | BlocksReachedFromConditionalInstruction += |
76 | (Direction * getNumBlocksFromCond(BB)); |
77 | for (const auto &I : BB) { |
78 | if (auto *CS = dyn_cast<CallBase>(Val: &I)) { |
79 | const auto *Callee = CS->getCalledFunction(); |
80 | if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) |
81 | DirectCallsToDefinedFunctions += Direction; |
82 | } |
83 | if (I.getOpcode() == Instruction::Load) { |
84 | LoadInstCount += Direction; |
85 | } else if (I.getOpcode() == Instruction::Store) { |
86 | StoreInstCount += Direction; |
87 | } |
88 | } |
89 | TotalInstructionCount += Direction * BB.sizeWithoutDebug(); |
90 | |
91 | if (EnableDetailedFunctionProperties) { |
92 | unsigned SuccessorCount = succ_size(BB: &BB); |
93 | if (SuccessorCount == 1) |
94 | BasicBlocksWithSingleSuccessor += Direction; |
95 | else if (SuccessorCount == 2) |
96 | BasicBlocksWithTwoSuccessors += Direction; |
97 | else if (SuccessorCount > 2) |
98 | BasicBlocksWithMoreThanTwoSuccessors += Direction; |
99 | |
100 | unsigned PredecessorCount = pred_size(BB: &BB); |
101 | if (PredecessorCount == 1) |
102 | BasicBlocksWithSinglePredecessor += Direction; |
103 | else if (PredecessorCount == 2) |
104 | BasicBlocksWithTwoPredecessors += Direction; |
105 | else if (PredecessorCount > 2) |
106 | BasicBlocksWithMoreThanTwoPredecessors += Direction; |
107 | |
108 | if (TotalInstructionCount > BigBasicBlockInstructionThreshold) |
109 | BigBasicBlocks += Direction; |
110 | else if (TotalInstructionCount > MediumBasicBlockInstructionThreshold) |
111 | MediumBasicBlocks += Direction; |
112 | else |
113 | SmallBasicBlocks += Direction; |
114 | |
115 | // Calculate critical edges by looking through all successors of a basic |
116 | // block that has multiple successors and finding ones that have multiple |
117 | // predecessors, which represent critical edges. |
118 | if (SuccessorCount > 1) { |
119 | for (const auto *Successor : successors(BB: &BB)) { |
120 | if (pred_size(BB: Successor) > 1) |
121 | CriticalEdgeCount += Direction; |
122 | } |
123 | } |
124 | |
125 | ControlFlowEdgeCount += Direction * SuccessorCount; |
126 | |
127 | if (const auto *BI = dyn_cast<BranchInst>(Val: BB.getTerminator())) { |
128 | if (!BI->isConditional()) |
129 | UnconditionalBranchCount += Direction; |
130 | } |
131 | |
132 | for (const Instruction &I : BB.instructionsWithoutDebug()) { |
133 | if (I.isCast()) |
134 | CastInstructionCount += Direction; |
135 | |
136 | if (I.getType()->isFloatTy()) |
137 | FloatingPointInstructionCount += Direction; |
138 | else if (I.getType()->isIntegerTy()) |
139 | IntegerInstructionCount += Direction; |
140 | |
141 | if (isa<IntrinsicInst>(Val: I)) |
142 | ++IntrinsicCount; |
143 | |
144 | if (const auto *Call = dyn_cast<CallInst>(Val: &I)) { |
145 | if (Call->isIndirectCall()) |
146 | IndirectCallCount += Direction; |
147 | else |
148 | DirectCallCount += Direction; |
149 | |
150 | if (Call->getType()->isIntegerTy()) |
151 | CallReturnsIntegerCount += Direction; |
152 | else if (Call->getType()->isFloatingPointTy()) |
153 | CallReturnsFloatCount += Direction; |
154 | else if (Call->getType()->isPointerTy()) |
155 | CallReturnsPointerCount += Direction; |
156 | else if (Call->getType()->isVectorTy()) { |
157 | if (Call->getType()->getScalarType()->isIntegerTy()) |
158 | CallReturnsVectorIntCount += Direction; |
159 | else if (Call->getType()->getScalarType()->isFloatingPointTy()) |
160 | CallReturnsVectorFloatCount += Direction; |
161 | else if (Call->getType()->getScalarType()->isPointerTy()) |
162 | CallReturnsVectorPointerCount += Direction; |
163 | } |
164 | |
165 | if (Call->arg_size() > CallWithManyArgumentsThreshold) |
166 | CallWithManyArgumentsCount += Direction; |
167 | |
168 | for (const auto &Arg : Call->args()) { |
169 | if (Arg->getType()->isPointerTy()) { |
170 | CallWithPointerArgumentCount += Direction; |
171 | break; |
172 | } |
173 | } |
174 | } |
175 | |
176 | #define COUNT_OPERAND(OPTYPE) \ |
177 | if (isa<OPTYPE>(Operand)) { \ |
178 | OPTYPE##OperandCount += Direction; \ |
179 | continue; \ |
180 | } |
181 | |
182 | for (unsigned int OperandIndex = 0; OperandIndex < I.getNumOperands(); |
183 | ++OperandIndex) { |
184 | Value *Operand = I.getOperand(i: OperandIndex); |
185 | COUNT_OPERAND(GlobalValue) |
186 | COUNT_OPERAND(ConstantInt) |
187 | COUNT_OPERAND(ConstantFP) |
188 | COUNT_OPERAND(Constant) |
189 | COUNT_OPERAND(Instruction) |
190 | COUNT_OPERAND(BasicBlock) |
191 | COUNT_OPERAND(InlineAsm) |
192 | COUNT_OPERAND(Argument) |
193 | |
194 | // We only get to this point if we haven't matched any of the other |
195 | // operand types. |
196 | UnknownOperandCount += Direction; |
197 | } |
198 | |
199 | #undef CHECK_OPERAND |
200 | } |
201 | } |
202 | |
203 | if (IR2VecVocab) { |
204 | // We instantiate the IR2Vec embedder each time, as having an unique |
205 | // pointer to the embedder as member of the class would make it |
206 | // non-copyable. Instantiating the embedder in itself is not costly. |
207 | auto Embedder = ir2vec::Embedder::create(Mode: IR2VecKind::Symbolic, |
208 | F: *BB.getParent(), Vocabulary: *IR2VecVocab); |
209 | if (!Embedder) { |
210 | BB.getContext().emitError(ErrorStr: "Error creating IR2Vec embeddings" ); |
211 | return; |
212 | } |
213 | const auto &BBEmbedding = Embedder->getBBVector(BB); |
214 | // Subtract BBEmbedding from Function embedding if the direction is -1, |
215 | // and add it if the direction is +1. |
216 | if (Direction == -1) |
217 | FunctionEmbedding -= BBEmbedding; |
218 | else |
219 | FunctionEmbedding += BBEmbedding; |
220 | } |
221 | } |
222 | |
223 | void FunctionPropertiesInfo::updateAggregateStats(const Function &F, |
224 | const LoopInfo &LI) { |
225 | |
226 | Uses = getUses(F); |
227 | TopLevelLoopCount = llvm::size(Range: LI); |
228 | MaxLoopDepth = 0; |
229 | std::deque<const Loop *> Worklist; |
230 | llvm::append_range(C&: Worklist, R: LI); |
231 | while (!Worklist.empty()) { |
232 | const auto *L = Worklist.front(); |
233 | MaxLoopDepth = |
234 | std::max(a: MaxLoopDepth, b: static_cast<int64_t>(L->getLoopDepth())); |
235 | Worklist.pop_front(); |
236 | llvm::append_range(C&: Worklist, R: L->getSubLoops()); |
237 | } |
238 | } |
239 | |
240 | FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( |
241 | Function &F, FunctionAnalysisManager &FAM) { |
242 | // We use the cached result of the IR2VecVocabAnalysis run by |
243 | // InlineAdvisorAnalysis. If the IR2VecVocabAnalysis is not run, we don't |
244 | // use IR2Vec embeddings. |
245 | auto VocabResult = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F) |
246 | .getCachedResult<IR2VecVocabAnalysis>(IR&: *F.getParent()); |
247 | return getFunctionPropertiesInfo(F, DT: FAM.getResult<DominatorTreeAnalysis>(IR&: F), |
248 | LI: FAM.getResult<LoopAnalysis>(IR&: F), VocabResult); |
249 | } |
250 | |
251 | FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( |
252 | const Function &F, const DominatorTree &DT, const LoopInfo &LI, |
253 | const IR2VecVocabResult *VocabResult) { |
254 | |
255 | FunctionPropertiesInfo FPI; |
256 | if (VocabResult && VocabResult->isValid()) { |
257 | FPI.IR2VecVocab = VocabResult->getVocabulary(); |
258 | FPI.FunctionEmbedding = ir2vec::Embedding(VocabResult->getDimension(), 0.0); |
259 | } |
260 | for (const auto &BB : F) |
261 | if (DT.isReachableFromEntry(A: &BB)) |
262 | FPI.reIncludeBB(BB); |
263 | FPI.updateAggregateStats(F, LI); |
264 | return FPI; |
265 | } |
266 | |
267 | bool FunctionPropertiesInfo::operator==( |
268 | const FunctionPropertiesInfo &FPI) const { |
269 | if (BasicBlockCount != FPI.BasicBlockCount || |
270 | BlocksReachedFromConditionalInstruction != |
271 | FPI.BlocksReachedFromConditionalInstruction || |
272 | Uses != FPI.Uses || |
273 | DirectCallsToDefinedFunctions != FPI.DirectCallsToDefinedFunctions || |
274 | LoadInstCount != FPI.LoadInstCount || |
275 | StoreInstCount != FPI.StoreInstCount || |
276 | MaxLoopDepth != FPI.MaxLoopDepth || |
277 | TopLevelLoopCount != FPI.TopLevelLoopCount || |
278 | TotalInstructionCount != FPI.TotalInstructionCount || |
279 | BasicBlocksWithSingleSuccessor != FPI.BasicBlocksWithSingleSuccessor || |
280 | BasicBlocksWithTwoSuccessors != FPI.BasicBlocksWithTwoSuccessors || |
281 | BasicBlocksWithMoreThanTwoSuccessors != |
282 | FPI.BasicBlocksWithMoreThanTwoSuccessors || |
283 | BasicBlocksWithSinglePredecessor != |
284 | FPI.BasicBlocksWithSinglePredecessor || |
285 | BasicBlocksWithTwoPredecessors != FPI.BasicBlocksWithTwoPredecessors || |
286 | BasicBlocksWithMoreThanTwoPredecessors != |
287 | FPI.BasicBlocksWithMoreThanTwoPredecessors || |
288 | BigBasicBlocks != FPI.BigBasicBlocks || |
289 | MediumBasicBlocks != FPI.MediumBasicBlocks || |
290 | SmallBasicBlocks != FPI.SmallBasicBlocks || |
291 | CastInstructionCount != FPI.CastInstructionCount || |
292 | FloatingPointInstructionCount != FPI.FloatingPointInstructionCount || |
293 | IntegerInstructionCount != FPI.IntegerInstructionCount || |
294 | ConstantIntOperandCount != FPI.ConstantIntOperandCount || |
295 | ConstantFPOperandCount != FPI.ConstantFPOperandCount || |
296 | ConstantOperandCount != FPI.ConstantOperandCount || |
297 | InstructionOperandCount != FPI.InstructionOperandCount || |
298 | BasicBlockOperandCount != FPI.BasicBlockOperandCount || |
299 | GlobalValueOperandCount != FPI.GlobalValueOperandCount || |
300 | InlineAsmOperandCount != FPI.InlineAsmOperandCount || |
301 | ArgumentOperandCount != FPI.ArgumentOperandCount || |
302 | UnknownOperandCount != FPI.UnknownOperandCount || |
303 | CriticalEdgeCount != FPI.CriticalEdgeCount || |
304 | ControlFlowEdgeCount != FPI.ControlFlowEdgeCount || |
305 | UnconditionalBranchCount != FPI.UnconditionalBranchCount || |
306 | IntrinsicCount != FPI.IntrinsicCount || |
307 | DirectCallCount != FPI.DirectCallCount || |
308 | IndirectCallCount != FPI.IndirectCallCount || |
309 | CallReturnsIntegerCount != FPI.CallReturnsIntegerCount || |
310 | CallReturnsFloatCount != FPI.CallReturnsFloatCount || |
311 | CallReturnsPointerCount != FPI.CallReturnsPointerCount || |
312 | CallReturnsVectorIntCount != FPI.CallReturnsVectorIntCount || |
313 | CallReturnsVectorFloatCount != FPI.CallReturnsVectorFloatCount || |
314 | CallReturnsVectorPointerCount != FPI.CallReturnsVectorPointerCount || |
315 | CallWithManyArgumentsCount != FPI.CallWithManyArgumentsCount || |
316 | CallWithPointerArgumentCount != FPI.CallWithPointerArgumentCount) { |
317 | return false; |
318 | } |
319 | // Check the equality of the function embeddings. We don't check the equality |
320 | // of Vocabulary as it remains the same. |
321 | if (!FunctionEmbedding.approximatelyEquals(RHS: FPI.FunctionEmbedding)) |
322 | return false; |
323 | |
324 | return true; |
325 | } |
326 | |
327 | void FunctionPropertiesInfo::print(raw_ostream &OS) const { |
328 | #define PRINT_PROPERTY(PROP_NAME) OS << #PROP_NAME ": " << PROP_NAME << "\n"; |
329 | |
330 | PRINT_PROPERTY(BasicBlockCount) |
331 | PRINT_PROPERTY(BlocksReachedFromConditionalInstruction) |
332 | PRINT_PROPERTY(Uses) |
333 | PRINT_PROPERTY(DirectCallsToDefinedFunctions) |
334 | PRINT_PROPERTY(LoadInstCount) |
335 | PRINT_PROPERTY(StoreInstCount) |
336 | PRINT_PROPERTY(MaxLoopDepth) |
337 | PRINT_PROPERTY(TopLevelLoopCount) |
338 | PRINT_PROPERTY(TotalInstructionCount) |
339 | |
340 | if (EnableDetailedFunctionProperties) { |
341 | PRINT_PROPERTY(BasicBlocksWithSingleSuccessor) |
342 | PRINT_PROPERTY(BasicBlocksWithTwoSuccessors) |
343 | PRINT_PROPERTY(BasicBlocksWithMoreThanTwoSuccessors) |
344 | PRINT_PROPERTY(BasicBlocksWithSinglePredecessor) |
345 | PRINT_PROPERTY(BasicBlocksWithTwoPredecessors) |
346 | PRINT_PROPERTY(BasicBlocksWithMoreThanTwoPredecessors) |
347 | PRINT_PROPERTY(BigBasicBlocks) |
348 | PRINT_PROPERTY(MediumBasicBlocks) |
349 | PRINT_PROPERTY(SmallBasicBlocks) |
350 | PRINT_PROPERTY(CastInstructionCount) |
351 | PRINT_PROPERTY(FloatingPointInstructionCount) |
352 | PRINT_PROPERTY(IntegerInstructionCount) |
353 | PRINT_PROPERTY(ConstantIntOperandCount) |
354 | PRINT_PROPERTY(ConstantFPOperandCount) |
355 | PRINT_PROPERTY(ConstantOperandCount) |
356 | PRINT_PROPERTY(InstructionOperandCount) |
357 | PRINT_PROPERTY(BasicBlockOperandCount) |
358 | PRINT_PROPERTY(GlobalValueOperandCount) |
359 | PRINT_PROPERTY(InlineAsmOperandCount) |
360 | PRINT_PROPERTY(ArgumentOperandCount) |
361 | PRINT_PROPERTY(UnknownOperandCount) |
362 | PRINT_PROPERTY(CriticalEdgeCount) |
363 | PRINT_PROPERTY(ControlFlowEdgeCount) |
364 | PRINT_PROPERTY(UnconditionalBranchCount) |
365 | PRINT_PROPERTY(IntrinsicCount) |
366 | PRINT_PROPERTY(DirectCallCount) |
367 | PRINT_PROPERTY(IndirectCallCount) |
368 | PRINT_PROPERTY(CallReturnsIntegerCount) |
369 | PRINT_PROPERTY(CallReturnsFloatCount) |
370 | PRINT_PROPERTY(CallReturnsPointerCount) |
371 | PRINT_PROPERTY(CallReturnsVectorIntCount) |
372 | PRINT_PROPERTY(CallReturnsVectorFloatCount) |
373 | PRINT_PROPERTY(CallReturnsVectorPointerCount) |
374 | PRINT_PROPERTY(CallWithManyArgumentsCount) |
375 | PRINT_PROPERTY(CallWithPointerArgumentCount) |
376 | } |
377 | |
378 | #undef PRINT_PROPERTY |
379 | |
380 | OS << "\n" ; |
381 | } |
382 | |
383 | AnalysisKey FunctionPropertiesAnalysis::Key; |
384 | |
385 | FunctionPropertiesInfo |
386 | FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { |
387 | return FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM); |
388 | } |
389 | |
390 | PreservedAnalyses |
391 | FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { |
392 | OS << "Printing analysis results of CFA for function " |
393 | << "'" << F.getName() << "':" |
394 | << "\n" ; |
395 | AM.getResult<FunctionPropertiesAnalysis>(IR&: F).print(OS); |
396 | return PreservedAnalyses::all(); |
397 | } |
398 | |
399 | FunctionPropertiesUpdater::FunctionPropertiesUpdater( |
400 | FunctionPropertiesInfo &FPI, CallBase &CB) |
401 | : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) { |
402 | assert(isa<CallInst>(CB) || isa<InvokeInst>(CB)); |
403 | // For BBs that are likely to change, we subtract from feature totals their |
404 | // contribution. Some features, like max loop counts or depths, are left |
405 | // invalid, as they will be updated post-inlining. |
406 | SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs; |
407 | // The CB BB will change - it'll either be split or the callee's body (single |
408 | // BB) will be pasted in. |
409 | LikelyToChangeBBs.insert(Ptr: &CallSiteBB); |
410 | |
411 | // The caller's entry BB may change due to new alloca instructions. |
412 | LikelyToChangeBBs.insert(Ptr: &*Caller.begin()); |
413 | |
414 | // The users of the value returned by call instruction can change |
415 | // leading to the change in embeddings being computed, when used. |
416 | // We conservatively add the BBs with such uses to LikelyToChangeBBs. |
417 | for (const auto *User : CB.users()) |
418 | CallUsers.insert(V: dyn_cast<Instruction>(Val: User)->getParent()); |
419 | // CallSiteBB can be removed from CallUsers if present, it's taken care |
420 | // separately. |
421 | CallUsers.erase(V: &CallSiteBB); |
422 | LikelyToChangeBBs.insert_range(R&: CallUsers); |
423 | |
424 | // The successors may become unreachable in the case of `invoke` inlining. |
425 | // We track successors separately, too, because they form a boundary, together |
426 | // with the CB BB ('Entry') between which the inlined callee will be pasted. |
427 | Successors.insert_range(R: successors(BB: &CallSiteBB)); |
428 | |
429 | // the outcome of the inlining may be that some edges get lost (DCEd BBs |
430 | // because inlining brought some constant, for example). We don't know which |
431 | // edges will be removed, so we list all of them as potentially removable. |
432 | // Some BBs have (at this point) duplicate edges. Remove duplicates, otherwise |
433 | // the DT updater will not apply changes correctly. |
434 | DenseSet<const BasicBlock *> Inserted; |
435 | for (auto *Succ : successors(BB: &CallSiteBB)) |
436 | if (Inserted.insert(V: Succ).second) |
437 | DomTreeUpdates.emplace_back(Args: DominatorTree::UpdateKind::Delete, |
438 | Args: const_cast<BasicBlock *>(&CallSiteBB), |
439 | Args: const_cast<BasicBlock *>(Succ)); |
440 | // Reuse Inserted (which has some allocated capacity at this point) below, if |
441 | // we have an invoke. |
442 | Inserted.clear(); |
443 | // Inlining only handles invoke and calls. If this is an invoke, and inlining |
444 | // it pulls another invoke, the original landing pad may get split, so as to |
445 | // share its content with other potential users. So the edge up to which we |
446 | // need to invalidate and then re-account BB data is the successors of the |
447 | // current landing pad. We can leave the current lp, too - if it doesn't get |
448 | // split, then it will be the place traversal stops. Either way, the |
449 | // discounted BBs will be checked if reachable and re-added. |
450 | if (const auto *II = dyn_cast<InvokeInst>(Val: &CB)) { |
451 | const auto *UnwindDest = II->getUnwindDest(); |
452 | Successors.insert_range(R: successors(BB: UnwindDest)); |
453 | // Same idea as above, we pretend we lose all these edges. |
454 | for (auto *Succ : successors(BB: UnwindDest)) |
455 | if (Inserted.insert(V: Succ).second) |
456 | DomTreeUpdates.emplace_back(Args: DominatorTree::UpdateKind::Delete, |
457 | Args: const_cast<BasicBlock *>(UnwindDest), |
458 | Args: const_cast<BasicBlock *>(Succ)); |
459 | } |
460 | |
461 | // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop). |
462 | // We are only interested in BBs the graph moves past the callsite BB to |
463 | // define the frontier past which we don't want to re-process BBs. Including |
464 | // the callsite BB in this case would prematurely stop the traversal in |
465 | // finish(). |
466 | Successors.erase(V: &CallSiteBB); |
467 | |
468 | LikelyToChangeBBs.insert_range(R&: Successors); |
469 | |
470 | // Commit the change. While some of the BBs accounted for above may play dual |
471 | // role - e.g. caller's entry BB may be the same as the callsite BB - set |
472 | // insertion semantics make sure we account them once. This needs to be |
473 | // followed in `finish`, too. |
474 | for (const auto *BB : LikelyToChangeBBs) |
475 | FPI.updateForBB(BB: *BB, Direction: -1); |
476 | } |
477 | |
478 | DominatorTree &FunctionPropertiesUpdater::getUpdatedDominatorTree( |
479 | FunctionAnalysisManager &FAM) const { |
480 | auto &DT = |
481 | FAM.getResult<DominatorTreeAnalysis>(IR&: const_cast<Function &>(Caller)); |
482 | |
483 | SmallVector<DominatorTree::UpdateType, 2> FinalDomTreeUpdates; |
484 | |
485 | DenseSet<const BasicBlock *> Inserted; |
486 | for (auto *Succ : successors(BB: &CallSiteBB)) |
487 | if (Inserted.insert(V: Succ).second) |
488 | FinalDomTreeUpdates.push_back(Elt: {DominatorTree::UpdateKind::Insert, |
489 | const_cast<BasicBlock *>(&CallSiteBB), |
490 | const_cast<BasicBlock *>(Succ)}); |
491 | |
492 | // Perform the deletes last, so that any new nodes connected to nodes |
493 | // participating in the edge deletion are known to the DT. |
494 | for (auto &Upd : DomTreeUpdates) |
495 | if (!llvm::is_contained(Range: successors(BB: Upd.getFrom()), Element: Upd.getTo())) |
496 | FinalDomTreeUpdates.push_back(Elt: Upd); |
497 | |
498 | DT.applyUpdates(Updates: FinalDomTreeUpdates); |
499 | #ifdef EXPENSIVE_CHECKS |
500 | assert(DT.verify(DominatorTree::VerificationLevel::Full)); |
501 | #endif |
502 | return DT; |
503 | } |
504 | |
505 | void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { |
506 | // Update feature values from the BBs that were copied from the callee, or |
507 | // might have been modified because of inlining. The latter have been |
508 | // subtracted in the FunctionPropertiesUpdater ctor. |
509 | // There could be successors that were reached before but now are only |
510 | // reachable from elsewhere in the CFG. |
511 | // One example is the following diamond CFG (lines are arrows pointing down): |
512 | // A |
513 | // / \ |
514 | // B C |
515 | // | | |
516 | // | D |
517 | // | | |
518 | // | E |
519 | // \ / |
520 | // F |
521 | // There's a call site in C that is inlined. Upon doing that, it turns out |
522 | // it expands to |
523 | // call void @llvm.trap() |
524 | // unreachable |
525 | // F isn't reachable from C anymore, but we did discount it when we set up |
526 | // FunctionPropertiesUpdater, so we need to re-include it here. |
527 | // At the same time, D and E were reachable before, but now are not anymore, |
528 | // so we need to leave D out (we discounted it at setup), and explicitly |
529 | // remove E. |
530 | SetVector<const BasicBlock *> Reinclude; |
531 | SetVector<const BasicBlock *> Unreachable; |
532 | auto &DT = getUpdatedDominatorTree(FAM); |
533 | |
534 | if (&CallSiteBB != &*Caller.begin()) |
535 | Reinclude.insert(X: &*Caller.begin()); |
536 | |
537 | // Reinclude the BBs which use the values returned by call instruction |
538 | Reinclude.insert_range(R: CallUsers); |
539 | |
540 | // Distribute the successors to the 2 buckets. |
541 | for (const auto *Succ : Successors) |
542 | if (DT.isReachableFromEntry(A: Succ)) |
543 | Reinclude.insert(X: Succ); |
544 | else |
545 | Unreachable.insert(X: Succ); |
546 | |
547 | // For reinclusion, we want to stop at the reachable successors, who are at |
548 | // the beginning of the worklist; but, starting from the callsite bb and |
549 | // ending at those successors, we also want to perform a traversal. |
550 | // IncludeSuccessorsMark is the index after which we include successors. |
551 | const auto IncludeSuccessorsMark = Reinclude.size(); |
552 | bool CSInsertion = Reinclude.insert(X: &CallSiteBB); |
553 | (void)CSInsertion; |
554 | assert(CSInsertion); |
555 | for (size_t I = 0; I < Reinclude.size(); ++I) { |
556 | const auto *BB = Reinclude[I]; |
557 | FPI.reIncludeBB(BB: *BB); |
558 | if (I >= IncludeSuccessorsMark) |
559 | Reinclude.insert_range(R: successors(BB)); |
560 | } |
561 | |
562 | // For exclusion, we don't need to exclude the set of BBs that were successors |
563 | // before and are now unreachable, because we already did that at setup. For |
564 | // the rest, as long as a successor is unreachable, we want to explicitly |
565 | // exclude it. |
566 | const auto AlreadyExcludedMark = Unreachable.size(); |
567 | for (size_t I = 0; I < Unreachable.size(); ++I) { |
568 | const auto *U = Unreachable[I]; |
569 | if (I >= AlreadyExcludedMark) |
570 | FPI.updateForBB(BB: *U, Direction: -1); |
571 | for (const auto *Succ : successors(BB: U)) |
572 | if (!DT.isReachableFromEntry(A: Succ)) |
573 | Unreachable.insert(X: Succ); |
574 | } |
575 | |
576 | const auto &LI = FAM.getResult<LoopAnalysis>(IR&: const_cast<Function &>(Caller)); |
577 | FPI.updateAggregateStats(F: Caller, LI); |
578 | #ifdef EXPENSIVE_CHECKS |
579 | assert(isUpdateValid(Caller, FPI, FAM)); |
580 | #endif |
581 | } |
582 | |
583 | bool FunctionPropertiesUpdater::isUpdateValid(Function &F, |
584 | const FunctionPropertiesInfo &FPI, |
585 | FunctionAnalysisManager &FAM) { |
586 | if (!FAM.getResult<DominatorTreeAnalysis>(IR&: F).verify( |
587 | VL: DominatorTree::VerificationLevel::Full)) |
588 | return false; |
589 | DominatorTree DT(F); |
590 | LoopInfo LI(DT); |
591 | auto VocabResult = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F) |
592 | .getCachedResult<IR2VecVocabAnalysis>(IR&: *F.getParent()); |
593 | auto Fresh = |
594 | FunctionPropertiesInfo::getFunctionPropertiesInfo(F, DT, LI, VocabResult); |
595 | return FPI == Fresh; |
596 | } |
597 | |