| 1 | //===- FunctionPropertiesAnalysis.cpp - Function Properties Analysis ------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis |
| 10 | // classes used to extract function properties. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/Analysis/FunctionPropertiesAnalysis.h" |
| 15 | #include "llvm/ADT/STLExtras.h" |
| 16 | #include "llvm/ADT/SetVector.h" |
| 17 | #include "llvm/ADT/Statistic.h" |
| 18 | #include "llvm/Analysis/LoopInfo.h" |
| 19 | #include "llvm/IR/CFG.h" |
| 20 | #include "llvm/IR/Constants.h" |
| 21 | #include "llvm/IR/Dominators.h" |
| 22 | #include "llvm/IR/Instructions.h" |
| 23 | #include "llvm/IR/IntrinsicInst.h" |
| 24 | #include "llvm/Support/CommandLine.h" |
| 25 | #include "llvm/Support/Compiler.h" |
| 26 | #include <deque> |
| 27 | |
| 28 | using namespace llvm; |
| 29 | |
| 30 | #define DEBUG_TYPE "func-properties-stats" |
| 31 | |
| 32 | #define FUNCTION_PROPERTY(Name, Description) STATISTIC(Num##Name, Description); |
| 33 | #define DETAILED_FUNCTION_PROPERTY(Name, Description) \ |
| 34 | STATISTIC(Num##Name, Description); |
| 35 | #include "llvm/IR/FunctionProperties.def" |
| 36 | |
| 37 | namespace llvm { |
| 38 | LLVM_ABI cl::opt<bool> EnableDetailedFunctionProperties( |
| 39 | "enable-detailed-function-properties" , cl::Hidden, cl::init(Val: false), |
| 40 | cl::desc("Whether or not to compute detailed function properties." )); |
| 41 | |
| 42 | static cl::opt<unsigned> BigBasicBlockInstructionThreshold( |
| 43 | "big-basic-block-instruction-threshold" , cl::Hidden, cl::init(Val: 500), |
| 44 | cl::desc("The minimum number of instructions a basic block should contain " |
| 45 | "before being considered big." )); |
| 46 | |
| 47 | static cl::opt<unsigned> MediumBasicBlockInstructionThreshold( |
| 48 | "medium-basic-block-instruction-threshold" , cl::Hidden, cl::init(Val: 15), |
| 49 | cl::desc("The minimum number of instructions a basic block should contain " |
| 50 | "before being considered medium-sized." )); |
| 51 | } // namespace llvm |
| 52 | |
| 53 | static cl::opt<unsigned> CallWithManyArgumentsThreshold( |
| 54 | "call-with-many-arguments-threshold" , cl::Hidden, cl::init(Val: 4), |
| 55 | cl::desc("The minimum number of arguments a function call must have before " |
| 56 | "it is considered having many arguments." )); |
| 57 | |
| 58 | namespace { |
| 59 | int64_t getNumBlocksFromCond(const BasicBlock &BB) { |
| 60 | int64_t Ret = 0; |
| 61 | if (const auto *BI = dyn_cast<BranchInst>(Val: BB.getTerminator())) { |
| 62 | if (BI->isConditional()) |
| 63 | Ret += BI->getNumSuccessors(); |
| 64 | } else if (const auto *SI = dyn_cast<SwitchInst>(Val: BB.getTerminator())) { |
| 65 | Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest())); |
| 66 | } |
| 67 | return Ret; |
| 68 | } |
| 69 | |
| 70 | int64_t getUses(const Function &F) { |
| 71 | return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); |
| 72 | } |
| 73 | } // namespace |
| 74 | |
| 75 | void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB) { |
| 76 | updateForBB(BB, Direction: +1); |
| 77 | } |
| 78 | |
| 79 | void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB, |
| 80 | int64_t Direction) { |
| 81 | assert(Direction == 1 || Direction == -1); |
| 82 | BasicBlockCount += Direction; |
| 83 | BlocksReachedFromConditionalInstruction += |
| 84 | (Direction * getNumBlocksFromCond(BB)); |
| 85 | for (const auto &I : BB) { |
| 86 | if (auto *CS = dyn_cast<CallBase>(Val: &I)) { |
| 87 | const auto *Callee = CS->getCalledFunction(); |
| 88 | if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) |
| 89 | DirectCallsToDefinedFunctions += Direction; |
| 90 | } |
| 91 | if (I.getOpcode() == Instruction::Load) { |
| 92 | LoadInstCount += Direction; |
| 93 | } else if (I.getOpcode() == Instruction::Store) { |
| 94 | StoreInstCount += Direction; |
| 95 | } |
| 96 | } |
| 97 | TotalInstructionCount += Direction * BB.sizeWithoutDebug(); |
| 98 | |
| 99 | if (EnableDetailedFunctionProperties) { |
| 100 | unsigned SuccessorCount = succ_size(BB: &BB); |
| 101 | if (SuccessorCount == 1) |
| 102 | BasicBlocksWithSingleSuccessor += Direction; |
| 103 | else if (SuccessorCount == 2) |
| 104 | BasicBlocksWithTwoSuccessors += Direction; |
| 105 | else if (SuccessorCount > 2) |
| 106 | BasicBlocksWithMoreThanTwoSuccessors += Direction; |
| 107 | |
| 108 | unsigned PredecessorCount = pred_size(BB: &BB); |
| 109 | if (PredecessorCount == 1) |
| 110 | BasicBlocksWithSinglePredecessor += Direction; |
| 111 | else if (PredecessorCount == 2) |
| 112 | BasicBlocksWithTwoPredecessors += Direction; |
| 113 | else if (PredecessorCount > 2) |
| 114 | BasicBlocksWithMoreThanTwoPredecessors += Direction; |
| 115 | |
| 116 | if (TotalInstructionCount > BigBasicBlockInstructionThreshold) |
| 117 | BigBasicBlocks += Direction; |
| 118 | else if (TotalInstructionCount > MediumBasicBlockInstructionThreshold) |
| 119 | MediumBasicBlocks += Direction; |
| 120 | else |
| 121 | SmallBasicBlocks += Direction; |
| 122 | |
| 123 | // Calculate critical edges by looking through all successors of a basic |
| 124 | // block that has multiple successors and finding ones that have multiple |
| 125 | // predecessors, which represent critical edges. |
| 126 | if (SuccessorCount > 1) { |
| 127 | for (const auto *Successor : successors(BB: &BB)) { |
| 128 | if (pred_size(BB: Successor) > 1) |
| 129 | CriticalEdgeCount += Direction; |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | ControlFlowEdgeCount += Direction * SuccessorCount; |
| 134 | |
| 135 | const Instruction *TI = BB.getTerminator(); |
| 136 | const int64_t InstructionSuccessorCount = TI->getNumSuccessors(); |
| 137 | if (isa<BranchInst>(Val: TI)) { |
| 138 | BranchInstructionCount += Direction; |
| 139 | BranchSuccessorCount += Direction * InstructionSuccessorCount; |
| 140 | const auto *BI = dyn_cast<BranchInst>(Val: TI); |
| 141 | if (BI->isConditional()) |
| 142 | ConditionalBranchCount += Direction; |
| 143 | else |
| 144 | UnconditionalBranchCount += Direction; |
| 145 | } else if (isa<SwitchInst>(Val: TI)) { |
| 146 | SwitchInstructionCount += Direction; |
| 147 | SwitchSuccessorCount += Direction * InstructionSuccessorCount; |
| 148 | } |
| 149 | |
| 150 | for (const Instruction &I : BB.instructionsWithoutDebug()) { |
| 151 | if (I.isCast()) |
| 152 | CastInstructionCount += Direction; |
| 153 | |
| 154 | if (I.getType()->isFloatTy()) |
| 155 | FloatingPointInstructionCount += Direction; |
| 156 | else if (I.getType()->isIntegerTy()) |
| 157 | IntegerInstructionCount += Direction; |
| 158 | |
| 159 | if (isa<IntrinsicInst>(Val: I)) |
| 160 | ++IntrinsicCount; |
| 161 | |
| 162 | if (const auto *Call = dyn_cast<CallInst>(Val: &I)) { |
| 163 | if (Call->isIndirectCall()) |
| 164 | IndirectCallCount += Direction; |
| 165 | else |
| 166 | DirectCallCount += Direction; |
| 167 | |
| 168 | if (Call->getType()->isIntegerTy()) |
| 169 | CallReturnsIntegerCount += Direction; |
| 170 | else if (Call->getType()->isFloatingPointTy()) |
| 171 | CallReturnsFloatCount += Direction; |
| 172 | else if (Call->getType()->isPointerTy()) |
| 173 | CallReturnsPointerCount += Direction; |
| 174 | else if (Call->getType()->isVectorTy()) { |
| 175 | if (Call->getType()->getScalarType()->isIntegerTy()) |
| 176 | CallReturnsVectorIntCount += Direction; |
| 177 | else if (Call->getType()->getScalarType()->isFloatingPointTy()) |
| 178 | CallReturnsVectorFloatCount += Direction; |
| 179 | else if (Call->getType()->getScalarType()->isPointerTy()) |
| 180 | CallReturnsVectorPointerCount += Direction; |
| 181 | } |
| 182 | |
| 183 | if (Call->arg_size() > CallWithManyArgumentsThreshold) |
| 184 | CallWithManyArgumentsCount += Direction; |
| 185 | |
| 186 | for (const auto &Arg : Call->args()) { |
| 187 | if (Arg->getType()->isPointerTy()) { |
| 188 | CallWithPointerArgumentCount += Direction; |
| 189 | break; |
| 190 | } |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | #define COUNT_OPERAND(OPTYPE) \ |
| 195 | if (isa<OPTYPE>(Operand)) { \ |
| 196 | OPTYPE##OperandCount += Direction; \ |
| 197 | continue; \ |
| 198 | } |
| 199 | |
| 200 | for (unsigned int OperandIndex = 0; OperandIndex < I.getNumOperands(); |
| 201 | ++OperandIndex) { |
| 202 | Value *Operand = I.getOperand(i: OperandIndex); |
| 203 | COUNT_OPERAND(GlobalValue) |
| 204 | COUNT_OPERAND(ConstantInt) |
| 205 | COUNT_OPERAND(ConstantFP) |
| 206 | COUNT_OPERAND(Constant) |
| 207 | COUNT_OPERAND(Instruction) |
| 208 | COUNT_OPERAND(BasicBlock) |
| 209 | COUNT_OPERAND(InlineAsm) |
| 210 | COUNT_OPERAND(Argument) |
| 211 | |
| 212 | // We only get to this point if we haven't matched any of the other |
| 213 | // operand types. |
| 214 | UnknownOperandCount += Direction; |
| 215 | } |
| 216 | |
| 217 | #undef CHECK_OPERAND |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | if (IR2VecVocab) { |
| 222 | // We instantiate the IR2Vec embedder each time, as having an unique |
| 223 | // pointer to the embedder as member of the class would make it |
| 224 | // non-copyable. Instantiating the embedder in itself is not costly. |
| 225 | auto Embedder = ir2vec::Embedder::create(Mode: IR2VecKind::Symbolic, |
| 226 | F: *BB.getParent(), Vocab: *IR2VecVocab); |
| 227 | if (!Embedder) { |
| 228 | BB.getContext().emitError(ErrorStr: "Error creating IR2Vec embeddings" ); |
| 229 | return; |
| 230 | } |
| 231 | const auto &BBEmbedding = Embedder->getBBVector(BB); |
| 232 | // Subtract BBEmbedding from Function embedding if the direction is -1, |
| 233 | // and add it if the direction is +1. |
| 234 | if (Direction == -1) |
| 235 | FunctionEmbedding -= BBEmbedding; |
| 236 | else |
| 237 | FunctionEmbedding += BBEmbedding; |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | void FunctionPropertiesInfo::updateAggregateStats(const Function &F, |
| 242 | const LoopInfo &LI) { |
| 243 | |
| 244 | Uses = getUses(F); |
| 245 | TopLevelLoopCount = llvm::size(Range: LI); |
| 246 | MaxLoopDepth = 0; |
| 247 | std::deque<const Loop *> Worklist; |
| 248 | llvm::append_range(C&: Worklist, R: LI); |
| 249 | while (!Worklist.empty()) { |
| 250 | const auto *L = Worklist.front(); |
| 251 | MaxLoopDepth = |
| 252 | std::max(a: MaxLoopDepth, b: static_cast<int64_t>(L->getLoopDepth())); |
| 253 | Worklist.pop_front(); |
| 254 | llvm::append_range(C&: Worklist, R: L->getSubLoops()); |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( |
| 259 | Function &F, FunctionAnalysisManager &FAM) { |
| 260 | // We use the cached result of the IR2VecVocabAnalysis run by |
| 261 | // InlineAdvisorAnalysis. If the IR2VecVocabAnalysis is not run, we don't |
| 262 | // use IR2Vec embeddings. |
| 263 | auto Vocabulary = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F) |
| 264 | .getCachedResult<IR2VecVocabAnalysis>(IR&: *F.getParent()); |
| 265 | return getFunctionPropertiesInfo(F, DT: FAM.getResult<DominatorTreeAnalysis>(IR&: F), |
| 266 | LI: FAM.getResult<LoopAnalysis>(IR&: F), Vocabulary); |
| 267 | } |
| 268 | |
| 269 | FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( |
| 270 | const Function &F, const DominatorTree &DT, const LoopInfo &LI, |
| 271 | const ir2vec::Vocabulary *Vocabulary) { |
| 272 | |
| 273 | FunctionPropertiesInfo FPI; |
| 274 | if (Vocabulary && Vocabulary->isValid()) { |
| 275 | FPI.IR2VecVocab = Vocabulary; |
| 276 | FPI.FunctionEmbedding = ir2vec::Embedding(Vocabulary->getDimension(), 0.0); |
| 277 | } |
| 278 | for (const auto &BB : F) |
| 279 | if (DT.isReachableFromEntry(A: &BB)) |
| 280 | FPI.reIncludeBB(BB); |
| 281 | FPI.updateAggregateStats(F, LI); |
| 282 | return FPI; |
| 283 | } |
| 284 | |
| 285 | bool FunctionPropertiesInfo::operator==( |
| 286 | const FunctionPropertiesInfo &FPI) const { |
| 287 | if (BasicBlockCount != FPI.BasicBlockCount || |
| 288 | BlocksReachedFromConditionalInstruction != |
| 289 | FPI.BlocksReachedFromConditionalInstruction || |
| 290 | Uses != FPI.Uses || |
| 291 | DirectCallsToDefinedFunctions != FPI.DirectCallsToDefinedFunctions || |
| 292 | LoadInstCount != FPI.LoadInstCount || |
| 293 | StoreInstCount != FPI.StoreInstCount || |
| 294 | MaxLoopDepth != FPI.MaxLoopDepth || |
| 295 | TopLevelLoopCount != FPI.TopLevelLoopCount || |
| 296 | TotalInstructionCount != FPI.TotalInstructionCount || |
| 297 | BasicBlocksWithSingleSuccessor != FPI.BasicBlocksWithSingleSuccessor || |
| 298 | BasicBlocksWithTwoSuccessors != FPI.BasicBlocksWithTwoSuccessors || |
| 299 | BasicBlocksWithMoreThanTwoSuccessors != |
| 300 | FPI.BasicBlocksWithMoreThanTwoSuccessors || |
| 301 | BasicBlocksWithSinglePredecessor != |
| 302 | FPI.BasicBlocksWithSinglePredecessor || |
| 303 | BasicBlocksWithTwoPredecessors != FPI.BasicBlocksWithTwoPredecessors || |
| 304 | BasicBlocksWithMoreThanTwoPredecessors != |
| 305 | FPI.BasicBlocksWithMoreThanTwoPredecessors || |
| 306 | BigBasicBlocks != FPI.BigBasicBlocks || |
| 307 | MediumBasicBlocks != FPI.MediumBasicBlocks || |
| 308 | SmallBasicBlocks != FPI.SmallBasicBlocks || |
| 309 | CastInstructionCount != FPI.CastInstructionCount || |
| 310 | FloatingPointInstructionCount != FPI.FloatingPointInstructionCount || |
| 311 | IntegerInstructionCount != FPI.IntegerInstructionCount || |
| 312 | ConstantIntOperandCount != FPI.ConstantIntOperandCount || |
| 313 | ConstantFPOperandCount != FPI.ConstantFPOperandCount || |
| 314 | ConstantOperandCount != FPI.ConstantOperandCount || |
| 315 | InstructionOperandCount != FPI.InstructionOperandCount || |
| 316 | BasicBlockOperandCount != FPI.BasicBlockOperandCount || |
| 317 | GlobalValueOperandCount != FPI.GlobalValueOperandCount || |
| 318 | InlineAsmOperandCount != FPI.InlineAsmOperandCount || |
| 319 | ArgumentOperandCount != FPI.ArgumentOperandCount || |
| 320 | UnknownOperandCount != FPI.UnknownOperandCount || |
| 321 | CriticalEdgeCount != FPI.CriticalEdgeCount || |
| 322 | ControlFlowEdgeCount != FPI.ControlFlowEdgeCount || |
| 323 | UnconditionalBranchCount != FPI.UnconditionalBranchCount || |
| 324 | IntrinsicCount != FPI.IntrinsicCount || |
| 325 | DirectCallCount != FPI.DirectCallCount || |
| 326 | IndirectCallCount != FPI.IndirectCallCount || |
| 327 | CallReturnsIntegerCount != FPI.CallReturnsIntegerCount || |
| 328 | CallReturnsFloatCount != FPI.CallReturnsFloatCount || |
| 329 | CallReturnsPointerCount != FPI.CallReturnsPointerCount || |
| 330 | CallReturnsVectorIntCount != FPI.CallReturnsVectorIntCount || |
| 331 | CallReturnsVectorFloatCount != FPI.CallReturnsVectorFloatCount || |
| 332 | CallReturnsVectorPointerCount != FPI.CallReturnsVectorPointerCount || |
| 333 | CallWithManyArgumentsCount != FPI.CallWithManyArgumentsCount || |
| 334 | CallWithPointerArgumentCount != FPI.CallWithPointerArgumentCount) { |
| 335 | return false; |
| 336 | } |
| 337 | // Check the equality of the function embeddings. We don't check the equality |
| 338 | // of Vocabulary as it remains the same. |
| 339 | if (!FunctionEmbedding.approximatelyEquals(RHS: FPI.FunctionEmbedding)) |
| 340 | return false; |
| 341 | |
| 342 | return true; |
| 343 | } |
| 344 | |
| 345 | void FunctionPropertiesInfo::print(raw_ostream &OS) const { |
| 346 | #define FUNCTION_PROPERTY(Name, Description) OS << #Name ": " << Name << "\n"; |
| 347 | |
| 348 | #define DETAILED_FUNCTION_PROPERTY(Name, Description) \ |
| 349 | if (EnableDetailedFunctionProperties) { \ |
| 350 | OS << #Name ": " << Name << "\n"; \ |
| 351 | } |
| 352 | |
| 353 | #include "llvm/IR/FunctionProperties.def" |
| 354 | |
| 355 | #undef FUNCTION_PROPERTY |
| 356 | #undef DETAILED_FUNCTION_PROPERTY |
| 357 | |
| 358 | OS << "\n" ; |
| 359 | } |
| 360 | |
| 361 | AnalysisKey FunctionPropertiesAnalysis::Key; |
| 362 | |
| 363 | FunctionPropertiesInfo |
| 364 | FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { |
| 365 | return FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM); |
| 366 | } |
| 367 | |
| 368 | PreservedAnalyses |
| 369 | FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { |
| 370 | OS << "Printing analysis results of CFA for function " |
| 371 | << "'" << F.getName() << "':" |
| 372 | << "\n" ; |
| 373 | AM.getResult<FunctionPropertiesAnalysis>(IR&: F).print(OS); |
| 374 | return PreservedAnalyses::all(); |
| 375 | } |
| 376 | |
| 377 | PreservedAnalyses |
| 378 | FunctionPropertiesStatisticsPass::run(Function &F, |
| 379 | FunctionAnalysisManager &FAM) { |
| 380 | LLVM_DEBUG(dbgs() << "STATSCOUNT: running on function " << F.getName() |
| 381 | << "\n" ); |
| 382 | auto &AnalysisResults = FAM.getResult<FunctionPropertiesAnalysis>(IR&: F); |
| 383 | |
| 384 | #define FUNCTION_PROPERTY(Name, Description) Num##Name += AnalysisResults.Name; |
| 385 | #define DETAILED_FUNCTION_PROPERTY(Name, Description) \ |
| 386 | Num##Name += AnalysisResults.Name; |
| 387 | #include "llvm/IR/FunctionProperties.def" |
| 388 | |
| 389 | return PreservedAnalyses::all(); |
| 390 | } |
| 391 | |
| 392 | FunctionPropertiesUpdater::FunctionPropertiesUpdater( |
| 393 | FunctionPropertiesInfo &FPI, CallBase &CB) |
| 394 | : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) { |
| 395 | assert(isa<CallInst>(CB) || isa<InvokeInst>(CB)); |
| 396 | // For BBs that are likely to change, we subtract from feature totals their |
| 397 | // contribution. Some features, like max loop counts or depths, are left |
| 398 | // invalid, as they will be updated post-inlining. |
| 399 | SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs; |
| 400 | // The CB BB will change - it'll either be split or the callee's body (single |
| 401 | // BB) will be pasted in. |
| 402 | LikelyToChangeBBs.insert(Ptr: &CallSiteBB); |
| 403 | |
| 404 | // The caller's entry BB may change due to new alloca instructions. |
| 405 | LikelyToChangeBBs.insert(Ptr: &*Caller.begin()); |
| 406 | |
| 407 | // The users of the value returned by call instruction can change |
| 408 | // leading to the change in embeddings being computed, when used. |
| 409 | // We conservatively add the BBs with such uses to LikelyToChangeBBs. |
| 410 | for (const auto *User : CB.users()) |
| 411 | CallUsers.insert(V: dyn_cast<Instruction>(Val: User)->getParent()); |
| 412 | // CallSiteBB can be removed from CallUsers if present, it's taken care |
| 413 | // separately. |
| 414 | CallUsers.erase(V: &CallSiteBB); |
| 415 | LikelyToChangeBBs.insert_range(R&: CallUsers); |
| 416 | |
| 417 | // The successors may become unreachable in the case of `invoke` inlining. |
| 418 | // We track successors separately, too, because they form a boundary, together |
| 419 | // with the CB BB ('Entry') between which the inlined callee will be pasted. |
| 420 | Successors.insert_range(R: successors(BB: &CallSiteBB)); |
| 421 | |
| 422 | // the outcome of the inlining may be that some edges get lost (DCEd BBs |
| 423 | // because inlining brought some constant, for example). We don't know which |
| 424 | // edges will be removed, so we list all of them as potentially removable. |
| 425 | // Some BBs have (at this point) duplicate edges. Remove duplicates, otherwise |
| 426 | // the DT updater will not apply changes correctly. |
| 427 | DenseSet<const BasicBlock *> Inserted; |
| 428 | for (auto *Succ : successors(BB: &CallSiteBB)) |
| 429 | if (Inserted.insert(V: Succ).second) |
| 430 | DomTreeUpdates.emplace_back(Args: DominatorTree::UpdateKind::Delete, |
| 431 | Args: const_cast<BasicBlock *>(&CallSiteBB), |
| 432 | Args: const_cast<BasicBlock *>(Succ)); |
| 433 | // Reuse Inserted (which has some allocated capacity at this point) below, if |
| 434 | // we have an invoke. |
| 435 | Inserted.clear(); |
| 436 | // Inlining only handles invoke and calls. If this is an invoke, and inlining |
| 437 | // it pulls another invoke, the original landing pad may get split, so as to |
| 438 | // share its content with other potential users. So the edge up to which we |
| 439 | // need to invalidate and then re-account BB data is the successors of the |
| 440 | // current landing pad. We can leave the current lp, too - if it doesn't get |
| 441 | // split, then it will be the place traversal stops. Either way, the |
| 442 | // discounted BBs will be checked if reachable and re-added. |
| 443 | if (const auto *II = dyn_cast<InvokeInst>(Val: &CB)) { |
| 444 | const auto *UnwindDest = II->getUnwindDest(); |
| 445 | Successors.insert_range(R: successors(BB: UnwindDest)); |
| 446 | // Same idea as above, we pretend we lose all these edges. |
| 447 | for (auto *Succ : successors(BB: UnwindDest)) |
| 448 | if (Inserted.insert(V: Succ).second) |
| 449 | DomTreeUpdates.emplace_back(Args: DominatorTree::UpdateKind::Delete, |
| 450 | Args: const_cast<BasicBlock *>(UnwindDest), |
| 451 | Args: const_cast<BasicBlock *>(Succ)); |
| 452 | } |
| 453 | |
| 454 | // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop). |
| 455 | // We are only interested in BBs the graph moves past the callsite BB to |
| 456 | // define the frontier past which we don't want to re-process BBs. Including |
| 457 | // the callsite BB in this case would prematurely stop the traversal in |
| 458 | // finish(). |
| 459 | Successors.erase(V: &CallSiteBB); |
| 460 | |
| 461 | LikelyToChangeBBs.insert_range(R&: Successors); |
| 462 | |
| 463 | // Commit the change. While some of the BBs accounted for above may play dual |
| 464 | // role - e.g. caller's entry BB may be the same as the callsite BB - set |
| 465 | // insertion semantics make sure we account them once. This needs to be |
| 466 | // followed in `finish`, too. |
| 467 | for (const auto *BB : LikelyToChangeBBs) |
| 468 | FPI.updateForBB(BB: *BB, Direction: -1); |
| 469 | } |
| 470 | |
| 471 | DominatorTree &FunctionPropertiesUpdater::getUpdatedDominatorTree( |
| 472 | FunctionAnalysisManager &FAM) const { |
| 473 | auto &DT = |
| 474 | FAM.getResult<DominatorTreeAnalysis>(IR&: const_cast<Function &>(Caller)); |
| 475 | |
| 476 | SmallVector<DominatorTree::UpdateType, 2> FinalDomTreeUpdates; |
| 477 | |
| 478 | DenseSet<const BasicBlock *> Inserted; |
| 479 | for (auto *Succ : successors(BB: &CallSiteBB)) |
| 480 | if (Inserted.insert(V: Succ).second) |
| 481 | FinalDomTreeUpdates.push_back(Elt: {DominatorTree::UpdateKind::Insert, |
| 482 | const_cast<BasicBlock *>(&CallSiteBB), |
| 483 | const_cast<BasicBlock *>(Succ)}); |
| 484 | |
| 485 | // Perform the deletes last, so that any new nodes connected to nodes |
| 486 | // participating in the edge deletion are known to the DT. |
| 487 | for (auto &Upd : DomTreeUpdates) |
| 488 | if (!llvm::is_contained(Range: successors(BB: Upd.getFrom()), Element: Upd.getTo())) |
| 489 | FinalDomTreeUpdates.push_back(Elt: Upd); |
| 490 | |
| 491 | DT.applyUpdates(Updates: FinalDomTreeUpdates); |
| 492 | #ifdef EXPENSIVE_CHECKS |
| 493 | assert(DT.verify(DominatorTree::VerificationLevel::Full)); |
| 494 | #endif |
| 495 | return DT; |
| 496 | } |
| 497 | |
| 498 | void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { |
| 499 | // Update feature values from the BBs that were copied from the callee, or |
| 500 | // might have been modified because of inlining. The latter have been |
| 501 | // subtracted in the FunctionPropertiesUpdater ctor. |
| 502 | // There could be successors that were reached before but now are only |
| 503 | // reachable from elsewhere in the CFG. |
| 504 | // One example is the following diamond CFG (lines are arrows pointing down): |
| 505 | // A |
| 506 | // / \ |
| 507 | // B C |
| 508 | // | | |
| 509 | // | D |
| 510 | // | | |
| 511 | // | E |
| 512 | // \ / |
| 513 | // F |
| 514 | // There's a call site in C that is inlined. Upon doing that, it turns out |
| 515 | // it expands to |
| 516 | // call void @llvm.trap() |
| 517 | // unreachable |
| 518 | // F isn't reachable from C anymore, but we did discount it when we set up |
| 519 | // FunctionPropertiesUpdater, so we need to re-include it here. |
| 520 | // At the same time, D and E were reachable before, but now are not anymore, |
| 521 | // so we need to leave D out (we discounted it at setup), and explicitly |
| 522 | // remove E. |
| 523 | SetVector<const BasicBlock *> Reinclude; |
| 524 | SetVector<const BasicBlock *> Unreachable; |
| 525 | auto &DT = getUpdatedDominatorTree(FAM); |
| 526 | |
| 527 | if (&CallSiteBB != &*Caller.begin()) |
| 528 | Reinclude.insert(X: &*Caller.begin()); |
| 529 | |
| 530 | // Reinclude the BBs which use the values returned by call instruction |
| 531 | Reinclude.insert_range(R: CallUsers); |
| 532 | |
| 533 | // Distribute the successors to the 2 buckets. |
| 534 | for (const auto *Succ : Successors) |
| 535 | if (DT.isReachableFromEntry(A: Succ)) |
| 536 | Reinclude.insert(X: Succ); |
| 537 | else |
| 538 | Unreachable.insert(X: Succ); |
| 539 | |
| 540 | // For reinclusion, we want to stop at the reachable successors, who are at |
| 541 | // the beginning of the worklist; but, starting from the callsite bb and |
| 542 | // ending at those successors, we also want to perform a traversal. |
| 543 | // IncludeSuccessorsMark is the index after which we include successors. |
| 544 | const auto IncludeSuccessorsMark = Reinclude.size(); |
| 545 | bool CSInsertion = Reinclude.insert(X: &CallSiteBB); |
| 546 | (void)CSInsertion; |
| 547 | assert(CSInsertion); |
| 548 | for (size_t I = 0; I < Reinclude.size(); ++I) { |
| 549 | const auto *BB = Reinclude[I]; |
| 550 | FPI.reIncludeBB(BB: *BB); |
| 551 | if (I >= IncludeSuccessorsMark) |
| 552 | Reinclude.insert_range(R: successors(BB)); |
| 553 | } |
| 554 | |
| 555 | // For exclusion, we don't need to exclude the set of BBs that were successors |
| 556 | // before and are now unreachable, because we already did that at setup. For |
| 557 | // the rest, as long as a successor is unreachable, we want to explicitly |
| 558 | // exclude it. |
| 559 | const auto AlreadyExcludedMark = Unreachable.size(); |
| 560 | for (size_t I = 0; I < Unreachable.size(); ++I) { |
| 561 | const auto *U = Unreachable[I]; |
| 562 | if (I >= AlreadyExcludedMark) |
| 563 | FPI.updateForBB(BB: *U, Direction: -1); |
| 564 | for (const auto *Succ : successors(BB: U)) |
| 565 | if (!DT.isReachableFromEntry(A: Succ)) |
| 566 | Unreachable.insert(X: Succ); |
| 567 | } |
| 568 | |
| 569 | const auto &LI = FAM.getResult<LoopAnalysis>(IR&: const_cast<Function &>(Caller)); |
| 570 | FPI.updateAggregateStats(F: Caller, LI); |
| 571 | #ifdef EXPENSIVE_CHECKS |
| 572 | assert(isUpdateValid(Caller, FPI, FAM)); |
| 573 | #endif |
| 574 | } |
| 575 | |
| 576 | bool FunctionPropertiesUpdater::isUpdateValid(Function &F, |
| 577 | const FunctionPropertiesInfo &FPI, |
| 578 | FunctionAnalysisManager &FAM) { |
| 579 | if (!FAM.getResult<DominatorTreeAnalysis>(IR&: F).verify( |
| 580 | VL: DominatorTree::VerificationLevel::Full)) |
| 581 | return false; |
| 582 | DominatorTree DT(F); |
| 583 | LoopInfo LI(DT); |
| 584 | auto Vocabulary = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F) |
| 585 | .getCachedResult<IR2VecVocabAnalysis>(IR&: *F.getParent()); |
| 586 | auto Fresh = |
| 587 | FunctionPropertiesInfo::getFunctionPropertiesInfo(F, DT, LI, Vocabulary); |
| 588 | return FPI == Fresh; |
| 589 | } |
| 590 | |