| 1 | //===- FunctionPropertiesAnalysis.cpp - Function Properties Analysis ------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis |
| 10 | // classes used to extract function properties. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/Analysis/FunctionPropertiesAnalysis.h" |
| 15 | #include "llvm/ADT/STLExtras.h" |
| 16 | #include "llvm/ADT/SetVector.h" |
| 17 | #include "llvm/Analysis/LoopInfo.h" |
| 18 | #include "llvm/IR/CFG.h" |
| 19 | #include "llvm/IR/Constants.h" |
| 20 | #include "llvm/IR/Dominators.h" |
| 21 | #include "llvm/IR/Instructions.h" |
| 22 | #include "llvm/IR/IntrinsicInst.h" |
| 23 | #include "llvm/Support/CommandLine.h" |
| 24 | #include "llvm/Support/Compiler.h" |
| 25 | #include <deque> |
| 26 | |
| 27 | using namespace llvm; |
| 28 | |
| 29 | namespace llvm { |
| 30 | LLVM_ABI cl::opt<bool> EnableDetailedFunctionProperties( |
| 31 | "enable-detailed-function-properties" , cl::Hidden, cl::init(Val: false), |
| 32 | cl::desc("Whether or not to compute detailed function properties." )); |
| 33 | |
| 34 | static cl::opt<unsigned> BigBasicBlockInstructionThreshold( |
| 35 | "big-basic-block-instruction-threshold" , cl::Hidden, cl::init(Val: 500), |
| 36 | cl::desc("The minimum number of instructions a basic block should contain " |
| 37 | "before being considered big." )); |
| 38 | |
| 39 | static cl::opt<unsigned> MediumBasicBlockInstructionThreshold( |
| 40 | "medium-basic-block-instruction-threshold" , cl::Hidden, cl::init(Val: 15), |
| 41 | cl::desc("The minimum number of instructions a basic block should contain " |
| 42 | "before being considered medium-sized." )); |
| 43 | } // namespace llvm |
| 44 | |
| 45 | static cl::opt<unsigned> CallWithManyArgumentsThreshold( |
| 46 | "call-with-many-arguments-threshold" , cl::Hidden, cl::init(Val: 4), |
| 47 | cl::desc("The minimum number of arguments a function call must have before " |
| 48 | "it is considered having many arguments." )); |
| 49 | |
| 50 | namespace { |
| 51 | int64_t getNumBlocksFromCond(const BasicBlock &BB) { |
| 52 | int64_t Ret = 0; |
| 53 | if (const auto *BI = dyn_cast<BranchInst>(Val: BB.getTerminator())) { |
| 54 | if (BI->isConditional()) |
| 55 | Ret += BI->getNumSuccessors(); |
| 56 | } else if (const auto *SI = dyn_cast<SwitchInst>(Val: BB.getTerminator())) { |
| 57 | Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest())); |
| 58 | } |
| 59 | return Ret; |
| 60 | } |
| 61 | |
| 62 | int64_t getUses(const Function &F) { |
| 63 | return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses(); |
| 64 | } |
| 65 | } // namespace |
| 66 | |
| 67 | void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB) { |
| 68 | updateForBB(BB, Direction: +1); |
| 69 | } |
| 70 | |
| 71 | void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB, |
| 72 | int64_t Direction) { |
| 73 | assert(Direction == 1 || Direction == -1); |
| 74 | BasicBlockCount += Direction; |
| 75 | BlocksReachedFromConditionalInstruction += |
| 76 | (Direction * getNumBlocksFromCond(BB)); |
| 77 | for (const auto &I : BB) { |
| 78 | if (auto *CS = dyn_cast<CallBase>(Val: &I)) { |
| 79 | const auto *Callee = CS->getCalledFunction(); |
| 80 | if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) |
| 81 | DirectCallsToDefinedFunctions += Direction; |
| 82 | } |
| 83 | if (I.getOpcode() == Instruction::Load) { |
| 84 | LoadInstCount += Direction; |
| 85 | } else if (I.getOpcode() == Instruction::Store) { |
| 86 | StoreInstCount += Direction; |
| 87 | } |
| 88 | } |
| 89 | TotalInstructionCount += Direction * BB.sizeWithoutDebug(); |
| 90 | |
| 91 | if (EnableDetailedFunctionProperties) { |
| 92 | unsigned SuccessorCount = succ_size(BB: &BB); |
| 93 | if (SuccessorCount == 1) |
| 94 | BasicBlocksWithSingleSuccessor += Direction; |
| 95 | else if (SuccessorCount == 2) |
| 96 | BasicBlocksWithTwoSuccessors += Direction; |
| 97 | else if (SuccessorCount > 2) |
| 98 | BasicBlocksWithMoreThanTwoSuccessors += Direction; |
| 99 | |
| 100 | unsigned PredecessorCount = pred_size(BB: &BB); |
| 101 | if (PredecessorCount == 1) |
| 102 | BasicBlocksWithSinglePredecessor += Direction; |
| 103 | else if (PredecessorCount == 2) |
| 104 | BasicBlocksWithTwoPredecessors += Direction; |
| 105 | else if (PredecessorCount > 2) |
| 106 | BasicBlocksWithMoreThanTwoPredecessors += Direction; |
| 107 | |
| 108 | if (TotalInstructionCount > BigBasicBlockInstructionThreshold) |
| 109 | BigBasicBlocks += Direction; |
| 110 | else if (TotalInstructionCount > MediumBasicBlockInstructionThreshold) |
| 111 | MediumBasicBlocks += Direction; |
| 112 | else |
| 113 | SmallBasicBlocks += Direction; |
| 114 | |
| 115 | // Calculate critical edges by looking through all successors of a basic |
| 116 | // block that has multiple successors and finding ones that have multiple |
| 117 | // predecessors, which represent critical edges. |
| 118 | if (SuccessorCount > 1) { |
| 119 | for (const auto *Successor : successors(BB: &BB)) { |
| 120 | if (pred_size(BB: Successor) > 1) |
| 121 | CriticalEdgeCount += Direction; |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | ControlFlowEdgeCount += Direction * SuccessorCount; |
| 126 | |
| 127 | if (const auto *BI = dyn_cast<BranchInst>(Val: BB.getTerminator())) { |
| 128 | if (!BI->isConditional()) |
| 129 | UnconditionalBranchCount += Direction; |
| 130 | } |
| 131 | |
| 132 | for (const Instruction &I : BB.instructionsWithoutDebug()) { |
| 133 | if (I.isCast()) |
| 134 | CastInstructionCount += Direction; |
| 135 | |
| 136 | if (I.getType()->isFloatTy()) |
| 137 | FloatingPointInstructionCount += Direction; |
| 138 | else if (I.getType()->isIntegerTy()) |
| 139 | IntegerInstructionCount += Direction; |
| 140 | |
| 141 | if (isa<IntrinsicInst>(Val: I)) |
| 142 | ++IntrinsicCount; |
| 143 | |
| 144 | if (const auto *Call = dyn_cast<CallInst>(Val: &I)) { |
| 145 | if (Call->isIndirectCall()) |
| 146 | IndirectCallCount += Direction; |
| 147 | else |
| 148 | DirectCallCount += Direction; |
| 149 | |
| 150 | if (Call->getType()->isIntegerTy()) |
| 151 | CallReturnsIntegerCount += Direction; |
| 152 | else if (Call->getType()->isFloatingPointTy()) |
| 153 | CallReturnsFloatCount += Direction; |
| 154 | else if (Call->getType()->isPointerTy()) |
| 155 | CallReturnsPointerCount += Direction; |
| 156 | else if (Call->getType()->isVectorTy()) { |
| 157 | if (Call->getType()->getScalarType()->isIntegerTy()) |
| 158 | CallReturnsVectorIntCount += Direction; |
| 159 | else if (Call->getType()->getScalarType()->isFloatingPointTy()) |
| 160 | CallReturnsVectorFloatCount += Direction; |
| 161 | else if (Call->getType()->getScalarType()->isPointerTy()) |
| 162 | CallReturnsVectorPointerCount += Direction; |
| 163 | } |
| 164 | |
| 165 | if (Call->arg_size() > CallWithManyArgumentsThreshold) |
| 166 | CallWithManyArgumentsCount += Direction; |
| 167 | |
| 168 | for (const auto &Arg : Call->args()) { |
| 169 | if (Arg->getType()->isPointerTy()) { |
| 170 | CallWithPointerArgumentCount += Direction; |
| 171 | break; |
| 172 | } |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | #define COUNT_OPERAND(OPTYPE) \ |
| 177 | if (isa<OPTYPE>(Operand)) { \ |
| 178 | OPTYPE##OperandCount += Direction; \ |
| 179 | continue; \ |
| 180 | } |
| 181 | |
| 182 | for (unsigned int OperandIndex = 0; OperandIndex < I.getNumOperands(); |
| 183 | ++OperandIndex) { |
| 184 | Value *Operand = I.getOperand(i: OperandIndex); |
| 185 | COUNT_OPERAND(GlobalValue) |
| 186 | COUNT_OPERAND(ConstantInt) |
| 187 | COUNT_OPERAND(ConstantFP) |
| 188 | COUNT_OPERAND(Constant) |
| 189 | COUNT_OPERAND(Instruction) |
| 190 | COUNT_OPERAND(BasicBlock) |
| 191 | COUNT_OPERAND(InlineAsm) |
| 192 | COUNT_OPERAND(Argument) |
| 193 | |
| 194 | // We only get to this point if we haven't matched any of the other |
| 195 | // operand types. |
| 196 | UnknownOperandCount += Direction; |
| 197 | } |
| 198 | |
| 199 | #undef CHECK_OPERAND |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | if (IR2VecVocab) { |
| 204 | // We instantiate the IR2Vec embedder each time, as having an unique |
| 205 | // pointer to the embedder as member of the class would make it |
| 206 | // non-copyable. Instantiating the embedder in itself is not costly. |
| 207 | auto Embedder = ir2vec::Embedder::create(Mode: IR2VecKind::Symbolic, |
| 208 | F: *BB.getParent(), Vocabulary: *IR2VecVocab); |
| 209 | if (!Embedder) { |
| 210 | BB.getContext().emitError(ErrorStr: "Error creating IR2Vec embeddings" ); |
| 211 | return; |
| 212 | } |
| 213 | const auto &BBEmbedding = Embedder->getBBVector(BB); |
| 214 | // Subtract BBEmbedding from Function embedding if the direction is -1, |
| 215 | // and add it if the direction is +1. |
| 216 | if (Direction == -1) |
| 217 | FunctionEmbedding -= BBEmbedding; |
| 218 | else |
| 219 | FunctionEmbedding += BBEmbedding; |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | void FunctionPropertiesInfo::updateAggregateStats(const Function &F, |
| 224 | const LoopInfo &LI) { |
| 225 | |
| 226 | Uses = getUses(F); |
| 227 | TopLevelLoopCount = llvm::size(Range: LI); |
| 228 | MaxLoopDepth = 0; |
| 229 | std::deque<const Loop *> Worklist; |
| 230 | llvm::append_range(C&: Worklist, R: LI); |
| 231 | while (!Worklist.empty()) { |
| 232 | const auto *L = Worklist.front(); |
| 233 | MaxLoopDepth = |
| 234 | std::max(a: MaxLoopDepth, b: static_cast<int64_t>(L->getLoopDepth())); |
| 235 | Worklist.pop_front(); |
| 236 | llvm::append_range(C&: Worklist, R: L->getSubLoops()); |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( |
| 241 | Function &F, FunctionAnalysisManager &FAM) { |
| 242 | // We use the cached result of the IR2VecVocabAnalysis run by |
| 243 | // InlineAdvisorAnalysis. If the IR2VecVocabAnalysis is not run, we don't |
| 244 | // use IR2Vec embeddings. |
| 245 | auto VocabResult = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F) |
| 246 | .getCachedResult<IR2VecVocabAnalysis>(IR&: *F.getParent()); |
| 247 | return getFunctionPropertiesInfo(F, DT: FAM.getResult<DominatorTreeAnalysis>(IR&: F), |
| 248 | LI: FAM.getResult<LoopAnalysis>(IR&: F), VocabResult); |
| 249 | } |
| 250 | |
| 251 | FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo( |
| 252 | const Function &F, const DominatorTree &DT, const LoopInfo &LI, |
| 253 | const IR2VecVocabResult *VocabResult) { |
| 254 | |
| 255 | FunctionPropertiesInfo FPI; |
| 256 | if (VocabResult && VocabResult->isValid()) { |
| 257 | FPI.IR2VecVocab = VocabResult->getVocabulary(); |
| 258 | FPI.FunctionEmbedding = ir2vec::Embedding(VocabResult->getDimension(), 0.0); |
| 259 | } |
| 260 | for (const auto &BB : F) |
| 261 | if (DT.isReachableFromEntry(A: &BB)) |
| 262 | FPI.reIncludeBB(BB); |
| 263 | FPI.updateAggregateStats(F, LI); |
| 264 | return FPI; |
| 265 | } |
| 266 | |
| 267 | bool FunctionPropertiesInfo::operator==( |
| 268 | const FunctionPropertiesInfo &FPI) const { |
| 269 | if (BasicBlockCount != FPI.BasicBlockCount || |
| 270 | BlocksReachedFromConditionalInstruction != |
| 271 | FPI.BlocksReachedFromConditionalInstruction || |
| 272 | Uses != FPI.Uses || |
| 273 | DirectCallsToDefinedFunctions != FPI.DirectCallsToDefinedFunctions || |
| 274 | LoadInstCount != FPI.LoadInstCount || |
| 275 | StoreInstCount != FPI.StoreInstCount || |
| 276 | MaxLoopDepth != FPI.MaxLoopDepth || |
| 277 | TopLevelLoopCount != FPI.TopLevelLoopCount || |
| 278 | TotalInstructionCount != FPI.TotalInstructionCount || |
| 279 | BasicBlocksWithSingleSuccessor != FPI.BasicBlocksWithSingleSuccessor || |
| 280 | BasicBlocksWithTwoSuccessors != FPI.BasicBlocksWithTwoSuccessors || |
| 281 | BasicBlocksWithMoreThanTwoSuccessors != |
| 282 | FPI.BasicBlocksWithMoreThanTwoSuccessors || |
| 283 | BasicBlocksWithSinglePredecessor != |
| 284 | FPI.BasicBlocksWithSinglePredecessor || |
| 285 | BasicBlocksWithTwoPredecessors != FPI.BasicBlocksWithTwoPredecessors || |
| 286 | BasicBlocksWithMoreThanTwoPredecessors != |
| 287 | FPI.BasicBlocksWithMoreThanTwoPredecessors || |
| 288 | BigBasicBlocks != FPI.BigBasicBlocks || |
| 289 | MediumBasicBlocks != FPI.MediumBasicBlocks || |
| 290 | SmallBasicBlocks != FPI.SmallBasicBlocks || |
| 291 | CastInstructionCount != FPI.CastInstructionCount || |
| 292 | FloatingPointInstructionCount != FPI.FloatingPointInstructionCount || |
| 293 | IntegerInstructionCount != FPI.IntegerInstructionCount || |
| 294 | ConstantIntOperandCount != FPI.ConstantIntOperandCount || |
| 295 | ConstantFPOperandCount != FPI.ConstantFPOperandCount || |
| 296 | ConstantOperandCount != FPI.ConstantOperandCount || |
| 297 | InstructionOperandCount != FPI.InstructionOperandCount || |
| 298 | BasicBlockOperandCount != FPI.BasicBlockOperandCount || |
| 299 | GlobalValueOperandCount != FPI.GlobalValueOperandCount || |
| 300 | InlineAsmOperandCount != FPI.InlineAsmOperandCount || |
| 301 | ArgumentOperandCount != FPI.ArgumentOperandCount || |
| 302 | UnknownOperandCount != FPI.UnknownOperandCount || |
| 303 | CriticalEdgeCount != FPI.CriticalEdgeCount || |
| 304 | ControlFlowEdgeCount != FPI.ControlFlowEdgeCount || |
| 305 | UnconditionalBranchCount != FPI.UnconditionalBranchCount || |
| 306 | IntrinsicCount != FPI.IntrinsicCount || |
| 307 | DirectCallCount != FPI.DirectCallCount || |
| 308 | IndirectCallCount != FPI.IndirectCallCount || |
| 309 | CallReturnsIntegerCount != FPI.CallReturnsIntegerCount || |
| 310 | CallReturnsFloatCount != FPI.CallReturnsFloatCount || |
| 311 | CallReturnsPointerCount != FPI.CallReturnsPointerCount || |
| 312 | CallReturnsVectorIntCount != FPI.CallReturnsVectorIntCount || |
| 313 | CallReturnsVectorFloatCount != FPI.CallReturnsVectorFloatCount || |
| 314 | CallReturnsVectorPointerCount != FPI.CallReturnsVectorPointerCount || |
| 315 | CallWithManyArgumentsCount != FPI.CallWithManyArgumentsCount || |
| 316 | CallWithPointerArgumentCount != FPI.CallWithPointerArgumentCount) { |
| 317 | return false; |
| 318 | } |
| 319 | // Check the equality of the function embeddings. We don't check the equality |
| 320 | // of Vocabulary as it remains the same. |
| 321 | if (!FunctionEmbedding.approximatelyEquals(RHS: FPI.FunctionEmbedding)) |
| 322 | return false; |
| 323 | |
| 324 | return true; |
| 325 | } |
| 326 | |
| 327 | void FunctionPropertiesInfo::print(raw_ostream &OS) const { |
| 328 | #define PRINT_PROPERTY(PROP_NAME) OS << #PROP_NAME ": " << PROP_NAME << "\n"; |
| 329 | |
| 330 | PRINT_PROPERTY(BasicBlockCount) |
| 331 | PRINT_PROPERTY(BlocksReachedFromConditionalInstruction) |
| 332 | PRINT_PROPERTY(Uses) |
| 333 | PRINT_PROPERTY(DirectCallsToDefinedFunctions) |
| 334 | PRINT_PROPERTY(LoadInstCount) |
| 335 | PRINT_PROPERTY(StoreInstCount) |
| 336 | PRINT_PROPERTY(MaxLoopDepth) |
| 337 | PRINT_PROPERTY(TopLevelLoopCount) |
| 338 | PRINT_PROPERTY(TotalInstructionCount) |
| 339 | |
| 340 | if (EnableDetailedFunctionProperties) { |
| 341 | PRINT_PROPERTY(BasicBlocksWithSingleSuccessor) |
| 342 | PRINT_PROPERTY(BasicBlocksWithTwoSuccessors) |
| 343 | PRINT_PROPERTY(BasicBlocksWithMoreThanTwoSuccessors) |
| 344 | PRINT_PROPERTY(BasicBlocksWithSinglePredecessor) |
| 345 | PRINT_PROPERTY(BasicBlocksWithTwoPredecessors) |
| 346 | PRINT_PROPERTY(BasicBlocksWithMoreThanTwoPredecessors) |
| 347 | PRINT_PROPERTY(BigBasicBlocks) |
| 348 | PRINT_PROPERTY(MediumBasicBlocks) |
| 349 | PRINT_PROPERTY(SmallBasicBlocks) |
| 350 | PRINT_PROPERTY(CastInstructionCount) |
| 351 | PRINT_PROPERTY(FloatingPointInstructionCount) |
| 352 | PRINT_PROPERTY(IntegerInstructionCount) |
| 353 | PRINT_PROPERTY(ConstantIntOperandCount) |
| 354 | PRINT_PROPERTY(ConstantFPOperandCount) |
| 355 | PRINT_PROPERTY(ConstantOperandCount) |
| 356 | PRINT_PROPERTY(InstructionOperandCount) |
| 357 | PRINT_PROPERTY(BasicBlockOperandCount) |
| 358 | PRINT_PROPERTY(GlobalValueOperandCount) |
| 359 | PRINT_PROPERTY(InlineAsmOperandCount) |
| 360 | PRINT_PROPERTY(ArgumentOperandCount) |
| 361 | PRINT_PROPERTY(UnknownOperandCount) |
| 362 | PRINT_PROPERTY(CriticalEdgeCount) |
| 363 | PRINT_PROPERTY(ControlFlowEdgeCount) |
| 364 | PRINT_PROPERTY(UnconditionalBranchCount) |
| 365 | PRINT_PROPERTY(IntrinsicCount) |
| 366 | PRINT_PROPERTY(DirectCallCount) |
| 367 | PRINT_PROPERTY(IndirectCallCount) |
| 368 | PRINT_PROPERTY(CallReturnsIntegerCount) |
| 369 | PRINT_PROPERTY(CallReturnsFloatCount) |
| 370 | PRINT_PROPERTY(CallReturnsPointerCount) |
| 371 | PRINT_PROPERTY(CallReturnsVectorIntCount) |
| 372 | PRINT_PROPERTY(CallReturnsVectorFloatCount) |
| 373 | PRINT_PROPERTY(CallReturnsVectorPointerCount) |
| 374 | PRINT_PROPERTY(CallWithManyArgumentsCount) |
| 375 | PRINT_PROPERTY(CallWithPointerArgumentCount) |
| 376 | } |
| 377 | |
| 378 | #undef PRINT_PROPERTY |
| 379 | |
| 380 | OS << "\n" ; |
| 381 | } |
| 382 | |
| 383 | AnalysisKey FunctionPropertiesAnalysis::Key; |
| 384 | |
| 385 | FunctionPropertiesInfo |
| 386 | FunctionPropertiesAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { |
| 387 | return FunctionPropertiesInfo::getFunctionPropertiesInfo(F, FAM); |
| 388 | } |
| 389 | |
| 390 | PreservedAnalyses |
| 391 | FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { |
| 392 | OS << "Printing analysis results of CFA for function " |
| 393 | << "'" << F.getName() << "':" |
| 394 | << "\n" ; |
| 395 | AM.getResult<FunctionPropertiesAnalysis>(IR&: F).print(OS); |
| 396 | return PreservedAnalyses::all(); |
| 397 | } |
| 398 | |
| 399 | FunctionPropertiesUpdater::FunctionPropertiesUpdater( |
| 400 | FunctionPropertiesInfo &FPI, CallBase &CB) |
| 401 | : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) { |
| 402 | assert(isa<CallInst>(CB) || isa<InvokeInst>(CB)); |
| 403 | // For BBs that are likely to change, we subtract from feature totals their |
| 404 | // contribution. Some features, like max loop counts or depths, are left |
| 405 | // invalid, as they will be updated post-inlining. |
| 406 | SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs; |
| 407 | // The CB BB will change - it'll either be split or the callee's body (single |
| 408 | // BB) will be pasted in. |
| 409 | LikelyToChangeBBs.insert(Ptr: &CallSiteBB); |
| 410 | |
| 411 | // The caller's entry BB may change due to new alloca instructions. |
| 412 | LikelyToChangeBBs.insert(Ptr: &*Caller.begin()); |
| 413 | |
| 414 | // The users of the value returned by call instruction can change |
| 415 | // leading to the change in embeddings being computed, when used. |
| 416 | // We conservatively add the BBs with such uses to LikelyToChangeBBs. |
| 417 | for (const auto *User : CB.users()) |
| 418 | CallUsers.insert(V: dyn_cast<Instruction>(Val: User)->getParent()); |
| 419 | // CallSiteBB can be removed from CallUsers if present, it's taken care |
| 420 | // separately. |
| 421 | CallUsers.erase(V: &CallSiteBB); |
| 422 | LikelyToChangeBBs.insert_range(R&: CallUsers); |
| 423 | |
| 424 | // The successors may become unreachable in the case of `invoke` inlining. |
| 425 | // We track successors separately, too, because they form a boundary, together |
| 426 | // with the CB BB ('Entry') between which the inlined callee will be pasted. |
| 427 | Successors.insert_range(R: successors(BB: &CallSiteBB)); |
| 428 | |
| 429 | // the outcome of the inlining may be that some edges get lost (DCEd BBs |
| 430 | // because inlining brought some constant, for example). We don't know which |
| 431 | // edges will be removed, so we list all of them as potentially removable. |
| 432 | // Some BBs have (at this point) duplicate edges. Remove duplicates, otherwise |
| 433 | // the DT updater will not apply changes correctly. |
| 434 | DenseSet<const BasicBlock *> Inserted; |
| 435 | for (auto *Succ : successors(BB: &CallSiteBB)) |
| 436 | if (Inserted.insert(V: Succ).second) |
| 437 | DomTreeUpdates.emplace_back(Args: DominatorTree::UpdateKind::Delete, |
| 438 | Args: const_cast<BasicBlock *>(&CallSiteBB), |
| 439 | Args: const_cast<BasicBlock *>(Succ)); |
| 440 | // Reuse Inserted (which has some allocated capacity at this point) below, if |
| 441 | // we have an invoke. |
| 442 | Inserted.clear(); |
| 443 | // Inlining only handles invoke and calls. If this is an invoke, and inlining |
| 444 | // it pulls another invoke, the original landing pad may get split, so as to |
| 445 | // share its content with other potential users. So the edge up to which we |
| 446 | // need to invalidate and then re-account BB data is the successors of the |
| 447 | // current landing pad. We can leave the current lp, too - if it doesn't get |
| 448 | // split, then it will be the place traversal stops. Either way, the |
| 449 | // discounted BBs will be checked if reachable and re-added. |
| 450 | if (const auto *II = dyn_cast<InvokeInst>(Val: &CB)) { |
| 451 | const auto *UnwindDest = II->getUnwindDest(); |
| 452 | Successors.insert_range(R: successors(BB: UnwindDest)); |
| 453 | // Same idea as above, we pretend we lose all these edges. |
| 454 | for (auto *Succ : successors(BB: UnwindDest)) |
| 455 | if (Inserted.insert(V: Succ).second) |
| 456 | DomTreeUpdates.emplace_back(Args: DominatorTree::UpdateKind::Delete, |
| 457 | Args: const_cast<BasicBlock *>(UnwindDest), |
| 458 | Args: const_cast<BasicBlock *>(Succ)); |
| 459 | } |
| 460 | |
| 461 | // Exclude the CallSiteBB, if it happens to be its own successor (1-BB loop). |
| 462 | // We are only interested in BBs the graph moves past the callsite BB to |
| 463 | // define the frontier past which we don't want to re-process BBs. Including |
| 464 | // the callsite BB in this case would prematurely stop the traversal in |
| 465 | // finish(). |
| 466 | Successors.erase(V: &CallSiteBB); |
| 467 | |
| 468 | LikelyToChangeBBs.insert_range(R&: Successors); |
| 469 | |
| 470 | // Commit the change. While some of the BBs accounted for above may play dual |
| 471 | // role - e.g. caller's entry BB may be the same as the callsite BB - set |
| 472 | // insertion semantics make sure we account them once. This needs to be |
| 473 | // followed in `finish`, too. |
| 474 | for (const auto *BB : LikelyToChangeBBs) |
| 475 | FPI.updateForBB(BB: *BB, Direction: -1); |
| 476 | } |
| 477 | |
| 478 | DominatorTree &FunctionPropertiesUpdater::getUpdatedDominatorTree( |
| 479 | FunctionAnalysisManager &FAM) const { |
| 480 | auto &DT = |
| 481 | FAM.getResult<DominatorTreeAnalysis>(IR&: const_cast<Function &>(Caller)); |
| 482 | |
| 483 | SmallVector<DominatorTree::UpdateType, 2> FinalDomTreeUpdates; |
| 484 | |
| 485 | DenseSet<const BasicBlock *> Inserted; |
| 486 | for (auto *Succ : successors(BB: &CallSiteBB)) |
| 487 | if (Inserted.insert(V: Succ).second) |
| 488 | FinalDomTreeUpdates.push_back(Elt: {DominatorTree::UpdateKind::Insert, |
| 489 | const_cast<BasicBlock *>(&CallSiteBB), |
| 490 | const_cast<BasicBlock *>(Succ)}); |
| 491 | |
| 492 | // Perform the deletes last, so that any new nodes connected to nodes |
| 493 | // participating in the edge deletion are known to the DT. |
| 494 | for (auto &Upd : DomTreeUpdates) |
| 495 | if (!llvm::is_contained(Range: successors(BB: Upd.getFrom()), Element: Upd.getTo())) |
| 496 | FinalDomTreeUpdates.push_back(Elt: Upd); |
| 497 | |
| 498 | DT.applyUpdates(Updates: FinalDomTreeUpdates); |
| 499 | #ifdef EXPENSIVE_CHECKS |
| 500 | assert(DT.verify(DominatorTree::VerificationLevel::Full)); |
| 501 | #endif |
| 502 | return DT; |
| 503 | } |
| 504 | |
| 505 | void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const { |
| 506 | // Update feature values from the BBs that were copied from the callee, or |
| 507 | // might have been modified because of inlining. The latter have been |
| 508 | // subtracted in the FunctionPropertiesUpdater ctor. |
| 509 | // There could be successors that were reached before but now are only |
| 510 | // reachable from elsewhere in the CFG. |
| 511 | // One example is the following diamond CFG (lines are arrows pointing down): |
| 512 | // A |
| 513 | // / \ |
| 514 | // B C |
| 515 | // | | |
| 516 | // | D |
| 517 | // | | |
| 518 | // | E |
| 519 | // \ / |
| 520 | // F |
| 521 | // There's a call site in C that is inlined. Upon doing that, it turns out |
| 522 | // it expands to |
| 523 | // call void @llvm.trap() |
| 524 | // unreachable |
| 525 | // F isn't reachable from C anymore, but we did discount it when we set up |
| 526 | // FunctionPropertiesUpdater, so we need to re-include it here. |
| 527 | // At the same time, D and E were reachable before, but now are not anymore, |
| 528 | // so we need to leave D out (we discounted it at setup), and explicitly |
| 529 | // remove E. |
| 530 | SetVector<const BasicBlock *> Reinclude; |
| 531 | SetVector<const BasicBlock *> Unreachable; |
| 532 | auto &DT = getUpdatedDominatorTree(FAM); |
| 533 | |
| 534 | if (&CallSiteBB != &*Caller.begin()) |
| 535 | Reinclude.insert(X: &*Caller.begin()); |
| 536 | |
| 537 | // Reinclude the BBs which use the values returned by call instruction |
| 538 | Reinclude.insert_range(R: CallUsers); |
| 539 | |
| 540 | // Distribute the successors to the 2 buckets. |
| 541 | for (const auto *Succ : Successors) |
| 542 | if (DT.isReachableFromEntry(A: Succ)) |
| 543 | Reinclude.insert(X: Succ); |
| 544 | else |
| 545 | Unreachable.insert(X: Succ); |
| 546 | |
| 547 | // For reinclusion, we want to stop at the reachable successors, who are at |
| 548 | // the beginning of the worklist; but, starting from the callsite bb and |
| 549 | // ending at those successors, we also want to perform a traversal. |
| 550 | // IncludeSuccessorsMark is the index after which we include successors. |
| 551 | const auto IncludeSuccessorsMark = Reinclude.size(); |
| 552 | bool CSInsertion = Reinclude.insert(X: &CallSiteBB); |
| 553 | (void)CSInsertion; |
| 554 | assert(CSInsertion); |
| 555 | for (size_t I = 0; I < Reinclude.size(); ++I) { |
| 556 | const auto *BB = Reinclude[I]; |
| 557 | FPI.reIncludeBB(BB: *BB); |
| 558 | if (I >= IncludeSuccessorsMark) |
| 559 | Reinclude.insert_range(R: successors(BB)); |
| 560 | } |
| 561 | |
| 562 | // For exclusion, we don't need to exclude the set of BBs that were successors |
| 563 | // before and are now unreachable, because we already did that at setup. For |
| 564 | // the rest, as long as a successor is unreachable, we want to explicitly |
| 565 | // exclude it. |
| 566 | const auto AlreadyExcludedMark = Unreachable.size(); |
| 567 | for (size_t I = 0; I < Unreachable.size(); ++I) { |
| 568 | const auto *U = Unreachable[I]; |
| 569 | if (I >= AlreadyExcludedMark) |
| 570 | FPI.updateForBB(BB: *U, Direction: -1); |
| 571 | for (const auto *Succ : successors(BB: U)) |
| 572 | if (!DT.isReachableFromEntry(A: Succ)) |
| 573 | Unreachable.insert(X: Succ); |
| 574 | } |
| 575 | |
| 576 | const auto &LI = FAM.getResult<LoopAnalysis>(IR&: const_cast<Function &>(Caller)); |
| 577 | FPI.updateAggregateStats(F: Caller, LI); |
| 578 | #ifdef EXPENSIVE_CHECKS |
| 579 | assert(isUpdateValid(Caller, FPI, FAM)); |
| 580 | #endif |
| 581 | } |
| 582 | |
| 583 | bool FunctionPropertiesUpdater::isUpdateValid(Function &F, |
| 584 | const FunctionPropertiesInfo &FPI, |
| 585 | FunctionAnalysisManager &FAM) { |
| 586 | if (!FAM.getResult<DominatorTreeAnalysis>(IR&: F).verify( |
| 587 | VL: DominatorTree::VerificationLevel::Full)) |
| 588 | return false; |
| 589 | DominatorTree DT(F); |
| 590 | LoopInfo LI(DT); |
| 591 | auto VocabResult = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F) |
| 592 | .getCachedResult<IR2VecVocabAnalysis>(IR&: *F.getParent()); |
| 593 | auto Fresh = |
| 594 | FunctionPropertiesInfo::getFunctionPropertiesInfo(F, DT, LI, VocabResult); |
| 595 | return FPI == Fresh; |
| 596 | } |
| 597 | |