| 1 | //===- CodeMetrics.cpp - Code cost measurements ---------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements code cost measurement utilities. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/Analysis/CodeMetrics.h" |
| 14 | #include "llvm/ADT/SmallPtrSet.h" |
| 15 | #include "llvm/Analysis/AssumptionCache.h" |
| 16 | #include "llvm/Analysis/LoopInfo.h" |
| 17 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 18 | #include "llvm/IR/Function.h" |
| 19 | #include "llvm/IR/IntrinsicInst.h" |
| 20 | #include "llvm/Support/Debug.h" |
| 21 | #include "llvm/Support/InstructionCost.h" |
| 22 | |
| 23 | #define DEBUG_TYPE "code-metrics" |
| 24 | |
| 25 | using namespace llvm; |
| 26 | |
| 27 | static void |
| 28 | appendSpeculatableOperands(const Value *V, |
| 29 | SmallPtrSetImpl<const Value *> &Visited, |
| 30 | SmallVectorImpl<const Value *> &Worklist) { |
| 31 | const User *U = dyn_cast<User>(Val: V); |
| 32 | if (!U) |
| 33 | return; |
| 34 | |
| 35 | for (const Value *Operand : U->operands()) |
| 36 | if (Visited.insert(Ptr: Operand).second) |
| 37 | if (const auto *I = dyn_cast<Instruction>(Val: Operand)) |
| 38 | if (!I->mayHaveSideEffects() && !I->isTerminator()) |
| 39 | Worklist.push_back(Elt: I); |
| 40 | } |
| 41 | |
| 42 | static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited, |
| 43 | SmallVectorImpl<const Value *> &Worklist, |
| 44 | SmallPtrSetImpl<const Value *> &EphValues) { |
| 45 | // Note: We don't speculate PHIs here, so we'll miss instruction chains kept |
| 46 | // alive only by ephemeral values. |
| 47 | |
| 48 | // Walk the worklist using an index but without caching the size so we can |
| 49 | // append more entries as we process the worklist. This forms a queue without |
| 50 | // quadratic behavior by just leaving processed nodes at the head of the |
| 51 | // worklist forever. |
| 52 | for (int i = 0; i < (int)Worklist.size(); ++i) { |
| 53 | const Value *V = Worklist[i]; |
| 54 | |
| 55 | assert(Visited.count(V) && |
| 56 | "Failed to add a worklist entry to our visited set!" ); |
| 57 | |
| 58 | // If all uses of this value are ephemeral, then so is this value. |
| 59 | if (!all_of(Range: V->users(), P: [&](const User *U) { return EphValues.count(Ptr: U); })) |
| 60 | continue; |
| 61 | |
| 62 | EphValues.insert(Ptr: V); |
| 63 | LLVM_DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n" ); |
| 64 | |
| 65 | // Append any more operands to consider. |
| 66 | appendSpeculatableOperands(V, Visited, Worklist); |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | // Find all ephemeral values. |
| 71 | void CodeMetrics::collectEphemeralValues( |
| 72 | const Loop *L, AssumptionCache *AC, |
| 73 | SmallPtrSetImpl<const Value *> &EphValues) { |
| 74 | SmallPtrSet<const Value *, 32> Visited; |
| 75 | SmallVector<const Value *, 16> Worklist; |
| 76 | |
| 77 | for (auto &AssumeVH : AC->assumptions()) { |
| 78 | if (!AssumeVH) |
| 79 | continue; |
| 80 | Instruction *I = cast<Instruction>(Val&: AssumeVH); |
| 81 | |
| 82 | // Filter out call sites outside of the loop so we don't do a function's |
| 83 | // worth of work for each of its loops (and, in the common case, ephemeral |
| 84 | // values in the loop are likely due to @llvm.assume calls in the loop). |
| 85 | if (!L->contains(BB: I->getParent())) |
| 86 | continue; |
| 87 | |
| 88 | if (EphValues.insert(Ptr: I).second) |
| 89 | appendSpeculatableOperands(V: I, Visited, Worklist); |
| 90 | } |
| 91 | |
| 92 | completeEphemeralValues(Visited, Worklist, EphValues); |
| 93 | } |
| 94 | |
| 95 | void CodeMetrics::collectEphemeralValues( |
| 96 | const Function *F, AssumptionCache *AC, |
| 97 | SmallPtrSetImpl<const Value *> &EphValues) { |
| 98 | SmallPtrSet<const Value *, 32> Visited; |
| 99 | SmallVector<const Value *, 16> Worklist; |
| 100 | |
| 101 | for (auto &AssumeVH : AC->assumptions()) { |
| 102 | if (!AssumeVH) |
| 103 | continue; |
| 104 | Instruction *I = cast<Instruction>(Val&: AssumeVH); |
| 105 | assert(I->getParent()->getParent() == F && |
| 106 | "Found assumption for the wrong function!" ); |
| 107 | |
| 108 | if (EphValues.insert(Ptr: I).second) |
| 109 | appendSpeculatableOperands(V: I, Visited, Worklist); |
| 110 | } |
| 111 | |
| 112 | completeEphemeralValues(Visited, Worklist, EphValues); |
| 113 | } |
| 114 | |
| 115 | static bool extendsConvergenceOutsideLoop(const Instruction &I, const Loop *L) { |
| 116 | if (!L) |
| 117 | return false; |
| 118 | if (!isa<ConvergenceControlInst>(Val: I)) |
| 119 | return false; |
| 120 | for (const auto *U : I.users()) { |
| 121 | if (!L->contains(Inst: cast<Instruction>(Val: U))) |
| 122 | return true; |
| 123 | } |
| 124 | return false; |
| 125 | } |
| 126 | |
| 127 | /// Fill in the current structure with information gleaned from the specified |
| 128 | /// block. |
| 129 | void CodeMetrics::analyzeBasicBlock( |
| 130 | const BasicBlock *BB, const TargetTransformInfo &TTI, |
| 131 | const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO, |
| 132 | const Loop *L) { |
| 133 | ++NumBlocks; |
| 134 | InstructionCost NumInstsBeforeThisBB = NumInsts; |
| 135 | for (const Instruction &I : *BB) { |
| 136 | // Skip ephemeral values. |
| 137 | if (EphValues.count(Ptr: &I)) |
| 138 | continue; |
| 139 | |
| 140 | // Special handling for calls. |
| 141 | if (const auto *Call = dyn_cast<CallBase>(Val: &I)) { |
| 142 | if (const Function *F = Call->getCalledFunction()) { |
| 143 | bool IsLoweredToCall = TTI.isLoweredToCall(F); |
| 144 | // If a function is both internal and has a single use, then it is |
| 145 | // extremely likely to get inlined in the future (it was probably |
| 146 | // exposed by an interleaved devirtualization pass). |
| 147 | // When preparing for LTO, liberally consider calls as inline |
| 148 | // candidates. |
| 149 | if (!Call->isNoInline() && IsLoweredToCall && |
| 150 | ((F->hasInternalLinkage() && F->hasOneLiveUse()) || |
| 151 | PrepareForLTO)) { |
| 152 | ++NumInlineCandidates; |
| 153 | } |
| 154 | |
| 155 | // If this call is to function itself, then the function is recursive. |
| 156 | // Inlining it into other functions is a bad idea, because this is |
| 157 | // basically just a form of loop peeling, and our metrics aren't useful |
| 158 | // for that case. |
| 159 | if (F == BB->getParent()) |
| 160 | isRecursive = true; |
| 161 | |
| 162 | if (IsLoweredToCall) |
| 163 | ++NumCalls; |
| 164 | } else { |
| 165 | // We don't want inline asm to count as a call - that would prevent loop |
| 166 | // unrolling. The argument setup cost is still real, though. |
| 167 | if (!Call->isInlineAsm()) |
| 168 | ++NumCalls; |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | if (const AllocaInst *AI = dyn_cast<AllocaInst>(Val: &I)) { |
| 173 | if (!AI->isStaticAlloca()) |
| 174 | this->usesDynamicAlloca = true; |
| 175 | } |
| 176 | |
| 177 | if (isa<ExtractElementInst>(Val: I) || I.getType()->isVectorTy()) |
| 178 | ++NumVectorInsts; |
| 179 | |
| 180 | if (I.getType()->isTokenTy() && !isa<ConvergenceControlInst>(Val: I) && |
| 181 | I.isUsedOutsideOfBlock(BB)) { |
| 182 | LLVM_DEBUG(dbgs() << I |
| 183 | << "\n Cannot duplicate a token value used outside " |
| 184 | "the current block (except convergence control).\n" ); |
| 185 | notDuplicatable = true; |
| 186 | } |
| 187 | |
| 188 | if (const CallBase *CB = dyn_cast<CallBase>(Val: &I)) { |
| 189 | if (CB->cannotDuplicate()) |
| 190 | notDuplicatable = true; |
| 191 | // Compute a meet over the visited blocks for the following partial order: |
| 192 | // |
| 193 | // None -> { Controlled, ExtendedLoop, Uncontrolled} |
| 194 | // Controlled -> ExtendedLoop |
| 195 | if (Convergence <= ConvergenceKind::Controlled && CB->isConvergent()) { |
| 196 | if (isa<ConvergenceControlInst>(Val: CB) || |
| 197 | CB->getConvergenceControlToken()) { |
| 198 | assert(Convergence != ConvergenceKind::Uncontrolled); |
| 199 | LLVM_DEBUG(dbgs() << "Found controlled convergence:\n" << I << "\n" ); |
| 200 | if (extendsConvergenceOutsideLoop(I, L)) |
| 201 | Convergence = ConvergenceKind::ExtendedLoop; |
| 202 | else { |
| 203 | assert(Convergence != ConvergenceKind::ExtendedLoop); |
| 204 | Convergence = ConvergenceKind::Controlled; |
| 205 | } |
| 206 | } else { |
| 207 | assert(Convergence == ConvergenceKind::None); |
| 208 | Convergence = ConvergenceKind::Uncontrolled; |
| 209 | } |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | NumInsts += TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_CodeSize); |
| 214 | } |
| 215 | |
| 216 | if (isa<ReturnInst>(Val: BB->getTerminator())) |
| 217 | ++NumRets; |
| 218 | |
| 219 | // We never want to inline functions that contain an indirectbr. This is |
| 220 | // incorrect because all the blockaddress's (in static global initializers |
| 221 | // for example) would be referring to the original function, and this indirect |
| 222 | // jump would jump from the inlined copy of the function into the original |
| 223 | // function which is extremely undefined behavior. |
| 224 | // FIXME: This logic isn't really right; we can safely inline functions |
| 225 | // with indirectbr's as long as no other function or global references the |
| 226 | // blockaddress of a block within the current function. And as a QOI issue, |
| 227 | // if someone is using a blockaddress without an indirectbr, and that |
| 228 | // reference somehow ends up in another function or global, we probably |
| 229 | // don't want to inline this function. |
| 230 | notDuplicatable |= isa<IndirectBrInst>(Val: BB->getTerminator()); |
| 231 | |
| 232 | // Remember NumInsts for this BB. |
| 233 | InstructionCost NumInstsThisBB = NumInsts - NumInstsBeforeThisBB; |
| 234 | NumBBInsts[BB] = NumInstsThisBB; |
| 235 | } |
| 236 | |