| 1 | //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // This pass promotes "by reference" arguments to be "by value" arguments.  In | 
|---|
| 10 | // practice, this means looking for internal functions that have pointer | 
|---|
| 11 | // arguments.  If it can prove, through the use of alias analysis, that an | 
|---|
| 12 | // argument is *only* loaded, then it can pass the value into the function | 
|---|
| 13 | // instead of the address of the value.  This can cause recursive simplification | 
|---|
| 14 | // of code and lead to the elimination of allocas (especially in C++ template | 
|---|
| 15 | // code like the STL). | 
|---|
| 16 | // | 
|---|
| 17 | // This pass also handles aggregate arguments that are passed into a function, | 
|---|
| 18 | // scalarizing them if the elements of the aggregate are only loaded.  Note that | 
|---|
| 19 | // by default it refuses to scalarize aggregates which would require passing in | 
|---|
| 20 | // more than three operands to the function, because passing thousands of | 
|---|
| 21 | // operands for a large array or structure is unprofitable! This limit can be | 
|---|
| 22 | // configured or disabled, however. | 
|---|
| 23 | // | 
|---|
| 24 | // Note that this transformation could also be done for arguments that are only | 
|---|
| 25 | // stored to (returning the value instead), but does not currently.  This case | 
|---|
| 26 | // would be best handled when and if LLVM begins supporting multiple return | 
|---|
| 27 | // values from functions. | 
|---|
| 28 | // | 
|---|
| 29 | //===----------------------------------------------------------------------===// | 
|---|
| 30 |  | 
|---|
| 31 | #include "llvm/Transforms/IPO/ArgumentPromotion.h" | 
|---|
| 32 |  | 
|---|
| 33 | #include "llvm/ADT/DepthFirstIterator.h" | 
|---|
| 34 | #include "llvm/ADT/STLExtras.h" | 
|---|
| 35 | #include "llvm/ADT/ScopeExit.h" | 
|---|
| 36 | #include "llvm/ADT/SmallPtrSet.h" | 
|---|
| 37 | #include "llvm/ADT/SmallVector.h" | 
|---|
| 38 | #include "llvm/ADT/Statistic.h" | 
|---|
| 39 | #include "llvm/ADT/Twine.h" | 
|---|
| 40 | #include "llvm/Analysis/AssumptionCache.h" | 
|---|
| 41 | #include "llvm/Analysis/BasicAliasAnalysis.h" | 
|---|
| 42 | #include "llvm/Analysis/CallGraph.h" | 
|---|
| 43 | #include "llvm/Analysis/Loads.h" | 
|---|
| 44 | #include "llvm/Analysis/MemoryLocation.h" | 
|---|
| 45 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | 
|---|
| 46 | #include "llvm/Analysis/TargetTransformInfo.h" | 
|---|
| 47 | #include "llvm/Analysis/ValueTracking.h" | 
|---|
| 48 | #include "llvm/IR/Argument.h" | 
|---|
| 49 | #include "llvm/IR/Attributes.h" | 
|---|
| 50 | #include "llvm/IR/BasicBlock.h" | 
|---|
| 51 | #include "llvm/IR/CFG.h" | 
|---|
| 52 | #include "llvm/IR/Constants.h" | 
|---|
| 53 | #include "llvm/IR/DataLayout.h" | 
|---|
| 54 | #include "llvm/IR/DerivedTypes.h" | 
|---|
| 55 | #include "llvm/IR/Dominators.h" | 
|---|
| 56 | #include "llvm/IR/Function.h" | 
|---|
| 57 | #include "llvm/IR/IRBuilder.h" | 
|---|
| 58 | #include "llvm/IR/InstrTypes.h" | 
|---|
| 59 | #include "llvm/IR/Instruction.h" | 
|---|
| 60 | #include "llvm/IR/Instructions.h" | 
|---|
| 61 | #include "llvm/IR/Metadata.h" | 
|---|
| 62 | #include "llvm/IR/Module.h" | 
|---|
| 63 | #include "llvm/IR/NoFolder.h" | 
|---|
| 64 | #include "llvm/IR/PassManager.h" | 
|---|
| 65 | #include "llvm/IR/Type.h" | 
|---|
| 66 | #include "llvm/IR/Use.h" | 
|---|
| 67 | #include "llvm/IR/User.h" | 
|---|
| 68 | #include "llvm/IR/Value.h" | 
|---|
| 69 | #include "llvm/Support/Casting.h" | 
|---|
| 70 | #include "llvm/Support/Debug.h" | 
|---|
| 71 | #include "llvm/Support/raw_ostream.h" | 
|---|
| 72 | #include "llvm/Transforms/Utils/Local.h" | 
|---|
| 73 | #include "llvm/Transforms/Utils/PromoteMemToReg.h" | 
|---|
| 74 | #include <algorithm> | 
|---|
| 75 | #include <cassert> | 
|---|
| 76 | #include <cstdint> | 
|---|
| 77 | #include <utility> | 
|---|
| 78 | #include <vector> | 
|---|
| 79 |  | 
|---|
| 80 | using namespace llvm; | 
|---|
| 81 |  | 
|---|
| 82 | #define DEBUG_TYPE "argpromotion" | 
|---|
| 83 |  | 
|---|
| 84 | STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); | 
|---|
| 85 | STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); | 
|---|
| 86 |  | 
|---|
| 87 | namespace { | 
|---|
| 88 |  | 
|---|
| 89 | struct ArgPart { | 
|---|
| 90 | Type *Ty; | 
|---|
| 91 | Align Alignment; | 
|---|
| 92 | /// A representative guaranteed-executed load or store instruction for use by | 
|---|
| 93 | /// metadata transfer. | 
|---|
| 94 | Instruction *MustExecInstr; | 
|---|
| 95 | }; | 
|---|
| 96 |  | 
|---|
| 97 | using OffsetAndArgPart = std::pair<int64_t, ArgPart>; | 
|---|
| 98 |  | 
|---|
| 99 | } // end anonymous namespace | 
|---|
| 100 |  | 
|---|
| 101 | static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL, | 
|---|
| 102 | Value *Ptr, Type *ResElemTy, int64_t Offset) { | 
|---|
| 103 | if (Offset != 0) { | 
|---|
| 104 | APInt APOffset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), Offset, | 
|---|
| 105 | /*isSigned=*/true); | 
|---|
| 106 | Ptr = IRB.CreatePtrAdd(Ptr, Offset: IRB.getInt(AI: APOffset)); | 
|---|
| 107 | } | 
|---|
| 108 | return Ptr; | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | /// DoPromotion - This method actually performs the promotion of the specified | 
|---|
| 112 | /// arguments, and returns the new function.  At this point, we know that it's | 
|---|
| 113 | /// safe to do so. | 
|---|
| 114 | static Function * | 
|---|
| 115 | doPromotion(Function *F, FunctionAnalysisManager &FAM, | 
|---|
| 116 | const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> | 
|---|
| 117 | &ArgsToPromote) { | 
|---|
| 118 | // Start by computing a new prototype for the function, which is the same as | 
|---|
| 119 | // the old function, but has modified arguments. | 
|---|
| 120 | FunctionType *FTy = F->getFunctionType(); | 
|---|
| 121 | std::vector<Type *> Params; | 
|---|
| 122 |  | 
|---|
| 123 | // Attribute - Keep track of the parameter attributes for the arguments | 
|---|
| 124 | // that we are *not* promoting. For the ones that we do promote, the parameter | 
|---|
| 125 | // attributes are lost | 
|---|
| 126 | SmallVector<AttributeSet, 8> ArgAttrVec; | 
|---|
| 127 | // Mapping from old to new argument indices. -1 for promoted or removed | 
|---|
| 128 | // arguments. | 
|---|
| 129 | SmallVector<unsigned> NewArgIndices; | 
|---|
| 130 | AttributeList PAL = F->getAttributes(); | 
|---|
| 131 | OptimizationRemarkEmitter ORE(F); | 
|---|
| 132 |  | 
|---|
| 133 | // First, determine the new argument list | 
|---|
| 134 | unsigned ArgNo = 0, NewArgNo = 0; | 
|---|
| 135 | for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; | 
|---|
| 136 | ++I, ++ArgNo) { | 
|---|
| 137 | auto It = ArgsToPromote.find(Val: &*I); | 
|---|
| 138 | if (It == ArgsToPromote.end()) { | 
|---|
| 139 | // Unchanged argument | 
|---|
| 140 | Params.push_back(x: I->getType()); | 
|---|
| 141 | ArgAttrVec.push_back(Elt: PAL.getParamAttrs(ArgNo)); | 
|---|
| 142 | NewArgIndices.push_back(Elt: NewArgNo++); | 
|---|
| 143 | } else if (I->use_empty()) { | 
|---|
| 144 | // Dead argument (which are always marked as promotable) | 
|---|
| 145 | ++NumArgumentsDead; | 
|---|
| 146 | ORE.emit(RemarkBuilder: [&]() { | 
|---|
| 147 | return OptimizationRemark(DEBUG_TYPE, "ArgumentRemoved", F) | 
|---|
| 148 | << "eliminating argument "<< ore::NV( "ArgName", I->getName()) | 
|---|
| 149 | << "("<< ore::NV( "ArgIndex", ArgNo) << ")"; | 
|---|
| 150 | }); | 
|---|
| 151 |  | 
|---|
| 152 | NewArgIndices.push_back(Elt: (unsigned)-1); | 
|---|
| 153 | } else { | 
|---|
| 154 | const auto &ArgParts = It->second; | 
|---|
| 155 | for (const auto &Pair : ArgParts) { | 
|---|
| 156 | Params.push_back(x: Pair.second.Ty); | 
|---|
| 157 | ArgAttrVec.push_back(Elt: AttributeSet()); | 
|---|
| 158 | } | 
|---|
| 159 | ++NumArgumentsPromoted; | 
|---|
| 160 | ORE.emit(RemarkBuilder: [&]() { | 
|---|
| 161 | return OptimizationRemark(DEBUG_TYPE, "ArgumentPromoted", F) | 
|---|
| 162 | << "promoting argument "<< ore::NV( "ArgName", I->getName()) | 
|---|
| 163 | << "("<< ore::NV( "ArgIndex", ArgNo) << ")" | 
|---|
| 164 | << " to pass by value"; | 
|---|
| 165 | }); | 
|---|
| 166 |  | 
|---|
| 167 | NewArgIndices.push_back(Elt: (unsigned)-1); | 
|---|
| 168 | NewArgNo += ArgParts.size(); | 
|---|
| 169 | } | 
|---|
| 170 | } | 
|---|
| 171 |  | 
|---|
| 172 | Type *RetTy = FTy->getReturnType(); | 
|---|
| 173 |  | 
|---|
| 174 | // Construct the new function type using the new arguments. | 
|---|
| 175 | FunctionType *NFTy = FunctionType::get(Result: RetTy, Params, isVarArg: FTy->isVarArg()); | 
|---|
| 176 |  | 
|---|
| 177 | // Create the new function body and insert it into the module. | 
|---|
| 178 | Function *NF = Function::Create(Ty: NFTy, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(), | 
|---|
| 179 | N: F->getName()); | 
|---|
| 180 | NF->copyAttributesFrom(Src: F); | 
|---|
| 181 | NF->copyMetadata(Src: F, Offset: 0); | 
|---|
| 182 |  | 
|---|
| 183 | // The new function will have the !dbg metadata copied from the original | 
|---|
| 184 | // function. The original function may not be deleted, and dbg metadata need | 
|---|
| 185 | // to be unique, so we need to drop it. | 
|---|
| 186 | F->setSubprogram(nullptr); | 
|---|
| 187 |  | 
|---|
| 188 | LLVM_DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:"<< *NF << "\n" | 
|---|
| 189 | << "From: "<< *F); | 
|---|
| 190 |  | 
|---|
| 191 | uint64_t LargestVectorWidth = 0; | 
|---|
| 192 | for (auto *I : Params) | 
|---|
| 193 | if (auto *VT = dyn_cast<llvm::VectorType>(Val: I)) | 
|---|
| 194 | LargestVectorWidth = std::max( | 
|---|
| 195 | a: LargestVectorWidth, b: VT->getPrimitiveSizeInBits().getKnownMinValue()); | 
|---|
| 196 |  | 
|---|
| 197 | // Recompute the parameter attributes list based on the new arguments for | 
|---|
| 198 | // the function. | 
|---|
| 199 | NF->setAttributes(AttributeList::get(C&: F->getContext(), FnAttrs: PAL.getFnAttrs(), | 
|---|
| 200 | RetAttrs: PAL.getRetAttrs(), ArgAttrs: ArgAttrVec)); | 
|---|
| 201 |  | 
|---|
| 202 | // Remap argument indices in allocsize attribute. | 
|---|
| 203 | if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) { | 
|---|
| 204 | unsigned Arg1 = NewArgIndices[AllocSize->first]; | 
|---|
| 205 | assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument"); | 
|---|
| 206 | std::optional<unsigned> Arg2; | 
|---|
| 207 | if (AllocSize->second) { | 
|---|
| 208 | Arg2 = NewArgIndices[*AllocSize->second]; | 
|---|
| 209 | assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument"); | 
|---|
| 210 | } | 
|---|
| 211 | NF->addFnAttr(Attr: Attribute::getWithAllocSizeArgs(Context&: F->getContext(), ElemSizeArg: Arg1, NumElemsArg: Arg2)); | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | AttributeFuncs::updateMinLegalVectorWidthAttr(Fn&: *NF, Width: LargestVectorWidth); | 
|---|
| 215 | ArgAttrVec.clear(); | 
|---|
| 216 |  | 
|---|
| 217 | F->getParent()->getFunctionList().insert(where: F->getIterator(), New: NF); | 
|---|
| 218 | NF->takeName(V: F); | 
|---|
| 219 |  | 
|---|
| 220 | // Loop over all the callers of the function, transforming the call sites to | 
|---|
| 221 | // pass in the loaded pointers. | 
|---|
| 222 | SmallVector<Value *, 16> Args; | 
|---|
| 223 | const DataLayout &DL = F->getDataLayout(); | 
|---|
| 224 | SmallVector<WeakTrackingVH, 16> DeadArgs; | 
|---|
| 225 |  | 
|---|
| 226 | while (!F->use_empty()) { | 
|---|
| 227 | CallBase &CB = cast<CallBase>(Val&: *F->user_back()); | 
|---|
| 228 | assert(CB.getCalledFunction() == F); | 
|---|
| 229 | const AttributeList &CallPAL = CB.getAttributes(); | 
|---|
| 230 | IRBuilder<NoFolder> IRB(&CB); | 
|---|
| 231 |  | 
|---|
| 232 | // Loop over the operands, inserting GEP and loads in the caller as | 
|---|
| 233 | // appropriate. | 
|---|
| 234 | auto *AI = CB.arg_begin(); | 
|---|
| 235 | ArgNo = 0; | 
|---|
| 236 | for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; | 
|---|
| 237 | ++I, ++AI, ++ArgNo) { | 
|---|
| 238 | auto ArgIt = ArgsToPromote.find(Val: &*I); | 
|---|
| 239 | if (ArgIt == ArgsToPromote.end()) { | 
|---|
| 240 | Args.push_back(Elt: *AI); // Unmodified argument | 
|---|
| 241 | ArgAttrVec.push_back(Elt: CallPAL.getParamAttrs(ArgNo)); | 
|---|
| 242 | } else if (!I->use_empty()) { | 
|---|
| 243 | Value *V = *AI; | 
|---|
| 244 | for (const auto &Pair : ArgIt->second) { | 
|---|
| 245 | LoadInst *LI = IRB.CreateAlignedLoad( | 
|---|
| 246 | Ty: Pair.second.Ty, | 
|---|
| 247 | Ptr: createByteGEP(IRB, DL, Ptr: V, ResElemTy: Pair.second.Ty, Offset: Pair.first), | 
|---|
| 248 | Align: Pair.second.Alignment, Name: V->getName() + ".val"); | 
|---|
| 249 | if (Pair.second.MustExecInstr) { | 
|---|
| 250 | LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata()); | 
|---|
| 251 | LI->copyMetadata(SrcInst: *Pair.second.MustExecInstr, | 
|---|
| 252 | WL: {LLVMContext::MD_dereferenceable, | 
|---|
| 253 | LLVMContext::MD_dereferenceable_or_null, | 
|---|
| 254 | LLVMContext::MD_noundef, | 
|---|
| 255 | LLVMContext::MD_nontemporal}); | 
|---|
| 256 | // Only transfer poison-generating metadata if we also have | 
|---|
| 257 | // !noundef. | 
|---|
| 258 | // TODO: Without !noundef, we could merge this metadata across | 
|---|
| 259 | // all promoted loads. | 
|---|
| 260 | if (LI->hasMetadata(KindID: LLVMContext::MD_noundef)) | 
|---|
| 261 | LI->copyMetadata(SrcInst: *Pair.second.MustExecInstr, | 
|---|
| 262 | WL: Metadata::PoisonGeneratingIDs); | 
|---|
| 263 | } | 
|---|
| 264 | Args.push_back(Elt: LI); | 
|---|
| 265 | ArgAttrVec.push_back(Elt: AttributeSet()); | 
|---|
| 266 | } | 
|---|
| 267 | } else { | 
|---|
| 268 | assert(I->use_empty()); | 
|---|
| 269 | DeadArgs.emplace_back(Args: AI->get()); | 
|---|
| 270 | } | 
|---|
| 271 | } | 
|---|
| 272 |  | 
|---|
| 273 | // Push any varargs arguments on the list. | 
|---|
| 274 | for (; AI != CB.arg_end(); ++AI, ++ArgNo) { | 
|---|
| 275 | Args.push_back(Elt: *AI); | 
|---|
| 276 | ArgAttrVec.push_back(Elt: CallPAL.getParamAttrs(ArgNo)); | 
|---|
| 277 | } | 
|---|
| 278 |  | 
|---|
| 279 | SmallVector<OperandBundleDef, 1> OpBundles; | 
|---|
| 280 | CB.getOperandBundlesAsDefs(Defs&: OpBundles); | 
|---|
| 281 |  | 
|---|
| 282 | CallBase *NewCS = nullptr; | 
|---|
| 283 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) { | 
|---|
| 284 | NewCS = InvokeInst::Create(Func: NF, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), | 
|---|
| 285 | Args, Bundles: OpBundles, NameStr: "", InsertBefore: CB.getIterator()); | 
|---|
| 286 | } else { | 
|---|
| 287 | auto *NewCall = | 
|---|
| 288 | CallInst::Create(Func: NF, Args, Bundles: OpBundles, NameStr: "", InsertBefore: CB.getIterator()); | 
|---|
| 289 | NewCall->setTailCallKind(cast<CallInst>(Val: &CB)->getTailCallKind()); | 
|---|
| 290 | NewCS = NewCall; | 
|---|
| 291 | } | 
|---|
| 292 | NewCS->setCallingConv(CB.getCallingConv()); | 
|---|
| 293 | NewCS->setAttributes(AttributeList::get(C&: F->getContext(), | 
|---|
| 294 | FnAttrs: CallPAL.getFnAttrs(), | 
|---|
| 295 | RetAttrs: CallPAL.getRetAttrs(), ArgAttrs: ArgAttrVec)); | 
|---|
| 296 | NewCS->copyMetadata(SrcInst: CB, WL: {LLVMContext::MD_prof, LLVMContext::MD_dbg}); | 
|---|
| 297 | Args.clear(); | 
|---|
| 298 | ArgAttrVec.clear(); | 
|---|
| 299 |  | 
|---|
| 300 | AttributeFuncs::updateMinLegalVectorWidthAttr(Fn&: *CB.getCaller(), | 
|---|
| 301 | Width: LargestVectorWidth); | 
|---|
| 302 |  | 
|---|
| 303 | if (!CB.use_empty()) { | 
|---|
| 304 | CB.replaceAllUsesWith(V: NewCS); | 
|---|
| 305 | NewCS->takeName(V: &CB); | 
|---|
| 306 | } | 
|---|
| 307 |  | 
|---|
| 308 | // Finally, remove the old call from the program, reducing the use-count of | 
|---|
| 309 | // F. | 
|---|
| 310 | CB.eraseFromParent(); | 
|---|
| 311 | } | 
|---|
| 312 |  | 
|---|
| 313 | RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts&: DeadArgs); | 
|---|
| 314 |  | 
|---|
| 315 | // Since we have now created the new function, splice the body of the old | 
|---|
| 316 | // function right into the new function, leaving the old rotting hulk of the | 
|---|
| 317 | // function empty. | 
|---|
| 318 | NF->splice(ToIt: NF->begin(), FromF: F); | 
|---|
| 319 |  | 
|---|
| 320 | // We will collect all the new created allocas to promote them into registers | 
|---|
| 321 | // after the following loop | 
|---|
| 322 | SmallVector<AllocaInst *, 4> Allocas; | 
|---|
| 323 |  | 
|---|
| 324 | // Loop over the argument list, transferring uses of the old arguments over to | 
|---|
| 325 | // the new arguments, also transferring over the names as well. | 
|---|
| 326 | Function::arg_iterator I2 = NF->arg_begin(); | 
|---|
| 327 | for (Argument &Arg : F->args()) { | 
|---|
| 328 | if (!ArgsToPromote.count(Val: &Arg)) { | 
|---|
| 329 | // If this is an unmodified argument, move the name and users over to the | 
|---|
| 330 | // new version. | 
|---|
| 331 | Arg.replaceAllUsesWith(V: &*I2); | 
|---|
| 332 | I2->takeName(V: &Arg); | 
|---|
| 333 | ++I2; | 
|---|
| 334 | continue; | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|
| 337 | // There potentially are metadata uses for things like llvm.dbg.value. | 
|---|
| 338 | // Replace them with poison, after handling the other regular uses. | 
|---|
| 339 | auto RauwPoisonMetadata = make_scope_exit( | 
|---|
| 340 | F: [&]() { Arg.replaceAllUsesWith(V: PoisonValue::get(T: Arg.getType())); }); | 
|---|
| 341 |  | 
|---|
| 342 | if (Arg.use_empty()) | 
|---|
| 343 | continue; | 
|---|
| 344 |  | 
|---|
| 345 | // Otherwise, if we promoted this argument, we have to create an alloca in | 
|---|
| 346 | // the callee for every promotable part and store each of the new incoming | 
|---|
| 347 | // arguments into the corresponding alloca, what lets the old code (the | 
|---|
| 348 | // store instructions if they are allowed especially) a chance to work as | 
|---|
| 349 | // before. | 
|---|
| 350 | assert(Arg.getType()->isPointerTy() && | 
|---|
| 351 | "Only arguments with a pointer type are promotable"); | 
|---|
| 352 |  | 
|---|
| 353 | IRBuilder<NoFolder> IRB(&NF->begin()->front()); | 
|---|
| 354 |  | 
|---|
| 355 | // Add only the promoted elements, so parts from ArgsToPromote | 
|---|
| 356 | SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca; | 
|---|
| 357 | for (const auto &Pair : ArgsToPromote.find(Val: &Arg)->second) { | 
|---|
| 358 | int64_t Offset = Pair.first; | 
|---|
| 359 | const ArgPart &Part = Pair.second; | 
|---|
| 360 |  | 
|---|
| 361 | Argument *NewArg = I2++; | 
|---|
| 362 | NewArg->setName(Arg.getName() + "."+ Twine(Offset) + ".val"); | 
|---|
| 363 |  | 
|---|
| 364 | AllocaInst *NewAlloca = IRB.CreateAlloca( | 
|---|
| 365 | Ty: Part.Ty, ArraySize: nullptr, Name: Arg.getName() + "."+ Twine(Offset) + ".allc"); | 
|---|
| 366 | NewAlloca->setAlignment(Pair.second.Alignment); | 
|---|
| 367 | IRB.CreateAlignedStore(Val: NewArg, Ptr: NewAlloca, Align: Pair.second.Alignment); | 
|---|
| 368 |  | 
|---|
| 369 | // Collect the alloca to retarget the users to | 
|---|
| 370 | OffsetToAlloca.insert(KV: {Offset, NewAlloca}); | 
|---|
| 371 | } | 
|---|
| 372 |  | 
|---|
| 373 | auto GetAlloca = [&](Value *Ptr) { | 
|---|
| 374 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), 0); | 
|---|
| 375 | Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, | 
|---|
| 376 | /* AllowNonInbounds */ true); | 
|---|
| 377 | assert(Ptr == &Arg && "Not constant offset from arg?"); | 
|---|
| 378 | return OffsetToAlloca.lookup(Val: Offset.getSExtValue()); | 
|---|
| 379 | }; | 
|---|
| 380 |  | 
|---|
| 381 | // Cleanup the code from the dead instructions: GEPs and BitCasts in between | 
|---|
| 382 | // the original argument and its users: loads and stores. Retarget every | 
|---|
| 383 | // user to the new created alloca. | 
|---|
| 384 | SmallVector<Value *, 16> Worklist(Arg.users()); | 
|---|
| 385 | SmallVector<Instruction *, 16> DeadInsts; | 
|---|
| 386 | while (!Worklist.empty()) { | 
|---|
| 387 | Value *V = Worklist.pop_back_val(); | 
|---|
| 388 | if (isa<GetElementPtrInst>(Val: V)) { | 
|---|
| 389 | DeadInsts.push_back(Elt: cast<Instruction>(Val: V)); | 
|---|
| 390 | append_range(C&: Worklist, R: V->users()); | 
|---|
| 391 | continue; | 
|---|
| 392 | } | 
|---|
| 393 |  | 
|---|
| 394 | if (auto *LI = dyn_cast<LoadInst>(Val: V)) { | 
|---|
| 395 | Value *Ptr = LI->getPointerOperand(); | 
|---|
| 396 | LI->setOperand(i_nocapture: LoadInst::getPointerOperandIndex(), Val_nocapture: GetAlloca(Ptr)); | 
|---|
| 397 | continue; | 
|---|
| 398 | } | 
|---|
| 399 |  | 
|---|
| 400 | if (auto *SI = dyn_cast<StoreInst>(Val: V)) { | 
|---|
| 401 | assert(!SI->isVolatile() && "Volatile operations can't be promoted."); | 
|---|
| 402 | Value *Ptr = SI->getPointerOperand(); | 
|---|
| 403 | SI->setOperand(i_nocapture: StoreInst::getPointerOperandIndex(), Val_nocapture: GetAlloca(Ptr)); | 
|---|
| 404 | continue; | 
|---|
| 405 | } | 
|---|
| 406 |  | 
|---|
| 407 | llvm_unreachable( "Unexpected user"); | 
|---|
| 408 | } | 
|---|
| 409 |  | 
|---|
| 410 | for (Instruction *I : DeadInsts) { | 
|---|
| 411 | I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType())); | 
|---|
| 412 | I->eraseFromParent(); | 
|---|
| 413 | } | 
|---|
| 414 |  | 
|---|
| 415 | // Collect the allocas for promotion | 
|---|
| 416 | for (const auto &Pair : OffsetToAlloca) { | 
|---|
| 417 | assert(isAllocaPromotable(Pair.second) && | 
|---|
| 418 | "By design, only promotable allocas should be produced."); | 
|---|
| 419 | Allocas.push_back(Elt: Pair.second); | 
|---|
| 420 | } | 
|---|
| 421 | } | 
|---|
| 422 |  | 
|---|
| 423 | LLVM_DEBUG(dbgs() << "ARG PROMOTION: "<< Allocas.size() | 
|---|
| 424 | << " alloca(s) are promotable by Mem2Reg\n"); | 
|---|
| 425 |  | 
|---|
| 426 | if (!Allocas.empty()) { | 
|---|
| 427 | // And we are able to call the `promoteMemoryToRegister()` function. | 
|---|
| 428 | // Our earlier checks have ensured that PromoteMemToReg() will | 
|---|
| 429 | // succeed. | 
|---|
| 430 | auto &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: *NF); | 
|---|
| 431 | auto &AC = FAM.getResult<AssumptionAnalysis>(IR&: *NF); | 
|---|
| 432 | PromoteMemToReg(Allocas, DT, AC: &AC); | 
|---|
| 433 | } | 
|---|
| 434 |  | 
|---|
| 435 | return NF; | 
|---|
| 436 | } | 
|---|
| 437 |  | 
|---|
| 438 | /// Return true if we can prove that all callees pass in a valid pointer for the | 
|---|
| 439 | /// specified function argument. | 
|---|
| 440 | static bool allCallersPassValidPointerForArgument( | 
|---|
| 441 | Argument *Arg, SmallPtrSetImpl<CallBase *> &RecursiveCalls, | 
|---|
| 442 | Align NeededAlign, uint64_t NeededDerefBytes) { | 
|---|
| 443 | Function *Callee = Arg->getParent(); | 
|---|
| 444 | const DataLayout &DL = Callee->getDataLayout(); | 
|---|
| 445 | APInt Bytes(64, NeededDerefBytes); | 
|---|
| 446 |  | 
|---|
| 447 | // Check if the argument itself is marked dereferenceable and aligned. | 
|---|
| 448 | if (isDereferenceableAndAlignedPointer(V: Arg, Alignment: NeededAlign, Size: Bytes, DL)) | 
|---|
| 449 | return true; | 
|---|
| 450 |  | 
|---|
| 451 | // Look at all call sites of the function.  At this point we know we only have | 
|---|
| 452 | // direct callees. | 
|---|
| 453 | return all_of(Range: Callee->users(), P: [&](User *U) { | 
|---|
| 454 | CallBase &CB = cast<CallBase>(Val&: *U); | 
|---|
| 455 | // In case of functions with recursive calls, this check | 
|---|
| 456 | // (isDereferenceableAndAlignedPointer) will fail when it tries to look at | 
|---|
| 457 | // the first caller of this function. The caller may or may not have a load, | 
|---|
| 458 | // incase it doesn't load the pointer being passed, this check will fail. | 
|---|
| 459 | // So, it's safe to skip the check incase we know that we are dealing with a | 
|---|
| 460 | // recursive call. For example we have a IR given below. | 
|---|
| 461 | // | 
|---|
| 462 | // def fun(ptr %a) { | 
|---|
| 463 | //   ... | 
|---|
| 464 | //   %loadres = load i32, ptr %a, align 4 | 
|---|
| 465 | //   %res = call i32 @fun(ptr %a) | 
|---|
| 466 | //   ... | 
|---|
| 467 | // } | 
|---|
| 468 | // | 
|---|
| 469 | // def bar(ptr %x) { | 
|---|
| 470 | //   ... | 
|---|
| 471 | //   %resbar = call i32 @fun(ptr %x) | 
|---|
| 472 | //   ... | 
|---|
| 473 | // } | 
|---|
| 474 | // | 
|---|
| 475 | // Since we record processed recursive calls, we check if the current | 
|---|
| 476 | // CallBase has been processed before. If yes it means that it is a | 
|---|
| 477 | // recursive call and we can skip the check just for this call. So, just | 
|---|
| 478 | // return true. | 
|---|
| 479 | if (RecursiveCalls.contains(Ptr: &CB)) | 
|---|
| 480 | return true; | 
|---|
| 481 |  | 
|---|
| 482 | return isDereferenceableAndAlignedPointer(V: CB.getArgOperand(i: Arg->getArgNo()), | 
|---|
| 483 | Alignment: NeededAlign, Size: Bytes, DL); | 
|---|
| 484 | }); | 
|---|
| 485 | } | 
|---|
| 486 |  | 
|---|
| 487 | // Try to prove that all Calls to F do not modify the memory pointed to by Arg, | 
|---|
| 488 | // using alias analysis local to each caller of F. | 
|---|
| 489 | static bool isArgUnmodifiedByAllCalls(Argument *Arg, | 
|---|
| 490 | FunctionAnalysisManager &FAM) { | 
|---|
| 491 | for (User *U : Arg->getParent()->users()) { | 
|---|
| 492 |  | 
|---|
| 493 | auto *Call = cast<CallBase>(Val: U); | 
|---|
| 494 |  | 
|---|
| 495 | MemoryLocation Loc = | 
|---|
| 496 | MemoryLocation::getForArgument(Call, ArgIdx: Arg->getArgNo(), TLI: nullptr); | 
|---|
| 497 |  | 
|---|
| 498 | AAResults &AAR = FAM.getResult<AAManager>(IR&: *Call->getFunction()); | 
|---|
| 499 | // Bail as soon as we find a Call where Arg may be modified. | 
|---|
| 500 | if (isModSet(MRI: AAR.getModRefInfo(I: Call, OptLoc: Loc))) | 
|---|
| 501 | return false; | 
|---|
| 502 | } | 
|---|
| 503 |  | 
|---|
| 504 | // All Users are Calls which do not modify the Arg. | 
|---|
| 505 | return true; | 
|---|
| 506 | } | 
|---|
| 507 |  | 
|---|
| 508 | /// Determine that this argument is safe to promote, and find the argument | 
|---|
| 509 | /// parts it can be promoted into. | 
|---|
| 510 | static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, | 
|---|
| 511 | unsigned MaxElements, bool IsRecursive, | 
|---|
| 512 | SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec, | 
|---|
| 513 | FunctionAnalysisManager &FAM) { | 
|---|
| 514 | // Quick exit for unused arguments | 
|---|
| 515 | if (Arg->use_empty()) | 
|---|
| 516 | return true; | 
|---|
| 517 |  | 
|---|
| 518 | // We can only promote this argument if all the uses are loads at known | 
|---|
| 519 | // offsets. | 
|---|
| 520 | // | 
|---|
| 521 | // Promoting the argument causes it to be loaded in the caller | 
|---|
| 522 | // unconditionally. This is only safe if we can prove that either the load | 
|---|
| 523 | // would have happened in the callee anyway (ie, there is a load in the entry | 
|---|
| 524 | // block) or the pointer passed in at every call site is guaranteed to be | 
|---|
| 525 | // valid. | 
|---|
| 526 | // In the former case, invalid loads can happen, but would have happened | 
|---|
| 527 | // anyway, in the latter case, invalid loads won't happen. This prevents us | 
|---|
| 528 | // from introducing an invalid load that wouldn't have happened in the | 
|---|
| 529 | // original code. | 
|---|
| 530 |  | 
|---|
| 531 | SmallDenseMap<int64_t, ArgPart, 4> ArgParts; | 
|---|
| 532 | Align NeededAlign(1); | 
|---|
| 533 | uint64_t NeededDerefBytes = 0; | 
|---|
| 534 |  | 
|---|
| 535 | // And if this is a byval argument we also allow to have store instructions. | 
|---|
| 536 | // Only handle in such way arguments with specified alignment; | 
|---|
| 537 | // if it's unspecified, the actual alignment of the argument is | 
|---|
| 538 | // target-specific. | 
|---|
| 539 | bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign(); | 
|---|
| 540 |  | 
|---|
| 541 | // An end user of a pointer argument is a load or store instruction. | 
|---|
| 542 | // Returns std::nullopt if this load or store is not based on the argument. | 
|---|
| 543 | // Return true if we can promote the instruction, false otherwise. | 
|---|
| 544 | auto HandleEndUser = [&](auto *I, Type *Ty, | 
|---|
| 545 | bool GuaranteedToExecute) -> std::optional<bool> { | 
|---|
| 546 | // Don't promote volatile or atomic instructions. | 
|---|
| 547 | if (!I->isSimple()) | 
|---|
| 548 | return false; | 
|---|
| 549 |  | 
|---|
| 550 | Value *Ptr = I->getPointerOperand(); | 
|---|
| 551 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), 0); | 
|---|
| 552 | Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, | 
|---|
| 553 | /* AllowNonInbounds */ true); | 
|---|
| 554 | if (Ptr != Arg) | 
|---|
| 555 | return std::nullopt; | 
|---|
| 556 |  | 
|---|
| 557 | if (Offset.getSignificantBits() >= 64) | 
|---|
| 558 | return false; | 
|---|
| 559 |  | 
|---|
| 560 | TypeSize Size = DL.getTypeStoreSize(Ty); | 
|---|
| 561 | // Don't try to promote scalable types. | 
|---|
| 562 | if (Size.isScalable()) | 
|---|
| 563 | return false; | 
|---|
| 564 |  | 
|---|
| 565 | // If this is a recursive function and one of the types is a pointer, | 
|---|
| 566 | // then promoting it might lead to recursive promotion. | 
|---|
| 567 | if (IsRecursive && Ty->isPointerTy()) | 
|---|
| 568 | return false; | 
|---|
| 569 |  | 
|---|
| 570 | int64_t Off = Offset.getSExtValue(); | 
|---|
| 571 | auto Pair = ArgParts.try_emplace( | 
|---|
| 572 | Key: Off, Args: ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr}); | 
|---|
| 573 | ArgPart &Part = Pair.first->second; | 
|---|
| 574 | bool OffsetNotSeenBefore = Pair.second; | 
|---|
| 575 |  | 
|---|
| 576 | // We limit promotion to only promoting up to a fixed number of elements of | 
|---|
| 577 | // the aggregate. | 
|---|
| 578 | if (MaxElements > 0 && ArgParts.size() > MaxElements) { | 
|---|
| 579 | LLVM_DEBUG(dbgs() << "ArgPromotion of "<< *Arg << " failed: " | 
|---|
| 580 | << "more than "<< MaxElements << " parts\n"); | 
|---|
| 581 | return false; | 
|---|
| 582 | } | 
|---|
| 583 |  | 
|---|
| 584 | // For now, we only support loading/storing one specific type at a given | 
|---|
| 585 | // offset. | 
|---|
| 586 | if (Part.Ty != Ty) { | 
|---|
| 587 | LLVM_DEBUG(dbgs() << "ArgPromotion of "<< *Arg << " failed: " | 
|---|
| 588 | << "accessed as both "<< *Part.Ty << " and "<< *Ty | 
|---|
| 589 | << " at offset "<< Off << "\n"); | 
|---|
| 590 | return false; | 
|---|
| 591 | } | 
|---|
| 592 |  | 
|---|
| 593 | // If this instruction is not guaranteed to execute, and we haven't seen a | 
|---|
| 594 | // load or store at this offset before (or it had lower alignment), then we | 
|---|
| 595 | // need to remember that requirement. | 
|---|
| 596 | // Note that skipping instructions of previously seen offsets is only | 
|---|
| 597 | // correct because we only allow a single type for a given offset, which | 
|---|
| 598 | // also means that the number of accessed bytes will be the same. | 
|---|
| 599 | if (!GuaranteedToExecute && | 
|---|
| 600 | (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) { | 
|---|
| 601 | // We won't be able to prove dereferenceability for negative offsets. | 
|---|
| 602 | if (Off < 0) | 
|---|
| 603 | return false; | 
|---|
| 604 |  | 
|---|
| 605 | // If the offset is not aligned, an aligned base pointer won't help. | 
|---|
| 606 | if (!isAligned(I->getAlign(), Off)) | 
|---|
| 607 | return false; | 
|---|
| 608 |  | 
|---|
| 609 | NeededDerefBytes = std::max(a: NeededDerefBytes, b: Off + Size.getFixedValue()); | 
|---|
| 610 | NeededAlign = std::max(NeededAlign, I->getAlign()); | 
|---|
| 611 | } | 
|---|
| 612 |  | 
|---|
| 613 | Part.Alignment = std::max(Part.Alignment, I->getAlign()); | 
|---|
| 614 | return true; | 
|---|
| 615 | }; | 
|---|
| 616 |  | 
|---|
| 617 | // Look for loads and stores that are guaranteed to execute on entry. | 
|---|
| 618 | for (Instruction &I : Arg->getParent()->getEntryBlock()) { | 
|---|
| 619 | std::optional<bool> Res{}; | 
|---|
| 620 | if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I)) | 
|---|
| 621 | Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true); | 
|---|
| 622 | else if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I)) | 
|---|
| 623 | Res = HandleEndUser(SI, SI->getValueOperand()->getType(), | 
|---|
| 624 | /* GuaranteedToExecute */ true); | 
|---|
| 625 | if (Res && !*Res) | 
|---|
| 626 | return false; | 
|---|
| 627 |  | 
|---|
| 628 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &I)) | 
|---|
| 629 | break; | 
|---|
| 630 | } | 
|---|
| 631 |  | 
|---|
| 632 | // Now look at all loads of the argument. Remember the load instructions | 
|---|
| 633 | // for the aliasing check below. | 
|---|
| 634 | SmallVector<const Use *, 16> Worklist; | 
|---|
| 635 | SmallPtrSet<const Use *, 16> Visited; | 
|---|
| 636 | SmallVector<LoadInst *, 16> Loads; | 
|---|
| 637 | SmallPtrSet<CallBase *, 4> RecursiveCalls; | 
|---|
| 638 | auto AppendUses = [&](const Value *V) { | 
|---|
| 639 | for (const Use &U : V->uses()) | 
|---|
| 640 | if (Visited.insert(Ptr: &U).second) | 
|---|
| 641 | Worklist.push_back(Elt: &U); | 
|---|
| 642 | }; | 
|---|
| 643 | AppendUses(Arg); | 
|---|
| 644 | while (!Worklist.empty()) { | 
|---|
| 645 | const Use *U = Worklist.pop_back_val(); | 
|---|
| 646 | Value *V = U->getUser(); | 
|---|
| 647 |  | 
|---|
| 648 | if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: V)) { | 
|---|
| 649 | if (!GEP->hasAllConstantIndices()) | 
|---|
| 650 | return false; | 
|---|
| 651 | AppendUses(V); | 
|---|
| 652 | continue; | 
|---|
| 653 | } | 
|---|
| 654 |  | 
|---|
| 655 | if (auto *LI = dyn_cast<LoadInst>(Val: V)) { | 
|---|
| 656 | if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false)) | 
|---|
| 657 | return false; | 
|---|
| 658 | Loads.push_back(Elt: LI); | 
|---|
| 659 | continue; | 
|---|
| 660 | } | 
|---|
| 661 |  | 
|---|
| 662 | // Stores are allowed for byval arguments | 
|---|
| 663 | auto *SI = dyn_cast<StoreInst>(Val: V); | 
|---|
| 664 | if (AreStoresAllowed && SI && | 
|---|
| 665 | U->getOperandNo() == StoreInst::getPointerOperandIndex()) { | 
|---|
| 666 | if (!*HandleEndUser(SI, SI->getValueOperand()->getType(), | 
|---|
| 667 | /* GuaranteedToExecute */ false)) | 
|---|
| 668 | return false; | 
|---|
| 669 | continue; | 
|---|
| 670 | // Only stores TO the argument is allowed, all the other stores are | 
|---|
| 671 | // unknown users | 
|---|
| 672 | } | 
|---|
| 673 |  | 
|---|
| 674 | auto *CB = dyn_cast<CallBase>(Val: V); | 
|---|
| 675 | Value *PtrArg = U->get(); | 
|---|
| 676 | if (CB && CB->getCalledFunction() == CB->getFunction()) { | 
|---|
| 677 | if (PtrArg != Arg) { | 
|---|
| 678 | LLVM_DEBUG(dbgs() << "ArgPromotion of "<< *Arg << " failed: " | 
|---|
| 679 | << "pointer offset is not equal to zero\n"); | 
|---|
| 680 | return false; | 
|---|
| 681 | } | 
|---|
| 682 |  | 
|---|
| 683 | unsigned int ArgNo = Arg->getArgNo(); | 
|---|
| 684 | if (U->getOperandNo() != ArgNo) { | 
|---|
| 685 | LLVM_DEBUG(dbgs() << "ArgPromotion of "<< *Arg << " failed: " | 
|---|
| 686 | << "arg position is different in callee\n"); | 
|---|
| 687 | return false; | 
|---|
| 688 | } | 
|---|
| 689 |  | 
|---|
| 690 | // We limit promotion to only promoting up to a fixed number of elements | 
|---|
| 691 | // of the aggregate. | 
|---|
| 692 | if (MaxElements > 0 && ArgParts.size() > MaxElements) { | 
|---|
| 693 | LLVM_DEBUG(dbgs() << "ArgPromotion of "<< *Arg << " failed: " | 
|---|
| 694 | << "more than "<< MaxElements << " parts\n"); | 
|---|
| 695 | return false; | 
|---|
| 696 | } | 
|---|
| 697 |  | 
|---|
| 698 | RecursiveCalls.insert(Ptr: CB); | 
|---|
| 699 | continue; | 
|---|
| 700 | } | 
|---|
| 701 | // Unknown user. | 
|---|
| 702 | LLVM_DEBUG(dbgs() << "ArgPromotion of "<< *Arg << " failed: " | 
|---|
| 703 | << "unknown user "<< *V << "\n"); | 
|---|
| 704 | return false; | 
|---|
| 705 | } | 
|---|
| 706 |  | 
|---|
| 707 | if (NeededDerefBytes || NeededAlign > 1) { | 
|---|
| 708 | // Try to prove a required deref / aligned requirement. | 
|---|
| 709 | if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign, | 
|---|
| 710 | NeededDerefBytes)) { | 
|---|
| 711 | LLVM_DEBUG(dbgs() << "ArgPromotion of "<< *Arg << " failed: " | 
|---|
| 712 | << "not dereferenceable or aligned\n"); | 
|---|
| 713 | return false; | 
|---|
| 714 | } | 
|---|
| 715 | } | 
|---|
| 716 |  | 
|---|
| 717 | if (ArgParts.empty()) | 
|---|
| 718 | return true; // No users, this is a dead argument. | 
|---|
| 719 |  | 
|---|
| 720 | // Sort parts by offset. | 
|---|
| 721 | append_range(C&: ArgPartsVec, R&: ArgParts); | 
|---|
| 722 | sort(C&: ArgPartsVec, Comp: llvm::less_first()); | 
|---|
| 723 |  | 
|---|
| 724 | // Make sure the parts are non-overlapping. | 
|---|
| 725 | int64_t Offset = ArgPartsVec[0].first; | 
|---|
| 726 | for (const auto &Pair : ArgPartsVec) { | 
|---|
| 727 | if (Pair.first < Offset) | 
|---|
| 728 | return false; // Overlap with previous part. | 
|---|
| 729 |  | 
|---|
| 730 | Offset = Pair.first + DL.getTypeStoreSize(Ty: Pair.second.Ty); | 
|---|
| 731 | } | 
|---|
| 732 |  | 
|---|
| 733 | // If store instructions are allowed, the path from the entry of the function | 
|---|
| 734 | // to each load may be not free of instructions that potentially invalidate | 
|---|
| 735 | // the load, and this is an admissible situation. | 
|---|
| 736 | if (AreStoresAllowed) | 
|---|
| 737 | return true; | 
|---|
| 738 |  | 
|---|
| 739 | // Okay, now we know that the argument is only used by load instructions, and | 
|---|
| 740 | // it is safe to unconditionally perform all of them. | 
|---|
| 741 |  | 
|---|
| 742 | // If we can determine that no call to the Function modifies the memory region | 
|---|
| 743 | // accessed through Arg, through alias analysis using actual arguments in the | 
|---|
| 744 | // callers, we know that it is guaranteed to be safe to promote the argument. | 
|---|
| 745 | if (isArgUnmodifiedByAllCalls(Arg, FAM)) | 
|---|
| 746 | return true; | 
|---|
| 747 |  | 
|---|
| 748 | // Otherwise, use alias analysis to check if the pointer is guaranteed to not | 
|---|
| 749 | // be modified from entry of the function to each of the load instructions. | 
|---|
| 750 | for (LoadInst *Load : Loads) { | 
|---|
| 751 | // Check to see if the load is invalidated from the start of the block to | 
|---|
| 752 | // the load itself. | 
|---|
| 753 | BasicBlock *BB = Load->getParent(); | 
|---|
| 754 |  | 
|---|
| 755 | MemoryLocation Loc = MemoryLocation::get(LI: Load); | 
|---|
| 756 | if (AAR.canInstructionRangeModRef(I1: BB->front(), I2: *Load, Loc, Mode: ModRefInfo::Mod)) | 
|---|
| 757 | return false; // Pointer is invalidated! | 
|---|
| 758 |  | 
|---|
| 759 | // Now check every path from the entry block to the load for transparency. | 
|---|
| 760 | // To do this, we perform a depth first search on the inverse CFG from the | 
|---|
| 761 | // loading block. | 
|---|
| 762 | for (BasicBlock *P : predecessors(BB)) { | 
|---|
| 763 | for (BasicBlock *TranspBB : inverse_depth_first(G: P)) | 
|---|
| 764 | if (AAR.canBasicBlockModify(BB: *TranspBB, Loc)) | 
|---|
| 765 | return false; | 
|---|
| 766 | } | 
|---|
| 767 | } | 
|---|
| 768 |  | 
|---|
| 769 | // If the path from the entry of the function to each load is free of | 
|---|
| 770 | // instructions that potentially invalidate the load, we can make the | 
|---|
| 771 | // transformation! | 
|---|
| 772 | return true; | 
|---|
| 773 | } | 
|---|
| 774 |  | 
|---|
| 775 | /// Check if callers and callee agree on how promoted arguments would be | 
|---|
| 776 | /// passed. | 
|---|
| 777 | static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F, | 
|---|
| 778 | const TargetTransformInfo &TTI) { | 
|---|
| 779 | return all_of(Range: F.uses(), P: [&](const Use &U) { | 
|---|
| 780 | CallBase *CB = dyn_cast<CallBase>(Val: U.getUser()); | 
|---|
| 781 | if (!CB) | 
|---|
| 782 | return false; | 
|---|
| 783 |  | 
|---|
| 784 | const Function *Caller = CB->getCaller(); | 
|---|
| 785 | const Function *Callee = CB->getCalledFunction(); | 
|---|
| 786 | return TTI.areTypesABICompatible(Caller, Callee, Types); | 
|---|
| 787 | }); | 
|---|
| 788 | } | 
|---|
| 789 |  | 
|---|
| 790 | /// PromoteArguments - This method checks the specified function to see if there | 
|---|
| 791 | /// are any promotable arguments and if it is safe to promote the function (for | 
|---|
| 792 | /// example, all callers are direct).  If safe to promote some arguments, it | 
|---|
| 793 | /// calls the DoPromotion method. | 
|---|
| 794 | static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, | 
|---|
| 795 | unsigned MaxElements, bool IsRecursive) { | 
|---|
| 796 | // Don't perform argument promotion for naked functions; otherwise we can end | 
|---|
| 797 | // up removing parameters that are seemingly 'not used' as they are referred | 
|---|
| 798 | // to in the assembly. | 
|---|
| 799 | if (F->hasFnAttribute(Kind: Attribute::Naked)) | 
|---|
| 800 | return nullptr; | 
|---|
| 801 |  | 
|---|
| 802 | // Make sure that it is local to this module. | 
|---|
| 803 | if (!F->hasLocalLinkage()) | 
|---|
| 804 | return nullptr; | 
|---|
| 805 |  | 
|---|
| 806 | // Don't promote arguments for variadic functions. Adding, removing, or | 
|---|
| 807 | // changing non-pack parameters can change the classification of pack | 
|---|
| 808 | // parameters. Frontends encode that classification at the call site in the | 
|---|
| 809 | // IR, while in the callee the classification is determined dynamically based | 
|---|
| 810 | // on the number of registers consumed so far. | 
|---|
| 811 | if (F->isVarArg()) | 
|---|
| 812 | return nullptr; | 
|---|
| 813 |  | 
|---|
| 814 | // Don't transform functions that receive inallocas, as the transformation may | 
|---|
| 815 | // not be safe depending on calling convention. | 
|---|
| 816 | if (F->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca)) | 
|---|
| 817 | return nullptr; | 
|---|
| 818 |  | 
|---|
| 819 | // First check: see if there are any pointer arguments!  If not, quick exit. | 
|---|
| 820 | SmallVector<Argument *, 16> PointerArgs; | 
|---|
| 821 | for (Argument &I : F->args()) | 
|---|
| 822 | if (I.getType()->isPointerTy()) | 
|---|
| 823 | PointerArgs.push_back(Elt: &I); | 
|---|
| 824 | if (PointerArgs.empty()) | 
|---|
| 825 | return nullptr; | 
|---|
| 826 |  | 
|---|
| 827 | // Second check: make sure that all callers are direct callers.  We can't | 
|---|
| 828 | // transform functions that have indirect callers.  Also see if the function | 
|---|
| 829 | // is self-recursive. | 
|---|
| 830 | for (Use &U : F->uses()) { | 
|---|
| 831 | CallBase *CB = dyn_cast<CallBase>(Val: U.getUser()); | 
|---|
| 832 | // Must be a direct call. | 
|---|
| 833 | if (CB == nullptr || !CB->isCallee(U: &U) || | 
|---|
| 834 | CB->getFunctionType() != F->getFunctionType()) | 
|---|
| 835 | return nullptr; | 
|---|
| 836 |  | 
|---|
| 837 | // Can't change signature of musttail callee | 
|---|
| 838 | if (CB->isMustTailCall()) | 
|---|
| 839 | return nullptr; | 
|---|
| 840 |  | 
|---|
| 841 | if (CB->getFunction() == F) | 
|---|
| 842 | IsRecursive = true; | 
|---|
| 843 | } | 
|---|
| 844 |  | 
|---|
| 845 | // Can't change signature of musttail caller | 
|---|
| 846 | // FIXME: Support promoting whole chain of musttail functions | 
|---|
| 847 | for (BasicBlock &BB : *F) | 
|---|
| 848 | if (BB.getTerminatingMustTailCall()) | 
|---|
| 849 | return nullptr; | 
|---|
| 850 |  | 
|---|
| 851 | const DataLayout &DL = F->getDataLayout(); | 
|---|
| 852 | auto &AAR = FAM.getResult<AAManager>(IR&: *F); | 
|---|
| 853 | const auto &TTI = FAM.getResult<TargetIRAnalysis>(IR&: *F); | 
|---|
| 854 |  | 
|---|
| 855 | // Check to see which arguments are promotable.  If an argument is promotable, | 
|---|
| 856 | // add it to ArgsToPromote. | 
|---|
| 857 | DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; | 
|---|
| 858 | unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams(); | 
|---|
| 859 | for (Argument *PtrArg : PointerArgs) { | 
|---|
| 860 | // Replace sret attribute with noalias. This reduces register pressure by | 
|---|
| 861 | // avoiding a register copy. | 
|---|
| 862 | if (PtrArg->hasStructRetAttr()) { | 
|---|
| 863 | unsigned ArgNo = PtrArg->getArgNo(); | 
|---|
| 864 | F->removeParamAttr(ArgNo, Kind: Attribute::StructRet); | 
|---|
| 865 | F->addParamAttr(ArgNo, Kind: Attribute::NoAlias); | 
|---|
| 866 | for (Use &U : F->uses()) { | 
|---|
| 867 | CallBase &CB = cast<CallBase>(Val&: *U.getUser()); | 
|---|
| 868 | CB.removeParamAttr(ArgNo, Kind: Attribute::StructRet); | 
|---|
| 869 | CB.addParamAttr(ArgNo, Kind: Attribute::NoAlias); | 
|---|
| 870 | } | 
|---|
| 871 | } | 
|---|
| 872 |  | 
|---|
| 873 | // If we can promote the pointer to its value. | 
|---|
| 874 | SmallVector<OffsetAndArgPart, 4> ArgParts; | 
|---|
| 875 |  | 
|---|
| 876 | if (findArgParts(Arg: PtrArg, DL, AAR, MaxElements, IsRecursive, ArgPartsVec&: ArgParts, | 
|---|
| 877 | FAM)) { | 
|---|
| 878 | SmallVector<Type *, 4> Types; | 
|---|
| 879 | for (const auto &Pair : ArgParts) | 
|---|
| 880 | Types.push_back(Elt: Pair.second.Ty); | 
|---|
| 881 |  | 
|---|
| 882 | if (areTypesABICompatible(Types, F: *F, TTI)) { | 
|---|
| 883 | NumArgsAfterPromote += ArgParts.size() - 1; | 
|---|
| 884 | ArgsToPromote.insert(KV: {PtrArg, std::move(ArgParts)}); | 
|---|
| 885 | } | 
|---|
| 886 | } | 
|---|
| 887 | } | 
|---|
| 888 |  | 
|---|
| 889 | // No promotable pointer arguments. | 
|---|
| 890 | if (ArgsToPromote.empty()) | 
|---|
| 891 | return nullptr; | 
|---|
| 892 |  | 
|---|
| 893 | if (NumArgsAfterPromote > TTI.getMaxNumArgs()) | 
|---|
| 894 | return nullptr; | 
|---|
| 895 |  | 
|---|
| 896 | return doPromotion(F, FAM, ArgsToPromote); | 
|---|
| 897 | } | 
|---|
| 898 |  | 
|---|
| 899 | PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, | 
|---|
| 900 | CGSCCAnalysisManager &AM, | 
|---|
| 901 | LazyCallGraph &CG, | 
|---|
| 902 | CGSCCUpdateResult &UR) { | 
|---|
| 903 | bool Changed = false, LocalChange; | 
|---|
| 904 |  | 
|---|
| 905 | // Iterate until we stop promoting from this SCC. | 
|---|
| 906 | do { | 
|---|
| 907 | LocalChange = false; | 
|---|
| 908 |  | 
|---|
| 909 | FunctionAnalysisManager &FAM = | 
|---|
| 910 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager(); | 
|---|
| 911 |  | 
|---|
| 912 | bool IsRecursive = C.size() > 1; | 
|---|
| 913 | for (LazyCallGraph::Node &N : C) { | 
|---|
| 914 | Function &OldF = N.getFunction(); | 
|---|
| 915 | Function *NewF = promoteArguments(F: &OldF, FAM, MaxElements, IsRecursive); | 
|---|
| 916 | if (!NewF) | 
|---|
| 917 | continue; | 
|---|
| 918 | LocalChange = true; | 
|---|
| 919 |  | 
|---|
| 920 | // Directly substitute the functions in the call graph. Note that this | 
|---|
| 921 | // requires the old function to be completely dead and completely | 
|---|
| 922 | // replaced by the new function. It does no call graph updates, it merely | 
|---|
| 923 | // swaps out the particular function mapped to a particular node in the | 
|---|
| 924 | // graph. | 
|---|
| 925 | C.getOuterRefSCC().replaceNodeFunction(N, NewF&: *NewF); | 
|---|
| 926 | FAM.clear(IR&: OldF, Name: OldF.getName()); | 
|---|
| 927 | OldF.eraseFromParent(); | 
|---|
| 928 |  | 
|---|
| 929 | PreservedAnalyses FuncPA; | 
|---|
| 930 | FuncPA.preserveSet<CFGAnalyses>(); | 
|---|
| 931 | for (auto *U : NewF->users()) { | 
|---|
| 932 | auto *UserF = cast<CallBase>(Val: U)->getFunction(); | 
|---|
| 933 | FAM.invalidate(IR&: *UserF, PA: FuncPA); | 
|---|
| 934 | } | 
|---|
| 935 | } | 
|---|
| 936 |  | 
|---|
| 937 | Changed |= LocalChange; | 
|---|
| 938 | } while (LocalChange); | 
|---|
| 939 |  | 
|---|
| 940 | if (!Changed) | 
|---|
| 941 | return PreservedAnalyses::all(); | 
|---|
| 942 |  | 
|---|
| 943 | PreservedAnalyses PA; | 
|---|
| 944 | // We've cleared out analyses for deleted functions. | 
|---|
| 945 | PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); | 
|---|
| 946 | // We've manually invalidated analyses for functions we've modified. | 
|---|
| 947 | PA.preserveSet<AllAnalysesOn<Function>>(); | 
|---|
| 948 | return PA; | 
|---|
| 949 | } | 
|---|
| 950 |  | 
|---|