1 | //===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass promotes "by reference" arguments to be "by value" arguments. In |
10 | // practice, this means looking for internal functions that have pointer |
11 | // arguments. If it can prove, through the use of alias analysis, that an |
12 | // argument is *only* loaded, then it can pass the value into the function |
13 | // instead of the address of the value. This can cause recursive simplification |
14 | // of code and lead to the elimination of allocas (especially in C++ template |
15 | // code like the STL). |
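//
// For example (illustrative IR only; the names are hypothetical), an internal
// function such as
//
//   define internal i32 @callee(ptr %p) {
//     %v = load i32, ptr %p
//     ret i32 %v
//   }
//
// can be rewritten to take the loaded value directly,
//
//   define internal i32 @callee(i32 %p.val) {
//     ret i32 %p.val
//   }
//
// with each call site performing the load of %p before the call.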
16 | // |
17 | // This pass also handles aggregate arguments that are passed into a function, |
18 | // scalarizing them if the elements of the aggregate are only loaded. Note that |
19 | // by default it refuses to scalarize aggregates which would require passing in |
20 | // more than three operands to the function, because passing thousands of |
21 | // operands for a large array or structure is unprofitable! This limit can be |
22 | // configured or disabled, however. |
23 | // |
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but this is not currently
// implemented. This case would be best handled when and if LLVM begins
// supporting multiple return values from functions.
28 | // |
29 | //===----------------------------------------------------------------------===// |
30 | |
31 | #include "llvm/Transforms/IPO/ArgumentPromotion.h" |
32 | |
33 | #include "llvm/ADT/DepthFirstIterator.h" |
34 | #include "llvm/ADT/STLExtras.h" |
35 | #include "llvm/ADT/ScopeExit.h" |
36 | #include "llvm/ADT/SmallPtrSet.h" |
37 | #include "llvm/ADT/SmallVector.h" |
38 | #include "llvm/ADT/Statistic.h" |
39 | #include "llvm/ADT/Twine.h" |
40 | #include "llvm/Analysis/AssumptionCache.h" |
41 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
42 | #include "llvm/Analysis/CallGraph.h" |
43 | #include "llvm/Analysis/Loads.h" |
44 | #include "llvm/Analysis/MemoryLocation.h" |
45 | #include "llvm/Analysis/TargetTransformInfo.h" |
46 | #include "llvm/Analysis/ValueTracking.h" |
47 | #include "llvm/IR/Argument.h" |
48 | #include "llvm/IR/Attributes.h" |
49 | #include "llvm/IR/BasicBlock.h" |
50 | #include "llvm/IR/CFG.h" |
51 | #include "llvm/IR/Constants.h" |
52 | #include "llvm/IR/DataLayout.h" |
53 | #include "llvm/IR/DerivedTypes.h" |
54 | #include "llvm/IR/Dominators.h" |
55 | #include "llvm/IR/Function.h" |
56 | #include "llvm/IR/IRBuilder.h" |
57 | #include "llvm/IR/InstrTypes.h" |
58 | #include "llvm/IR/Instruction.h" |
59 | #include "llvm/IR/Instructions.h" |
60 | #include "llvm/IR/Metadata.h" |
61 | #include "llvm/IR/Module.h" |
62 | #include "llvm/IR/NoFolder.h" |
63 | #include "llvm/IR/PassManager.h" |
64 | #include "llvm/IR/Type.h" |
65 | #include "llvm/IR/Use.h" |
66 | #include "llvm/IR/User.h" |
67 | #include "llvm/IR/Value.h" |
68 | #include "llvm/Support/Casting.h" |
69 | #include "llvm/Support/Debug.h" |
70 | #include "llvm/Support/raw_ostream.h" |
71 | #include "llvm/Transforms/Utils/Local.h" |
72 | #include "llvm/Transforms/Utils/PromoteMemToReg.h" |
73 | #include <algorithm> |
74 | #include <cassert> |
75 | #include <cstdint> |
76 | #include <utility> |
77 | #include <vector> |
78 | |
79 | using namespace llvm; |
80 | |
81 | #define DEBUG_TYPE "argpromotion" |
82 | |
STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
85 | |
86 | namespace { |
87 | |
88 | struct ArgPart { |
89 | Type *Ty; |
90 | Align Alignment; |
91 | /// A representative guaranteed-executed load or store instruction for use by |
92 | /// metadata transfer. |
93 | Instruction *MustExecInstr; |
94 | }; |
95 | |
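// A byte offset into a pointer argument, paired with the part (type,
// alignment, representative instruction) accessed at that offset.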
96 | using OffsetAndArgPart = std::pair<int64_t, ArgPart>; |
97 | |
98 | } // end anonymous namespace |
99 | |
100 | static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL, |
101 | Value *Ptr, Type *ResElemTy, int64_t Offset) { |
102 | if (Offset != 0) { |
    APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
    Ptr = IRB.CreatePtrAdd(Ptr, IRB.getInt(APOffset));
105 | } |
106 | return Ptr; |
107 | } |
108 | |
/// doPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
112 | static Function * |
113 | doPromotion(Function *F, FunctionAnalysisManager &FAM, |
114 | const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> |
115 | &ArgsToPromote) { |
116 | // Start by computing a new prototype for the function, which is the same as |
117 | // the old function, but has modified arguments. |
118 | FunctionType *FTy = F->getFunctionType(); |
119 | std::vector<Type *> Params; |
120 | |
  // ArgAttrVec - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
124 | SmallVector<AttributeSet, 8> ArgAttrVec; |
125 | // Mapping from old to new argument indices. -1 for promoted or removed |
126 | // arguments. |
127 | SmallVector<unsigned> NewArgIndices; |
128 | AttributeList PAL = F->getAttributes(); |
129 | |
130 | // First, determine the new argument list |
131 | unsigned ArgNo = 0, NewArgNo = 0; |
132 | for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; |
133 | ++I, ++ArgNo) { |
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
      NewArgIndices.push_back(NewArgNo++);
    } else if (I->use_empty()) {
      // Dead argument (dead arguments are always marked as promotable).
      ++NumArgumentsDead;
      NewArgIndices.push_back((unsigned)-1);
    } else {
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
      NewArgIndices.push_back((unsigned)-1);
      NewArgNo += ArgParts.size();
    }
153 | } |
154 | |
155 | Type *RetTy = FTy->getReturnType(); |
156 | |
157 | // Construct the new function type using the new arguments. |
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);
  NF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat);
166 | |
  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and !dbg metadata
  // needs to be unique, so drop it from the original function.
170 | F->setSubprogram(nullptr); |
171 | |
172 | LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" |
173 | << "From: " << *F); |
174 | |
175 | uint64_t LargestVectorWidth = 0; |
176 | for (auto *I : Params) |
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());
180 | |
181 | // Recompute the parameter attributes list based on the new arguments for |
182 | // the function. |
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));
185 | |
186 | // Remap argument indices in allocsize attribute. |
187 | if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) { |
188 | unsigned Arg1 = NewArgIndices[AllocSize->first]; |
    assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument");
    std::optional<unsigned> Arg2;
    if (AllocSize->second) {
      Arg2 = NewArgIndices[*AllocSize->second];
      assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument");
    }
    NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2));
196 | } |
197 | |
  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);
203 | |
  // Loop over all the callers of the function, transforming the call sites to
  // load the promoted argument parts and pass the loaded values in.
206 | SmallVector<Value *, 16> Args; |
207 | const DataLayout &DL = F->getDataLayout(); |
208 | SmallVector<WeakTrackingVH, 16> DeadArgs; |
209 | |
210 | while (!F->use_empty()) { |
    CallBase &CB = cast<CallBase>(*F->user_back());
212 | assert(CB.getCalledFunction() == F); |
213 | const AttributeList &CallPAL = CB.getAttributes(); |
214 | IRBuilder<NoFolder> IRB(&CB); |
215 | |
216 | // Loop over the operands, inserting GEP and loads in the caller as |
217 | // appropriate. |
218 | auto *AI = CB.arg_begin(); |
219 | ArgNo = 0; |
220 | for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; |
221 | ++I, ++AI, ++ArgNo) { |
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_noundef,
                              LLVMContext::MD_nontemporal});
            // Only transfer poison-generating metadata if we also have
            // !noundef.
            // TODO: Without !noundef, we could merge this metadata across
            // all promoted loads.
            if (LI->hasMetadata(LLVMContext::MD_noundef))
              LI->copyMetadata(*Pair.second.MustExecInstr,
                               {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                                LLVMContext::MD_align});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      } else {
        assert(ArgsToPromote.count(&*I) && I->use_empty());
        DeadArgs.emplace_back(AI->get());
255 | } |
256 | } |
257 | |
258 | // Push any varargs arguments on the list. |
259 | for (; AI != CB.arg_end(); ++AI, ++ArgNo) { |
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
262 | } |
263 | |
264 | SmallVector<OperandBundleDef, 1> OpBundles; |
    CB.getOperandBundlesAsDefs(OpBundles);
266 | |
267 | CallBase *NewCS = nullptr; |
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", CB.getIterator());
    } else {
      auto *NewCall =
          CallInst::Create(NF, Args, OpBundles, "", CB.getIterator());
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
282 | Args.clear(); |
283 | ArgAttrVec.clear(); |
284 | |
    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);
287 | |
288 | if (!CB.use_empty()) { |
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
291 | } |
292 | |
293 | // Finally, remove the old call from the program, reducing the use-count of |
294 | // F. |
295 | CB.eraseFromParent(); |
296 | } |
297 | |
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);
299 | |
300 | // Since we have now created the new function, splice the body of the old |
301 | // function right into the new function, leaving the old rotting hulk of the |
302 | // function empty. |
  NF->splice(NF->begin(), F);
304 | |
  // We will collect all the newly created allocas to promote them into
  // registers after the following loop.
307 | SmallVector<AllocaInst *, 4> Allocas; |
308 | |
  // Loop over the argument list, transferring uses of the old arguments over
  // to the new arguments, and transferring the names over as well.
311 | Function::arg_iterator I2 = NF->arg_begin(); |
312 | for (Argument &Arg : F->args()) { |
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
318 | ++I2; |
319 | continue; |
320 | } |
321 | |
322 | // There potentially are metadata uses for things like llvm.dbg.value. |
323 | // Replace them with undef, after handling the other regular uses. |
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });
326 | |
327 | if (Arg.use_empty()) |
328 | continue; |
329 | |
    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca, which gives the old code
    // (especially the store instructions, if they are allowed) a chance to
    // work as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");
337 | |
338 | IRBuilder<NoFolder> IRB(&NF->begin()->front()); |
339 | |
    // Add only the promoted elements, i.e. the parts recorded in ArgsToPromote.
341 | SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca; |
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
343 | int64_t Offset = Pair.first; |
344 | const ArgPart &Part = Pair.second; |
345 | |
346 | Argument *NewArg = I2++; |
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca to retarget the users to
      OffsetToAlloca.insert({Offset, NewAlloca});
356 | } |
357 | |
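    // Map a pointer derived from the argument at a constant offset back to the
    // alloca created for that offset above. findArgParts has already verified
    // that every user of the argument accesses it at a constant offset.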
358 | auto GetAlloca = [&](Value *Ptr) { |
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
364 | }; |
365 | |
    // Clean up the dead instructions (GEPs and BitCasts) between the original
    // argument and its end users (loads and stores), and retarget every such
    // user to the newly created alloca.
369 | SmallVector<Value *, 16> Worklist; |
370 | SmallVector<Instruction *, 16> DeadInsts; |
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
377 | continue; |
378 | } |
379 | |
      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
383 | continue; |
384 | } |
385 | |
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
390 | continue; |
391 | } |
392 | |
      llvm_unreachable("Unexpected user");
394 | } |
395 | |
396 | for (Instruction *I : DeadInsts) { |
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
398 | I->eraseFromParent(); |
399 | } |
400 | |
401 | // Collect the allocas for promotion |
402 | for (const auto &Pair : OffsetToAlloca) { |
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
406 | } |
407 | } |
408 | |
409 | LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size() |
410 | << " alloca(s) are promotable by Mem2Reg\n" ); |
411 | |
412 | if (!Allocas.empty()) { |
    // Promote the allocas into SSA registers. Our earlier checks have ensured
    // that PromoteMemToReg() will succeed.
    auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
    auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
    PromoteMemToReg(Allocas, DT, &AC);
419 | } |
420 | |
421 | return NF; |
422 | } |
423 | |
/// Return true if we can prove that all call sites pass in a valid pointer for
/// the specified function argument.
426 | static bool allCallersPassValidPointerForArgument( |
427 | Argument *Arg, SmallPtrSetImpl<CallBase *> &RecursiveCalls, |
428 | Align NeededAlign, uint64_t NeededDerefBytes) { |
429 | Function *Callee = Arg->getParent(); |
430 | const DataLayout &DL = Callee->getDataLayout(); |
431 | APInt Bytes(64, NeededDerefBytes); |
432 | |
433 | // Check if the argument itself is marked dereferenceable and aligned. |
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
435 | return true; |
436 | |
  // Look at all call sites of the function. At this point we know that all of
  // them are direct calls.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    // For functions with recursive calls, this check
    // (isDereferenceableAndAlignedPointer) will fail when it tries to look at
    // the first caller of this function. The caller may or may not load the
    // pointer being passed; if it doesn't, this check will fail. So it is safe
    // to skip the check when we know we are dealing with a recursive call. For
    // example, consider the IR below.
    //
    // def fun(ptr %a) {
    //   ...
    //   %loadres = load i32, ptr %a, align 4
    //   %res = call i32 @fun(ptr %a)
    //   ...
    // }
    //
    // def bar(ptr %x) {
    //   ...
    //   %resbar = call i32 @fun(ptr %x)
    //   ...
    // }
    //
    // Since we record processed recursive calls, we check whether the current
    // CallBase has been processed before. If so, it is a recursive call and we
    // can skip the check just for this call site, so return true.
    if (RecursiveCalls.contains(&CB))
      return true;

    return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
                                              NeededAlign, Bytes, DL);
470 | }); |
471 | } |
472 | |
473 | /// Determine that this argument is safe to promote, and find the argument |
474 | /// parts it can be promoted into. |
475 | static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, |
476 | unsigned MaxElements, bool IsRecursive, |
477 | SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) { |
478 | // Quick exit for unused arguments |
479 | if (Arg->use_empty()) |
480 | return true; |
481 | |
482 | // We can only promote this argument if all the uses are loads at known |
483 | // offsets. |
484 | // |
485 | // Promoting the argument causes it to be loaded in the caller |
486 | // unconditionally. This is only safe if we can prove that either the load |
487 | // would have happened in the callee anyway (ie, there is a load in the entry |
488 | // block) or the pointer passed in at every call site is guaranteed to be |
489 | // valid. |
490 | // In the former case, invalid loads can happen, but would have happened |
491 | // anyway, in the latter case, invalid loads won't happen. This prevents us |
492 | // from introducing an invalid load that wouldn't have happened in the |
493 | // original code. |
494 | |
495 | SmallDenseMap<int64_t, ArgPart, 4> ArgParts; |
496 | Align NeededAlign(1); |
497 | uint64_t NeededDerefBytes = 0; |
498 | |
  // If this is a byval argument, we also allow store instructions. We only
  // handle arguments with a specified alignment this way; if the alignment is
  // unspecified, the actual alignment of the argument is target-specific.
503 | bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign(); |
504 | |
  // An end user of a pointer argument is a load or store instruction.
  // HandleEndUser returns std::nullopt if the load or store is not based on
  // the argument, true if we can promote the instruction, and false otherwise.
508 | auto HandleEndUser = [&](auto *I, Type *Ty, |
509 | bool GuaranteedToExecute) -> std::optional<bool> { |
510 | // Don't promote volatile or atomic instructions. |
511 | if (!I->isSimple()) |
512 | return false; |
513 | |
514 | Value *Ptr = I->getPointerOperand(); |
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
516 | Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, |
517 | /* AllowNonInbounds */ true); |
518 | if (Ptr != Arg) |
519 | return std::nullopt; |
520 | |
521 | if (Offset.getSignificantBits() >= 64) |
522 | return false; |
523 | |
524 | TypeSize Size = DL.getTypeStoreSize(Ty); |
525 | // Don't try to promote scalable types. |
526 | if (Size.isScalable()) |
527 | return false; |
528 | |
529 | // If this is a recursive function and one of the types is a pointer, |
530 | // then promoting it might lead to recursive promotion. |
531 | if (IsRecursive && Ty->isPointerTy()) |
532 | return false; |
533 | |
534 | int64_t Off = Offset.getSExtValue(); |
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
537 | ArgPart &Part = Pair.first->second; |
538 | bool OffsetNotSeenBefore = Pair.second; |
539 | |
540 | // We limit promotion to only promoting up to a fixed number of elements of |
541 | // the aggregate. |
542 | if (MaxElements > 0 && ArgParts.size() > MaxElements) { |
543 | LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " |
544 | << "more than " << MaxElements << " parts\n" ); |
545 | return false; |
546 | } |
547 | |
548 | // For now, we only support loading/storing one specific type at a given |
549 | // offset. |
550 | if (Part.Ty != Ty) { |
551 | LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " |
552 | << "accessed as both " << *Part.Ty << " and " << *Ty |
553 | << " at offset " << Off << "\n" ); |
554 | return false; |
555 | } |
556 | |
557 | // If this instruction is not guaranteed to execute, and we haven't seen a |
558 | // load or store at this offset before (or it had lower alignment), then we |
559 | // need to remember that requirement. |
560 | // Note that skipping instructions of previously seen offsets is only |
561 | // correct because we only allow a single type for a given offset, which |
562 | // also means that the number of accessed bytes will be the same. |
563 | if (!GuaranteedToExecute && |
564 | (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) { |
565 | // We won't be able to prove dereferenceability for negative offsets. |
566 | if (Off < 0) |
567 | return false; |
568 | |
569 | // If the offset is not aligned, an aligned base pointer won't help. |
570 | if (!isAligned(I->getAlign(), Off)) |
571 | return false; |
572 | |
      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
574 | NeededAlign = std::max(NeededAlign, I->getAlign()); |
575 | } |
576 | |
577 | Part.Alignment = std::max(Part.Alignment, I->getAlign()); |
578 | return true; |
579 | }; |
580 | |
581 | // Look for loads and stores that are guaranteed to execute on entry. |
582 | for (Instruction &I : Arg->getParent()->getEntryBlock()) { |
583 | std::optional<bool> Res{}; |
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
589 | if (Res && !*Res) |
590 | return false; |
591 | |
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
593 | break; |
594 | } |
595 | |
596 | // Now look at all loads of the argument. Remember the load instructions |
597 | // for the aliasing check below. |
598 | SmallVector<const Use *, 16> Worklist; |
599 | SmallPtrSet<const Use *, 16> Visited; |
600 | SmallVector<LoadInst *, 16> Loads; |
601 | SmallPtrSet<CallBase *, 4> RecursiveCalls; |
602 | auto AppendUses = [&](const Value *V) { |
603 | for (const Use &U : V->uses()) |
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
606 | }; |
607 | AppendUses(Arg); |
608 | while (!Worklist.empty()) { |
609 | const Use *U = Worklist.pop_back_val(); |
610 | Value *V = U->getUser(); |
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
627 | continue; |
628 | } |
629 | |
    // Stores are allowed for byval arguments.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
      // Only stores TO the argument are allowed; all other stores are
      // unknown users.
    }
641 | |
    auto *CB = dyn_cast<CallBase>(V);
    Value *PtrArg = U->get();
    if (CB && CB->getCalledFunction() == CB->getFunction()) {
      if (PtrArg != Arg) {
        LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                          << "pointer offset is not equal to zero\n");
648 | return false; |
649 | } |
650 | |
651 | unsigned int ArgNo = Arg->getArgNo(); |
652 | if (U->getOperandNo() != ArgNo) { |
653 | LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " |
654 | << "arg position is different in callee\n" ); |
655 | return false; |
656 | } |
657 | |
658 | // We limit promotion to only promoting up to a fixed number of elements |
659 | // of the aggregate. |
660 | if (MaxElements > 0 && ArgParts.size() > MaxElements) { |
661 | LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " |
662 | << "more than " << MaxElements << " parts\n" ); |
663 | return false; |
664 | } |
665 | |
      RecursiveCalls.insert(CB);
667 | continue; |
668 | } |
669 | // Unknown user. |
670 | LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " |
671 | << "unknown user " << *V << "\n" ); |
672 | return false; |
673 | } |
674 | |
675 | if (NeededDerefBytes || NeededAlign > 1) { |
676 | // Try to prove a required deref / aligned requirement. |
677 | if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign, |
678 | NeededDerefBytes)) { |
679 | LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " |
680 | << "not dereferenceable or aligned\n" ); |
681 | return false; |
682 | } |
683 | } |
684 | |
685 | if (ArgParts.empty()) |
686 | return true; // No users, this is a dead argument. |
687 | |
688 | // Sort parts by offset. |
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec, llvm::less_first());
691 | |
692 | // Make sure the parts are non-overlapping. |
693 | int64_t Offset = ArgPartsVec[0].first; |
694 | for (const auto &Pair : ArgPartsVec) { |
695 | if (Pair.first < Offset) |
696 | return false; // Overlap with previous part. |
697 | |
    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
699 | } |
700 | |
  // If store instructions are allowed, the path from the entry of the function
  // to each load may not be free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
704 | if (AreStoresAllowed) |
705 | return true; |
706 | |
707 | // Okay, now we know that the argument is only used by load instructions, and |
708 | // it is safe to unconditionally perform all of them. Use alias analysis to |
709 | // check to see if the pointer is guaranteed to not be modified from entry of |
710 | // the function to each of the load instructions. |
711 | |
712 | for (LoadInst *Load : Loads) { |
713 | // Check to see if the load is invalidated from the start of the block to |
714 | // the load itself. |
715 | BasicBlock *BB = Load->getParent(); |
716 | |
    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
719 | return false; // Pointer is invalidated! |
720 | |
721 | // Now check every path from the entry block to the load for transparency. |
722 | // To do this, we perform a depth first search on the inverse CFG from the |
723 | // loading block. |
724 | for (BasicBlock *P : predecessors(BB)) { |
      for (BasicBlock *TranspBB : inverse_depth_first(P))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
727 | return false; |
728 | } |
729 | } |
730 | |
731 | // If the path from the entry of the function to each load is free of |
732 | // instructions that potentially invalidate the load, we can make the |
733 | // transformation! |
734 | return true; |
735 | } |
736 | |
737 | /// Check if callers and callee agree on how promoted arguments would be |
738 | /// passed. |
739 | static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F, |
740 | const TargetTransformInfo &TTI) { |
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
743 | if (!CB) |
744 | return false; |
745 | |
746 | const Function *Caller = CB->getCaller(); |
747 | const Function *Callee = CB->getCalledFunction(); |
748 | return TTI.areTypesABICompatible(Caller, Callee, Types); |
749 | }); |
750 | } |
751 | |
/// PromoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct). If it is safe to promote
/// some arguments, it calls doPromotion.
756 | static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, |
757 | unsigned MaxElements, bool IsRecursive) { |
758 | // Don't perform argument promotion for naked functions; otherwise we can end |
759 | // up removing parameters that are seemingly 'not used' as they are referred |
760 | // to in the assembly. |
  if (F->hasFnAttribute(Attribute::Naked))
762 | return nullptr; |
763 | |
764 | // Make sure that it is local to this module. |
765 | if (!F->hasLocalLinkage()) |
766 | return nullptr; |
767 | |
768 | // Don't promote arguments for variadic functions. Adding, removing, or |
769 | // changing non-pack parameters can change the classification of pack |
770 | // parameters. Frontends encode that classification at the call site in the |
771 | // IR, while in the callee the classification is determined dynamically based |
772 | // on the number of registers consumed so far. |
773 | if (F->isVarArg()) |
774 | return nullptr; |
775 | |
776 | // Don't transform functions that receive inallocas, as the transformation may |
777 | // not be safe depending on calling convention. |
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
779 | return nullptr; |
780 | |
781 | // First check: see if there are any pointer arguments! If not, quick exit. |
782 | SmallVector<Argument *, 16> PointerArgs; |
783 | for (Argument &I : F->args()) |
784 | if (I.getType()->isPointerTy()) |
      PointerArgs.push_back(&I);
786 | if (PointerArgs.empty()) |
787 | return nullptr; |
788 | |
789 | // Second check: make sure that all callers are direct callers. We can't |
790 | // transform functions that have indirect callers. Also see if the function |
791 | // is self-recursive. |
792 | for (Use &U : F->uses()) { |
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
796 | CB->getFunctionType() != F->getFunctionType()) |
797 | return nullptr; |
798 | |
799 | // Can't change signature of musttail callee |
800 | if (CB->isMustTailCall()) |
801 | return nullptr; |
802 | |
803 | if (CB->getFunction() == F) |
804 | IsRecursive = true; |
805 | } |
806 | |
807 | // Can't change signature of musttail caller |
808 | // FIXME: Support promoting whole chain of musttail functions |
809 | for (BasicBlock &BB : *F) |
810 | if (BB.getTerminatingMustTailCall()) |
811 | return nullptr; |
812 | |
813 | const DataLayout &DL = F->getDataLayout(); |
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);
816 | |
817 | // Check to see which arguments are promotable. If an argument is promotable, |
818 | // add it to ArgsToPromote. |
819 | DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote; |
820 | unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams(); |
821 | for (Argument *PtrArg : PointerArgs) { |
822 | // Replace sret attribute with noalias. This reduces register pressure by |
823 | // avoiding a register copy. |
824 | if (PtrArg->hasStructRetAttr()) { |
825 | unsigned ArgNo = PtrArg->getArgNo(); |
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
832 | } |
833 | } |
834 | |
    // See if we can promote the pointer to its value.
836 | SmallVector<OffsetAndArgPart, 4> ArgParts; |
837 | |
    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
839 | SmallVector<Type *, 4> Types; |
840 | for (const auto &Pair : ArgParts) |
        Types.push_back(Pair.second.Ty);
842 | |
      if (areTypesABICompatible(Types, *F, TTI)) {
        NumArgsAfterPromote += ArgParts.size() - 1;
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
846 | } |
847 | } |
848 | } |
849 | |
850 | // No promotable pointer arguments. |
851 | if (ArgsToPromote.empty()) |
852 | return nullptr; |
853 | |
854 | if (NumArgsAfterPromote > TTI.getMaxNumArgs()) |
855 | return nullptr; |
856 | |
857 | return doPromotion(F, FAM, ArgsToPromote); |
858 | } |
859 | |
860 | PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, |
861 | CGSCCAnalysisManager &AM, |
862 | LazyCallGraph &CG, |
863 | CGSCCUpdateResult &UR) { |
864 | bool Changed = false, LocalChange; |
865 | |
866 | // Iterate until we stop promoting from this SCC. |
867 | do { |
868 | LocalChange = false; |
869 | |
870 | FunctionAnalysisManager &FAM = |
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
872 | |
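    // An SCC containing more than one function must be mutually recursive.
    // (A single function may still be self-recursive; promoteArguments
    // detects that case by inspecting its call sites.)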
873 | bool IsRecursive = C.size() > 1; |
874 | for (LazyCallGraph::Node &N : C) { |
875 | Function &OldF = N.getFunction(); |
      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
877 | if (!NewF) |
878 | continue; |
879 | LocalChange = true; |
880 | |
881 | // Directly substitute the functions in the call graph. Note that this |
882 | // requires the old function to be completely dead and completely |
883 | // replaced by the new function. It does no call graph updates, it merely |
884 | // swaps out the particular function mapped to a particular node in the |
885 | // graph. |
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
888 | OldF.eraseFromParent(); |
889 | |
890 | PreservedAnalyses FuncPA; |
891 | FuncPA.preserveSet<CFGAnalyses>(); |
892 | for (auto *U : NewF->users()) { |
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
895 | } |
896 | } |
897 | |
898 | Changed |= LocalChange; |
899 | } while (LocalChange); |
900 | |
901 | if (!Changed) |
902 | return PreservedAnalyses::all(); |
903 | |
904 | PreservedAnalyses PA; |
905 | // We've cleared out analyses for deleted functions. |
906 | PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); |
907 | // We've manually invalidated analyses for functions we've modified. |
908 | PA.preserveSet<AllAnalysesOn<Function>>(); |
909 | return PA; |
910 | } |
911 | |