1//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass promotes "by reference" arguments to be "by value" arguments. In
10// practice, this means looking for internal functions that have pointer
11// arguments. If it can prove, through the use of alias analysis, that an
12// argument is *only* loaded, then it can pass the value into the function
13// instead of the address of the value. This can cause recursive simplification
14// of code and lead to the elimination of allocas (especially in C++ template
15// code like the STL).
16//
17// This pass also handles aggregate arguments that are passed into a function,
18// scalarizing them if the elements of the aggregate are only loaded. Note that
19// by default it refuses to scalarize aggregates which would require passing in
20// more than three operands to the function, because passing thousands of
21// operands for a large array or structure is unprofitable! This limit can be
22// configured or disabled, however.
23//
24// Note that this transformation could also be done for arguments that are only
25// stored to (returning the value instead), but does not currently. This case
26// would be best handled when and if LLVM begins supporting multiple return
27// values from functions.
28//
29//===----------------------------------------------------------------------===//
30
31#include "llvm/Transforms/IPO/ArgumentPromotion.h"
32
33#include "llvm/ADT/DepthFirstIterator.h"
34#include "llvm/ADT/STLExtras.h"
35#include "llvm/ADT/ScopeExit.h"
36#include "llvm/ADT/SmallPtrSet.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/Twine.h"
40#include "llvm/Analysis/AssumptionCache.h"
41#include "llvm/Analysis/BasicAliasAnalysis.h"
42#include "llvm/Analysis/CallGraph.h"
43#include "llvm/Analysis/Loads.h"
44#include "llvm/Analysis/MemoryLocation.h"
45#include "llvm/Analysis/OptimizationRemarkEmitter.h"
46#include "llvm/Analysis/TargetTransformInfo.h"
47#include "llvm/Analysis/ValueTracking.h"
48#include "llvm/IR/Argument.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/BasicBlock.h"
51#include "llvm/IR/CFG.h"
52#include "llvm/IR/Constants.h"
53#include "llvm/IR/DIBuilder.h"
54#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/DerivedTypes.h"
56#include "llvm/IR/Dominators.h"
57#include "llvm/IR/Function.h"
58#include "llvm/IR/IRBuilder.h"
59#include "llvm/IR/InstrTypes.h"
60#include "llvm/IR/Instruction.h"
61#include "llvm/IR/Instructions.h"
62#include "llvm/IR/Metadata.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/NoFolder.h"
65#include "llvm/IR/PassManager.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Use.h"
68#include "llvm/IR/User.h"
69#include "llvm/IR/Value.h"
70#include "llvm/Support/Casting.h"
71#include "llvm/Support/Debug.h"
72#include "llvm/Support/raw_ostream.h"
73#include "llvm/Transforms/Utils/Local.h"
74#include "llvm/Transforms/Utils/PromoteMemToReg.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <utility>
79#include <vector>
80
81using namespace llvm;
82
83#define DEBUG_TYPE "argpromotion"
84
85STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
86STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
87
88namespace {
89
90struct ArgPart {
91 Type *Ty;
92 Align Alignment;
93 /// A representative guaranteed-executed load or store instruction for use by
94 /// metadata transfer.
95 Instruction *MustExecInstr;
96};
97
98using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
99
100} // end anonymous namespace
101
102static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
103 Value *Ptr, Type *ResElemTy, int64_t Offset) {
104 if (Offset != 0) {
105 APInt APOffset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), Offset,
106 /*isSigned=*/true);
107 Ptr = IRB.CreatePtrAdd(Ptr, Offset: IRB.getInt(AI: APOffset));
108 }
109 return Ptr;
110}
111
112/// DoPromotion - This method actually performs the promotion of the specified
113/// arguments, and returns the new function. At this point, we know that it's
114/// safe to do so.
115static Function *
116doPromotion(Function *F, FunctionAnalysisManager &FAM,
117 const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
118 &ArgsToPromote) {
119 // Start by computing a new prototype for the function, which is the same as
120 // the old function, but has modified arguments.
121 FunctionType *FTy = F->getFunctionType();
122 std::vector<Type *> Params;
123
124 // Attribute - Keep track of the parameter attributes for the arguments
125 // that we are *not* promoting. For the ones that we do promote, the parameter
126 // attributes are lost
127 SmallVector<AttributeSet, 8> ArgAttrVec;
128 // Mapping from old to new argument indices. -1 for promoted or removed
129 // arguments.
130 SmallVector<unsigned> NewArgIndices;
131 AttributeList PAL = F->getAttributes();
132 OptimizationRemarkEmitter ORE(F);
133
134 // First, determine the new argument list
135 unsigned ArgNo = 0, NewArgNo = 0;
136 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
137 ++I, ++ArgNo) {
138 auto It = ArgsToPromote.find(Val: &*I);
139 if (It == ArgsToPromote.end()) {
140 // Unchanged argument
141 Params.push_back(x: I->getType());
142 ArgAttrVec.push_back(Elt: PAL.getParamAttrs(ArgNo));
143 NewArgIndices.push_back(Elt: NewArgNo++);
144 } else if (I->use_empty()) {
145 // Dead argument (which are always marked as promotable)
146 ++NumArgumentsDead;
147 ORE.emit(RemarkBuilder: [&]() {
148 return OptimizationRemark(DEBUG_TYPE, "ArgumentRemoved", F)
149 << "eliminating argument " << ore::NV("ArgName", I->getName())
150 << "(" << ore::NV("ArgIndex", ArgNo) << ")";
151 });
152
153 NewArgIndices.push_back(Elt: (unsigned)-1);
154 } else {
155 const auto &ArgParts = It->second;
156 for (const auto &Pair : ArgParts) {
157 Params.push_back(x: Pair.second.Ty);
158 ArgAttrVec.push_back(Elt: AttributeSet());
159 }
160 ++NumArgumentsPromoted;
161 ORE.emit(RemarkBuilder: [&]() {
162 return OptimizationRemark(DEBUG_TYPE, "ArgumentPromoted", F)
163 << "promoting argument " << ore::NV("ArgName", I->getName())
164 << "(" << ore::NV("ArgIndex", ArgNo) << ")"
165 << " to pass by value";
166 });
167
168 NewArgIndices.push_back(Elt: (unsigned)-1);
169 NewArgNo += ArgParts.size();
170 }
171 }
172
173 Type *RetTy = FTy->getReturnType();
174
175 // Construct the new function type using the new arguments.
176 FunctionType *NFTy = FunctionType::get(Result: RetTy, Params, isVarArg: FTy->isVarArg());
177
178 // Create the new function body and insert it into the module.
179 Function *NF = Function::Create(Ty: NFTy, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(),
180 N: F->getName());
181 NF->copyAttributesFrom(Src: F);
182 NF->copyMetadata(Src: F, Offset: 0);
183
184 // The new function will have the !dbg metadata copied from the original
185 // function. The original function may not be deleted, and dbg metadata need
186 // to be unique, so we need to drop it.
187 F->setSubprogram(nullptr);
188
189 LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
190 << "From: " << *F);
191
192 uint64_t LargestVectorWidth = 0;
193 for (auto *I : Params)
194 if (auto *VT = dyn_cast<llvm::VectorType>(Val: I))
195 LargestVectorWidth = std::max(
196 a: LargestVectorWidth, b: VT->getPrimitiveSizeInBits().getKnownMinValue());
197
198 // Recompute the parameter attributes list based on the new arguments for
199 // the function.
200 NF->setAttributes(AttributeList::get(C&: F->getContext(), FnAttrs: PAL.getFnAttrs(),
201 RetAttrs: PAL.getRetAttrs(), ArgAttrs: ArgAttrVec));
202
203 // Remap argument indices in allocsize attribute.
204 if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) {
205 unsigned Arg1 = NewArgIndices[AllocSize->first];
206 assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument");
207 std::optional<unsigned> Arg2;
208 if (AllocSize->second) {
209 Arg2 = NewArgIndices[*AllocSize->second];
210 assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument");
211 }
212 NF->addFnAttr(Attr: Attribute::getWithAllocSizeArgs(Context&: F->getContext(), ElemSizeArg: Arg1, NumElemsArg: Arg2));
213 }
214
215 AttributeFuncs::updateMinLegalVectorWidthAttr(Fn&: *NF, Width: LargestVectorWidth);
216 ArgAttrVec.clear();
217
218 F->getParent()->getFunctionList().insert(where: F->getIterator(), New: NF);
219 NF->takeName(V: F);
220
221 // Loop over all the callers of the function, transforming the call sites to
222 // pass in the loaded pointers.
223 SmallVector<Value *, 16> Args;
224 const DataLayout &DL = F->getDataLayout();
225 SmallVector<WeakTrackingVH, 16> DeadArgs;
226
227 while (!F->use_empty()) {
228 CallBase &CB = cast<CallBase>(Val&: *F->user_back());
229 assert(CB.getCalledFunction() == F);
230 const AttributeList &CallPAL = CB.getAttributes();
231 IRBuilder<NoFolder> IRB(&CB);
232
233 // Loop over the operands, inserting GEP and loads in the caller as
234 // appropriate.
235 auto *AI = CB.arg_begin();
236 ArgNo = 0;
237 for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
238 ++I, ++AI, ++ArgNo) {
239 auto ArgIt = ArgsToPromote.find(Val: &*I);
240 if (ArgIt == ArgsToPromote.end()) {
241 Args.push_back(Elt: *AI); // Unmodified argument
242 ArgAttrVec.push_back(Elt: CallPAL.getParamAttrs(ArgNo));
243 } else if (!I->use_empty()) {
244 Value *V = *AI;
245 for (const auto &Pair : ArgIt->second) {
246 LoadInst *LI = IRB.CreateAlignedLoad(
247 Ty: Pair.second.Ty,
248 Ptr: createByteGEP(IRB, DL, Ptr: V, ResElemTy: Pair.second.Ty, Offset: Pair.first),
249 Align: Pair.second.Alignment, Name: V->getName() + ".val");
250 if (Pair.second.MustExecInstr) {
251 LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
252 LI->copyMetadata(SrcInst: *Pair.second.MustExecInstr,
253 WL: {LLVMContext::MD_dereferenceable,
254 LLVMContext::MD_dereferenceable_or_null,
255 LLVMContext::MD_noundef,
256 LLVMContext::MD_nontemporal});
257 // Only transfer poison-generating metadata if we also have
258 // !noundef.
259 // TODO: Without !noundef, we could merge this metadata across
260 // all promoted loads.
261 if (LI->hasMetadata(KindID: LLVMContext::MD_noundef))
262 LI->copyMetadata(SrcInst: *Pair.second.MustExecInstr,
263 WL: Metadata::PoisonGeneratingIDs);
264 }
265 Args.push_back(Elt: LI);
266 ArgAttrVec.push_back(Elt: AttributeSet());
267 }
268 } else {
269 assert(I->use_empty());
270 DeadArgs.emplace_back(Args: AI->get());
271 }
272 }
273
274 // Push any varargs arguments on the list.
275 for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
276 Args.push_back(Elt: *AI);
277 ArgAttrVec.push_back(Elt: CallPAL.getParamAttrs(ArgNo));
278 }
279
280 SmallVector<OperandBundleDef, 1> OpBundles;
281 CB.getOperandBundlesAsDefs(Defs&: OpBundles);
282
283 CallBase *NewCS = nullptr;
284 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
285 NewCS = InvokeInst::Create(Func: NF, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(),
286 Args, Bundles: OpBundles, NameStr: "", InsertBefore: CB.getIterator());
287 } else {
288 auto *NewCall =
289 CallInst::Create(Func: NF, Args, Bundles: OpBundles, NameStr: "", InsertBefore: CB.getIterator());
290 NewCall->setTailCallKind(cast<CallInst>(Val: &CB)->getTailCallKind());
291 NewCS = NewCall;
292 }
293 NewCS->setCallingConv(CB.getCallingConv());
294 NewCS->setAttributes(AttributeList::get(C&: F->getContext(),
295 FnAttrs: CallPAL.getFnAttrs(),
296 RetAttrs: CallPAL.getRetAttrs(), ArgAttrs: ArgAttrVec));
297 NewCS->copyMetadata(SrcInst: CB, WL: {LLVMContext::MD_prof, LLVMContext::MD_dbg});
298 Args.clear();
299 ArgAttrVec.clear();
300
301 AttributeFuncs::updateMinLegalVectorWidthAttr(Fn&: *CB.getCaller(),
302 Width: LargestVectorWidth);
303
304 if (!CB.use_empty()) {
305 CB.replaceAllUsesWith(V: NewCS);
306 NewCS->takeName(V: &CB);
307 }
308
309 // Finally, remove the old call from the program, reducing the use-count of
310 // F.
311 CB.eraseFromParent();
312 }
313
314 RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts&: DeadArgs);
315
316 // Since we have now created the new function, splice the body of the old
317 // function right into the new function, leaving the old rotting hulk of the
318 // function empty.
319 NF->splice(ToIt: NF->begin(), FromF: F);
320
321 // We will collect all the new created allocas to promote them into registers
322 // after the following loop
323 SmallVector<AllocaInst *, 4> Allocas;
324
325 // Loop over the argument list, transferring uses of the old arguments over to
326 // the new arguments, also transferring over the names as well.
327 Function::arg_iterator I2 = NF->arg_begin();
328 for (Argument &Arg : F->args()) {
329 if (!ArgsToPromote.count(Val: &Arg)) {
330 // If this is an unmodified argument, move the name and users over to the
331 // new version.
332 Arg.replaceAllUsesWith(V: &*I2);
333 I2->takeName(V: &Arg);
334 ++I2;
335 continue;
336 }
337
338 // There potentially are metadata uses for things like llvm.dbg.value.
339 // Replace them with poison, after handling the other regular uses.
340 llvm::scope_exit RauwPoisonMetadata(
341 [&]() { Arg.replaceAllUsesWith(V: PoisonValue::get(T: Arg.getType())); });
342
343 if (Arg.use_empty())
344 continue;
345
346 // Otherwise, if we promoted this argument, we have to create an alloca in
347 // the callee for every promotable part and store each of the new incoming
348 // arguments into the corresponding alloca, what lets the old code (the
349 // store instructions if they are allowed especially) a chance to work as
350 // before.
351 assert(Arg.getType()->isPointerTy() &&
352 "Only arguments with a pointer type are promotable");
353
354 IRBuilder<NoFolder> IRB(&NF->begin()->front());
355
356 // Add only the promoted elements, so parts from ArgsToPromote
357 SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
358 for (const auto &Pair : ArgsToPromote.find(Val: &Arg)->second) {
359 int64_t Offset = Pair.first;
360 const ArgPart &Part = Pair.second;
361
362 Argument *NewArg = I2++;
363 NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");
364
365 AllocaInst *NewAlloca = IRB.CreateAlloca(
366 Ty: Part.Ty, ArraySize: nullptr, Name: Arg.getName() + "." + Twine(Offset) + ".allc");
367 NewAlloca->setAlignment(Pair.second.Alignment);
368 IRB.CreateAlignedStore(Val: NewArg, Ptr: NewAlloca, Align: Pair.second.Alignment);
369
370 // Collect the alloca to retarget the users to
371 OffsetToAlloca.insert(KV: {Offset, NewAlloca});
372 }
373
374 auto GetAlloca = [&](Value *Ptr) {
375 APInt Offset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), 0);
376 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
377 /* AllowNonInbounds */ true);
378 assert(Ptr == &Arg && "Not constant offset from arg?");
379 return OffsetToAlloca.lookup(Val: Offset.getSExtValue());
380 };
381
382 // Cleanup the code from the dead instructions: GEPs and BitCasts in between
383 // the original argument and its users: loads and stores. Retarget every
384 // user to the new created alloca.
385 SmallVector<Value *, 16> Worklist(Arg.users());
386 SmallVector<Instruction *, 16> DeadInsts;
387 while (!Worklist.empty()) {
388 Value *V = Worklist.pop_back_val();
389 if (isa<GetElementPtrInst>(Val: V)) {
390 DeadInsts.push_back(Elt: cast<Instruction>(Val: V));
391 append_range(C&: Worklist, R: V->users());
392 continue;
393 }
394
395 if (auto *LI = dyn_cast<LoadInst>(Val: V)) {
396 Value *Ptr = LI->getPointerOperand();
397 LI->setOperand(i_nocapture: LoadInst::getPointerOperandIndex(), Val_nocapture: GetAlloca(Ptr));
398 continue;
399 }
400
401 if (auto *SI = dyn_cast<StoreInst>(Val: V)) {
402 assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
403 Value *Ptr = SI->getPointerOperand();
404 SI->setOperand(i_nocapture: StoreInst::getPointerOperandIndex(), Val_nocapture: GetAlloca(Ptr));
405 continue;
406 }
407
408 llvm_unreachable("Unexpected user");
409 }
410
411 for (Instruction *I : DeadInsts) {
412 I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
413 I->eraseFromParent();
414 }
415
416 // Collect the allocas for promotion
417 for (const auto &Pair : OffsetToAlloca) {
418 assert(isAllocaPromotable(Pair.second) &&
419 "By design, only promotable allocas should be produced.");
420 Allocas.push_back(Elt: Pair.second);
421 }
422 }
423
424 LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
425 << " alloca(s) are promotable by Mem2Reg\n");
426
427 if (!Allocas.empty()) {
428 // And we are able to call the `promoteMemoryToRegister()` function.
429 // Our earlier checks have ensured that PromoteMemToReg() will
430 // succeed.
431 auto &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: *NF);
432 auto &AC = FAM.getResult<AssumptionAnalysis>(IR&: *NF);
433 PromoteMemToReg(Allocas, DT, AC: &AC);
434 }
435
436 // If argument(s) are dead (hence removed) or promoted, the function probably
437 // does not follow the standard calling convention anymore. Add DW_CC_nocall
438 // to DISubroutineType to inform debugger that it may not be safe to call this
439 // function.
440 DISubprogram *SP = NF->getSubprogram();
441 if (SP) {
442 auto Temp = SP->getType()->cloneWithCC(CC: llvm::dwarf::DW_CC_nocall);
443 SP->replaceType(Ty: MDNode::replaceWithPermanent(N: std::move(Temp)));
444 }
445
446 return NF;
447}
448
449/// Return true if we can prove that all callees pass in a valid pointer for the
450/// specified function argument.
451static bool allCallersPassValidPointerForArgument(
452 Argument *Arg, SmallPtrSetImpl<CallBase *> &RecursiveCalls,
453 Align NeededAlign, uint64_t NeededDerefBytes) {
454 Function *Callee = Arg->getParent();
455 const DataLayout &DL = Callee->getDataLayout();
456 APInt Bytes(64, NeededDerefBytes);
457
458 // Check if the argument itself is marked dereferenceable and aligned.
459 if (isDereferenceableAndAlignedPointer(
460 V: Arg, Alignment: NeededAlign, Size: Bytes,
461 Q: SimplifyQuery(DL, &Callee->getEntryBlock().front())))
462 return true;
463
464 // Look at all call sites of the function. At this point we know we only have
465 // direct callees.
466 return all_of(Range: Callee->users(), P: [&](User *U) {
467 CallBase &CB = cast<CallBase>(Val&: *U);
468 // In case of functions with recursive calls, this check
469 // (isDereferenceableAndAlignedPointer) will fail when it tries to look at
470 // the first caller of this function. The caller may or may not have a load,
471 // incase it doesn't load the pointer being passed, this check will fail.
472 // So, it's safe to skip the check incase we know that we are dealing with a
473 // recursive call. For example we have a IR given below.
474 //
475 // def fun(ptr %a) {
476 // ...
477 // %loadres = load i32, ptr %a, align 4
478 // %res = call i32 @fun(ptr %a)
479 // ...
480 // }
481 //
482 // def bar(ptr %x) {
483 // ...
484 // %resbar = call i32 @fun(ptr %x)
485 // ...
486 // }
487 //
488 // Since we record processed recursive calls, we check if the current
489 // CallBase has been processed before. If yes it means that it is a
490 // recursive call and we can skip the check just for this call. So, just
491 // return true.
492 if (RecursiveCalls.contains(Ptr: &CB))
493 return true;
494
495 return isDereferenceableAndAlignedPointer(V: CB.getArgOperand(i: Arg->getArgNo()),
496 Alignment: NeededAlign, Size: Bytes,
497 Q: SimplifyQuery(DL, &CB));
498 });
499}
500
501// Try to prove that all Calls to F do not modify the memory pointed to by Arg,
502// using alias analysis local to each caller of F.
503static bool isArgUnmodifiedByAllCalls(Argument *Arg,
504 FunctionAnalysisManager &FAM) {
505 for (User *U : Arg->getParent()->users()) {
506
507 auto *Call = cast<CallBase>(Val: U);
508
509 MemoryLocation Loc =
510 MemoryLocation::getForArgument(Call, ArgIdx: Arg->getArgNo(), TLI: nullptr);
511
512 AAResults &AAR = FAM.getResult<AAManager>(IR&: *Call->getFunction());
513 // Bail as soon as we find a Call where Arg may be modified.
514 if (isModSet(MRI: AAR.getModRefInfo(I: Call, OptLoc: Loc)))
515 return false;
516 }
517
518 // All Users are Calls which do not modify the Arg.
519 return true;
520}
521
522/// Determine that this argument is safe to promote, and find the argument
523/// parts it can be promoted into.
524static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
525 unsigned MaxElements, bool IsRecursive,
526 SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec,
527 FunctionAnalysisManager &FAM) {
528 // Quick exit for unused arguments
529 if (Arg->use_empty())
530 return true;
531
532 // We can only promote this argument if all the uses are loads at known
533 // offsets.
534 //
535 // Promoting the argument causes it to be loaded in the caller
536 // unconditionally. This is only safe if we can prove that either the load
537 // would have happened in the callee anyway (ie, there is a load in the entry
538 // block) or the pointer passed in at every call site is guaranteed to be
539 // valid.
540 // In the former case, invalid loads can happen, but would have happened
541 // anyway, in the latter case, invalid loads won't happen. This prevents us
542 // from introducing an invalid load that wouldn't have happened in the
543 // original code.
544
545 SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
546 Align NeededAlign(1);
547 uint64_t NeededDerefBytes = 0;
548
549 // And if this is a byval argument we also allow to have store instructions.
550 // Only handle in such way arguments with specified alignment;
551 // if it's unspecified, the actual alignment of the argument is
552 // target-specific.
553 bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
554
555 // An end user of a pointer argument is a load or store instruction.
556 // Returns std::nullopt if this load or store is not based on the argument.
557 // Return true if we can promote the instruction, false otherwise.
558 auto HandleEndUser = [&](auto *I, Type *Ty,
559 bool GuaranteedToExecute) -> std::optional<bool> {
560 // Don't promote volatile or atomic instructions.
561 if (!I->isSimple())
562 return false;
563
564 Value *Ptr = I->getPointerOperand();
565 APInt Offset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), 0);
566 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
567 /* AllowNonInbounds */ true);
568 if (Ptr != Arg)
569 return std::nullopt;
570
571 if (Offset.getSignificantBits() >= 64)
572 return false;
573
574 TypeSize Size = DL.getTypeStoreSize(Ty);
575 // Don't try to promote scalable types.
576 if (Size.isScalable())
577 return false;
578
579 // If this is a recursive function and one of the types is a pointer,
580 // then promoting it might lead to recursive promotion.
581 if (IsRecursive && Ty->isPointerTy())
582 return false;
583
584 int64_t Off = Offset.getSExtValue();
585 auto Pair = ArgParts.try_emplace(
586 Key: Off, Args: ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
587 ArgPart &Part = Pair.first->second;
588 bool OffsetNotSeenBefore = Pair.second;
589
590 // We limit promotion to only promoting up to a fixed number of elements of
591 // the aggregate.
592 if (MaxElements > 0 && ArgParts.size() > MaxElements) {
593 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
594 << "more than " << MaxElements << " parts\n");
595 return false;
596 }
597
598 // For now, we only support loading/storing one specific type at a given
599 // offset.
600 if (Part.Ty != Ty) {
601 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
602 << "accessed as both " << *Part.Ty << " and " << *Ty
603 << " at offset " << Off << "\n");
604 return false;
605 }
606
607 // If this instruction is not guaranteed to execute, and we haven't seen a
608 // load or store at this offset before (or it had lower alignment), then we
609 // need to remember that requirement.
610 // Note that skipping instructions of previously seen offsets is only
611 // correct because we only allow a single type for a given offset, which
612 // also means that the number of accessed bytes will be the same.
613 if (!GuaranteedToExecute &&
614 (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
615 // We won't be able to prove dereferenceability for negative offsets.
616 if (Off < 0)
617 return false;
618
619 // If the offset is not aligned, an aligned base pointer won't help.
620 if (!isAligned(I->getAlign(), Off))
621 return false;
622
623 NeededDerefBytes = std::max(a: NeededDerefBytes, b: Off + Size.getFixedValue());
624 NeededAlign = std::max(NeededAlign, I->getAlign());
625 }
626
627 Part.Alignment = std::max(Part.Alignment, I->getAlign());
628 return true;
629 };
630
631 // Look for loads and stores that are guaranteed to execute on entry.
632 for (Instruction &I : Arg->getParent()->getEntryBlock()) {
633 std::optional<bool> Res{};
634 if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I))
635 Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
636 else if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
637 Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
638 /* GuaranteedToExecute */ true);
639 if (Res && !*Res)
640 return false;
641
642 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
643 break;
644 }
645
646 // Now look at all loads of the argument. Remember the load instructions
647 // for the aliasing check below.
648 SmallVector<const Use *, 16> Worklist;
649 SmallPtrSet<const Use *, 16> Visited;
650 SmallVector<LoadInst *, 16> Loads;
651 SmallPtrSet<CallBase *, 4> RecursiveCalls;
652 auto AppendUses = [&](const Value *V) {
653 for (const Use &U : V->uses())
654 if (Visited.insert(Ptr: &U).second)
655 Worklist.push_back(Elt: &U);
656 };
657 AppendUses(Arg);
658 while (!Worklist.empty()) {
659 const Use *U = Worklist.pop_back_val();
660 Value *V = U->getUser();
661
662 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: V)) {
663 if (!GEP->hasAllConstantIndices())
664 return false;
665 AppendUses(V);
666 continue;
667 }
668
669 if (auto *LI = dyn_cast<LoadInst>(Val: V)) {
670 if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
671 return false;
672 Loads.push_back(Elt: LI);
673 continue;
674 }
675
676 // Stores are allowed for byval arguments
677 auto *SI = dyn_cast<StoreInst>(Val: V);
678 if (AreStoresAllowed && SI &&
679 U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
680 if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
681 /* GuaranteedToExecute */ false))
682 return false;
683 continue;
684 // Only stores TO the argument is allowed, all the other stores are
685 // unknown users
686 }
687
688 auto *CB = dyn_cast<CallBase>(Val: V);
689 Value *PtrArg = U->get();
690 if (CB && CB->getCalledFunction() == CB->getFunction()) {
691 if (PtrArg != Arg) {
692 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
693 << "pointer offset is not equal to zero\n");
694 return false;
695 }
696
697 unsigned int ArgNo = Arg->getArgNo();
698 if (U->getOperandNo() != ArgNo) {
699 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
700 << "arg position is different in callee\n");
701 return false;
702 }
703
704 // We limit promotion to only promoting up to a fixed number of elements
705 // of the aggregate.
706 if (MaxElements > 0 && ArgParts.size() > MaxElements) {
707 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
708 << "more than " << MaxElements << " parts\n");
709 return false;
710 }
711
712 RecursiveCalls.insert(Ptr: CB);
713 continue;
714 }
715 // Unknown user.
716 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
717 << "unknown user " << *V << "\n");
718 return false;
719 }
720
721 if (NeededDerefBytes || NeededAlign > 1) {
722 // Try to prove a required deref / aligned requirement.
723 if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign,
724 NeededDerefBytes)) {
725 LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
726 << "not dereferenceable or aligned\n");
727 return false;
728 }
729 }
730
731 if (ArgParts.empty())
732 return true; // No users, this is a dead argument.
733
734 // Sort parts by offset.
735 append_range(C&: ArgPartsVec, R&: ArgParts);
736 sort(C&: ArgPartsVec, Comp: llvm::less_first());
737
738 // Make sure the parts are non-overlapping.
739 int64_t Offset = ArgPartsVec[0].first;
740 for (const auto &Pair : ArgPartsVec) {
741 if (Pair.first < Offset)
742 return false; // Overlap with previous part.
743
744 Offset = Pair.first + DL.getTypeStoreSize(Ty: Pair.second.Ty);
745 }
746
747 // If store instructions are allowed, the path from the entry of the function
748 // to each load may be not free of instructions that potentially invalidate
749 // the load, and this is an admissible situation.
750 if (AreStoresAllowed)
751 return true;
752
753 // Okay, now we know that the argument is only used by load instructions, and
754 // it is safe to unconditionally perform all of them.
755
756 // If we can determine that no call to the Function modifies the memory region
757 // accessed through Arg, through alias analysis using actual arguments in the
758 // callers, we know that it is guaranteed to be safe to promote the argument.
759 if (isArgUnmodifiedByAllCalls(Arg, FAM))
760 return true;
761
762 // Otherwise, use alias analysis to check if the pointer is guaranteed to not
763 // be modified from entry of the function to each of the load instructions.
764 for (LoadInst *Load : Loads) {
765 // Check to see if the load is invalidated from the start of the block to
766 // the load itself.
767 BasicBlock *BB = Load->getParent();
768
769 MemoryLocation Loc = MemoryLocation::get(LI: Load);
770 if (AAR.canInstructionRangeModRef(I1: BB->front(), I2: *Load, Loc, Mode: ModRefInfo::Mod))
771 return false; // Pointer is invalidated!
772
773 // Now check every path from the entry block to the load for transparency.
774 // To do this, we perform a depth first search on the inverse CFG from the
775 // loading block.
776 for (BasicBlock *P : predecessors(BB)) {
777 for (BasicBlock *TranspBB : inverse_depth_first(G: P))
778 if (AAR.canBasicBlockModify(BB: *TranspBB, Loc))
779 return false;
780 }
781 }
782
783 // If the path from the entry of the function to each load is free of
784 // instructions that potentially invalidate the load, we can make the
785 // transformation!
786 return true;
787}
788
789/// Check if callers and callee agree on how promoted arguments would be
790/// passed.
791static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
792 const TargetTransformInfo &TTI) {
793 return all_of(Range: F.uses(), P: [&](const Use &U) {
794 CallBase *CB = dyn_cast<CallBase>(Val: U.getUser());
795 if (!CB)
796 return false;
797
798 const Function *Caller = CB->getCaller();
799 const Function *Callee = CB->getCalledFunction();
800 return TTI.areTypesABICompatible(Caller, Callee, Types);
801 });
802}
803
804/// PromoteArguments - This method checks the specified function to see if there
805/// are any promotable arguments and if it is safe to promote the function (for
806/// example, all callers are direct). If safe to promote some arguments, it
807/// calls the DoPromotion method.
808static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
809 unsigned MaxElements, bool IsRecursive) {
810 // Don't perform argument promotion for naked functions; otherwise we can end
811 // up removing parameters that are seemingly 'not used' as they are referred
812 // to in the assembly.
813 if (F->hasFnAttribute(Kind: Attribute::Naked))
814 return nullptr;
815
816 // Make sure that it is local to this module.
817 if (!F->hasLocalLinkage())
818 return nullptr;
819
820 // Ensure function definition is available for interprocedural analysis.
821 if (!F->isDefinitionExact())
822 return nullptr;
823
824 // Don't promote arguments for variadic functions. Adding, removing, or
825 // changing non-pack parameters can change the classification of pack
826 // parameters. Frontends encode that classification at the call site in the
827 // IR, while in the callee the classification is determined dynamically based
828 // on the number of registers consumed so far.
829 if (F->isVarArg())
830 return nullptr;
831
832 // Don't transform functions that receive inallocas, as the transformation may
833 // not be safe depending on calling convention.
834 if (F->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca))
835 return nullptr;
836
837 // First check: see if there are any pointer arguments! If not, quick exit.
838 SmallVector<Argument *, 16> PointerArgs;
839 for (Argument &I : F->args())
840 if (I.getType()->isPointerTy())
841 PointerArgs.push_back(Elt: &I);
842 if (PointerArgs.empty())
843 return nullptr;
844
845 // Second check: make sure that all callers are direct callers. We can't
846 // transform functions that have indirect callers. Also see if the function
847 // is self-recursive.
848 for (Use &U : F->uses()) {
849 CallBase *CB = dyn_cast<CallBase>(Val: U.getUser());
850 // Must be a direct call.
851 if (CB == nullptr || !CB->isCallee(U: &U) ||
852 CB->getFunctionType() != F->getFunctionType())
853 return nullptr;
854
855 // Can't change signature of musttail callee
856 if (CB->isMustTailCall())
857 return nullptr;
858
859 if (CB->getFunction() == F)
860 IsRecursive = true;
861 }
862
863 // Can't change signature of musttail caller
864 // FIXME: Support promoting whole chain of musttail functions
865 for (BasicBlock &BB : *F)
866 if (BB.getTerminatingMustTailCall())
867 return nullptr;
868
869 const DataLayout &DL = F->getDataLayout();
870 auto &AAR = FAM.getResult<AAManager>(IR&: *F);
871 const auto &TTI = FAM.getResult<TargetIRAnalysis>(IR&: *F);
872
873 // Check to see which arguments are promotable. If an argument is promotable,
874 // add it to ArgsToPromote.
875 DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
876 unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
877 for (Argument *PtrArg : PointerArgs) {
878 // Replace sret attribute with noalias. This reduces register pressure by
879 // avoiding a register copy.
880 if (PtrArg->hasStructRetAttr()) {
881 unsigned ArgNo = PtrArg->getArgNo();
882 F->removeParamAttr(ArgNo, Kind: Attribute::StructRet);
883 F->addParamAttr(ArgNo, Kind: Attribute::NoAlias);
884 for (Use &U : F->uses()) {
885 CallBase &CB = cast<CallBase>(Val&: *U.getUser());
886 CB.removeParamAttr(ArgNo, Kind: Attribute::StructRet);
887 CB.addParamAttr(ArgNo, Kind: Attribute::NoAlias);
888 }
889 }
890
891 // If we can promote the pointer to its value.
892 SmallVector<OffsetAndArgPart, 4> ArgParts;
893
894 if (findArgParts(Arg: PtrArg, DL, AAR, MaxElements, IsRecursive, ArgPartsVec&: ArgParts,
895 FAM)) {
896 SmallVector<Type *, 4> Types;
897 for (const auto &Pair : ArgParts)
898 Types.push_back(Elt: Pair.second.Ty);
899
900 if (areTypesABICompatible(Types, F: *F, TTI)) {
901 NumArgsAfterPromote += ArgParts.size() - 1;
902 ArgsToPromote.insert(KV: {PtrArg, std::move(ArgParts)});
903 }
904 }
905 }
906
907 // No promotable pointer arguments.
908 if (ArgsToPromote.empty())
909 return nullptr;
910
911 if (NumArgsAfterPromote > TTI.getMaxNumArgs())
912 return nullptr;
913
914 return doPromotion(F, FAM, ArgsToPromote);
915}
916
917PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
918 CGSCCAnalysisManager &AM,
919 LazyCallGraph &CG,
920 CGSCCUpdateResult &UR) {
921 bool Changed = false, LocalChange;
922
923 // Iterate until we stop promoting from this SCC.
924 do {
925 LocalChange = false;
926
927 FunctionAnalysisManager &FAM =
928 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager();
929
930 bool IsRecursive = C.size() > 1;
931 for (LazyCallGraph::Node &N : C) {
932 Function &OldF = N.getFunction();
933 Function *NewF = promoteArguments(F: &OldF, FAM, MaxElements, IsRecursive);
934 if (!NewF)
935 continue;
936 LocalChange = true;
937
938 // Directly substitute the functions in the call graph. Note that this
939 // requires the old function to be completely dead and completely
940 // replaced by the new function. It does no call graph updates, it merely
941 // swaps out the particular function mapped to a particular node in the
942 // graph.
943 C.getOuterRefSCC().replaceNodeFunction(N, NewF&: *NewF);
944 FAM.clear(IR&: OldF, Name: OldF.getName());
945 OldF.eraseFromParent();
946
947 PreservedAnalyses FuncPA;
948 FuncPA.preserveSet<CFGAnalyses>();
949 for (auto *U : NewF->users()) {
950 auto *UserF = cast<CallBase>(Val: U)->getFunction();
951 FAM.invalidate(IR&: *UserF, PA: FuncPA);
952 }
953 }
954
955 Changed |= LocalChange;
956 } while (LocalChange);
957
958 if (!Changed)
959 return PreservedAnalyses::all();
960
961 PreservedAnalyses PA;
962 // We've cleared out analyses for deleted functions.
963 PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
964 // We've manually invalidated analyses for functions we've modified.
965 PA.preserveSet<AllAnalysesOn<Function>>();
966 return PA;
967}
968