//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments. In
// practice, this means looking for internal functions that have pointer
// arguments. If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value. This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded. Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but this is not currently
// implemented. This case would be best handled when and if LLVM begins
// supporting multiple return values from functions.
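//
// As a rough illustration (a hand-written example, not taken from an actual
// test), a call such as:
//
//   define internal i32 @callee(ptr %p) {
//     %v = load i32, ptr %p
//     ret i32 %v
//   }
//   ...
//   %r = call i32 @callee(ptr %q)
//
// can be rewritten so that the caller performs the load instead:
//
//   define internal i32 @callee(i32 %p.0.val) {
//     ret i32 %p.0.val
//   }
//   ...
//   %q.val = load i32, ptr %q
//   %r = call i32 @callee(i32 %q.val)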
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/ArgumentPromotion.h"

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

namespace {

struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-executed load or store instruction for use by
  /// metadata transfer.
  Instruction *MustExecInstr;
};

using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace
static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  if (Offset != 0) {
    APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
    Ptr = IRB.CreatePtrAdd(Ptr, IRB.getInt(APOffset));
  }
  return Ptr;
}

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, FunctionAnalysisManager &FAM,
            const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
                &ArgsToPromote) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  // Mapping from old to new argument indices. -1 for promoted or removed
  // arguments.
  SmallVector<unsigned> NewArgIndices;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0, NewArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
      NewArgIndices.push_back(NewArgNo++);
    } else if (I->use_empty()) {
      // Dead argument (dead arguments are always marked as promotable).
      ++NumArgumentsDead;
      NewArgIndices.push_back((unsigned)-1);
    } else {
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
      NewArgIndices.push_back((unsigned)-1);
      NewArgNo += ArgParts.size();
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);
  NF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata need
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));

  // Remap argument indices in allocsize attribute.
  if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) {
    unsigned Arg1 = NewArgIndices[AllocSize->first];
    assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument");
    std::optional<unsigned> Arg2;
    if (AllocSize->second) {
      Arg2 = NewArgIndices[*AllocSize->second];
      assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument");
    }
    NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2));
  }

  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all the callers of the function, transforming the call sites to
  // pass in the loaded pointers.
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getDataLayout();
  SmallVector<WeakTrackingVH, 16> DeadArgs;

  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEPs and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_noundef,
                              LLVMContext::MD_nontemporal});
            // Only transfer poison-generating metadata if we also have
            // !noundef.
            // TODO: Without !noundef, we could merge this metadata across
            // all promoted loads.
            if (LI->hasMetadata(LLVMContext::MD_noundef))
              LI->copyMetadata(*Pair.second.MustExecInstr,
                               {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                                LLVMContext::MD_align});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      } else {
        assert(ArgsToPromote.count(&*I) && I->use_empty());
        DeadArgs.emplace_back(AI->get());
      }
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", CB.getIterator());
    } else {
      auto *NewCall =
          CallInst::Create(NF, Args, OpBundles, "", CB.getIterator());
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(F->getContext(),
                                            CallPAL.getFnAttrs(),
                                            CallPAL.getRetAttrs(), ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    CB.eraseFromParent();
  }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->splice(NF->begin(), F);

  // We will collect all the newly created allocas so we can promote them into
  // registers after the following loop.
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over
  // to the new arguments, and transferring the names as well.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca, which gives the old code (in
    // particular any permitted store instructions) a chance to work as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");

    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add only the promoted elements, i.e. the parts from ArgsToPromote.
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;

      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca to retarget the users to.
      OffsetToAlloca.insert({Offset, NewAlloca});
    }

    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Clean up the dead instructions (GEPs and BitCasts) between the original
    // argument and its users (loads and stores), and retarget every user to
    // the newly created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion.
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // Promote the newly created allocas into registers. Our earlier checks
    // have ensured that PromoteMemToReg() will succeed.
    auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
    auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
    PromoteMemToReg(Allocas, DT, &AC);
  }

  return NF;
}

/// Return true if we can prove that all callers pass in a valid pointer for
/// the specified function argument.
static bool allCallersPassValidPointerForArgument(
    Argument *Arg, SmallPtrSetImpl<CallBase *> &RecursiveCalls,
    Align NeededAlign, uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function. At this point we know we only have
  // direct callees.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    // For functions with recursive calls, this check
    // (isDereferenceableAndAlignedPointer) will fail when it tries to look at
    // the first caller of this function. The caller may or may not have a
    // load; in case it does not load the pointer being passed, this check will
    // fail. So it is safe to skip the check when we know that we are dealing
    // with a recursive call. For example, consider the IR below.
    //
    // def fun(ptr %a) {
    //   ...
    //   %loadres = load i32, ptr %a, align 4
    //   %res = call i32 @fun(ptr %a)
    //   ...
    // }
    //
    // def bar(ptr %x) {
    //   ...
    //   %resbar = call i32 @fun(ptr %x)
    //   ...
    // }
    //
    // Since we record processed recursive calls, we check if the current
    // CallBase has been processed before. If yes, it means that it is a
    // recursive call and we can skip the check just for this call. So, just
    // return true.
    if (RecursiveCalls.contains(&CB))
      return true;

    return isDereferenceableAndAlignedPointer(
        CB.getArgOperand(Arg->getArgNo()), NeededAlign, Bytes, DL);
  });
}

/// Determine that this argument is safe to promote, and find the argument
/// parts it can be promoted into.
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all the uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.
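  //
  // For illustration (hypothetical IR, not a specific test case), a load of
  // the argument at the top of the entry block, such as
  //
  //   define internal i32 @f(ptr %p) {
  //   entry:
  //     %v = load i32, ptr %p
  //     ...
  //
  // executes whenever @f is called, so hoisting that load to the call sites
  // cannot introduce a fault that the original program would not already have
  // had.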

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // If this is a byval argument, we also allow store instructions.
  // Only handle arguments with a specified alignment in this way;
  // if it's unspecified, the actual alignment of the argument is
  // target-specific.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();

  // An end user of a pointer argument is a load or store instruction.
  // Returns std::nullopt if this load or store is not based on the argument.
  // Returns true if we can promote the instruction, false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> std::optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return std::nullopt;

    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to only promoting up to a fixed number of elements of
    // the aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };

  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    std::optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  SmallPtrSet<CallBase *, 4> RecursiveCalls;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
      // Only stores TO the argument are allowed; all other stores are
      // unknown users.
    }

    auto *CB = dyn_cast<CallBase>(V);
    Value *PtrArg = U->get();
    if (CB && CB->getCalledFunction() == CB->getFunction()) {
      if (PtrArg != Arg) {
        LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                          << "pointer offset is not equal to zero\n");
        return false;
      }

      unsigned int ArgNo = Arg->getArgNo();
      if (U->getOperandNo() != ArgNo) {
        LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                          << "arg position is different in callee\n");
        return false;
      }

      // We limit promotion to only promoting up to a fixed number of elements
      // of the aggregate.
      if (MaxElements > 0 && ArgParts.size() > MaxElements) {
        LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                          << "more than " << MaxElements << " parts\n");
        return false;
      }

      RecursiveCalls.insert(CB);
      continue;
    }
    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove the required dereferenceable bytes / alignment.
    if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec, llvm::less_first());

  // Make sure the parts are non-overlapping.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }

  // If store instructions are allowed, the path from the entry of the function
  // to each load may not be free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first(P))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB)
      return false;

    const Function *Caller = CB->getCaller();
    const Function *Callee = CB->getCalledFunction();
    return TTI.areTypesABICompatible(Caller, Callee, Types);
  });
}

/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                  unsigned MaxElements, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation
  // may not be safe depending on calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change signature of musttail callee
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change signature of musttail caller
  // FIXME: Support promoting whole chain of musttail functions
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getDataLayout();
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
  for (Argument *PtrArg : PointerArgs) {
    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
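    // For instance (illustrative only), a parameter declared as
    //   ptr sret(%struct.S) align 8 %out
    // becomes
    //   ptr noalias align 8 %out
    // at the definition and at every direct call site.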
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // Check whether we can promote the pointer to its value.
    SmallVector<OffsetAndArgPart, 4> ArgParts;

    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI)) {
        NumArgsAfterPromote += ArgParts.size() - 1;
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  if (NumArgsAfterPromote > TTI.getMaxNumArgs())
    return nullptr;

  return doPromotion(F, FAM, ArgsToPromote);
}

PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    bool IsRecursive = C.size() > 1;
    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();
      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}