ArgumentPromotion.cpp source code [llvm_projects/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp]

1	//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass promotes "by reference" arguments to be "by value" arguments. In
10	// practice, this means looking for internal functions that have pointer
11	// arguments. If it can prove, through the use of alias analysis, that an
12	// argument is *only* loaded, then it can pass the value into the function
13	// instead of the address of the value. This can cause recursive simplification
14	// of code and lead to the elimination of allocas (especially in C++ template
15	// code like the STL).
16	//
17	// This pass also handles aggregate arguments that are passed into a function,
18	// scalarizing them if the elements of the aggregate are only loaded. Note that
19	// by default it refuses to scalarize aggregates which would require passing in
20	// more than three operands to the function, because passing thousands of
21	// operands for a large array or structure is unprofitable! This limit can be
22	// configured or disabled, however.
23	//
24	// Note that this transformation could also be done for arguments that are only
25	// stored to (returning the value instead), but does not currently. This case
26	// would be best handled when and if LLVM begins supporting multiple return
27	// values from functions.
28	//
29	//===----------------------------------------------------------------------===//
30
31	#include "llvm/Transforms/IPO/ArgumentPromotion.h"
32
33	#include "llvm/ADT/DepthFirstIterator.h"
34	#include "llvm/ADT/STLExtras.h"
35	#include "llvm/ADT/ScopeExit.h"
36	#include "llvm/ADT/SmallPtrSet.h"
37	#include "llvm/ADT/SmallVector.h"
38	#include "llvm/ADT/Statistic.h"
39	#include "llvm/ADT/Twine.h"
40	#include "llvm/Analysis/AssumptionCache.h"
41	#include "llvm/Analysis/BasicAliasAnalysis.h"
42	#include "llvm/Analysis/CallGraph.h"
43	#include "llvm/Analysis/Loads.h"
44	#include "llvm/Analysis/MemoryLocation.h"
45	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
46	#include "llvm/Analysis/TargetTransformInfo.h"
47	#include "llvm/Analysis/ValueTracking.h"
48	#include "llvm/IR/Argument.h"
49	#include "llvm/IR/Attributes.h"
50	#include "llvm/IR/BasicBlock.h"
51	#include "llvm/IR/CFG.h"
52	#include "llvm/IR/Constants.h"
53	#include "llvm/IR/DIBuilder.h"
54	#include "llvm/IR/DataLayout.h"
55	#include "llvm/IR/DerivedTypes.h"
56	#include "llvm/IR/Dominators.h"
57	#include "llvm/IR/Function.h"
58	#include "llvm/IR/IRBuilder.h"
59	#include "llvm/IR/InstrTypes.h"
60	#include "llvm/IR/Instruction.h"
61	#include "llvm/IR/Instructions.h"
62	#include "llvm/IR/Metadata.h"
63	#include "llvm/IR/Module.h"
64	#include "llvm/IR/NoFolder.h"
65	#include "llvm/IR/PassManager.h"
66	#include "llvm/IR/Type.h"
67	#include "llvm/IR/Use.h"
68	#include "llvm/IR/User.h"
69	#include "llvm/IR/Value.h"
70	#include "llvm/Support/Casting.h"
71	#include "llvm/Support/Debug.h"
72	#include "llvm/Support/raw_ostream.h"
73	#include "llvm/Transforms/Utils/Local.h"
74	#include "llvm/Transforms/Utils/PromoteMemToReg.h"
75	#include <algorithm>
76	#include <cassert>
77	#include <cstdint>
78	#include <utility>
79	#include <vector>
80
81	using namespace llvm;
82
83	#define DEBUG_TYPE "argpromotion"
84
85	STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
86	STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
87
88	namespace {
89
90	struct ArgPart {
91	Type *Ty;
92	Align Alignment;
93	/// A representative guaranteed-executed load or store instruction for use by
94	/// metadata transfer.
95	Instruction *MustExecInstr;
96	};
97
98	using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
99
100	} // end anonymous namespace
101
102	static Value createByteGEP(IRBuilderBase &IRB, const* DataLayout &DL,
103	Value Ptr, Type ResElemTy, int64_t Offset) {
104	if (Offset != `0`) {
105	APInt APOffset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), Offset,
106	/isSigned=/true);
107	Ptr = IRB.CreatePtrAdd(Ptr, Offset: IRB.getInt(AI: APOffset));
108	}
109	return Ptr;
110	}
111
112	/// DoPromotion - This method actually performs the promotion of the specified
113	/// arguments, and returns the new function. At this point, we know that it's
114	/// safe to do so.
115	static Function *
116	doPromotion(Function *F, FunctionAnalysisManager &FAM,
117	const DenseMap<Argument *, SmallVector<OffsetAndArgPart, `4`>>
118	&ArgsToPromote) {
119	// Start by computing a new prototype for the function, which is the same as
120	// the old function, but has modified arguments.
121	FunctionType *FTy = F->getFunctionType();
122	std::vector<Type *> Params;
123
124	// Attribute - Keep track of the parameter attributes for the arguments
125	// that we are not* promoting. For the ones that we do promote, the parameter*
126	// attributes are lost
127	SmallVector<AttributeSet, `8`> ArgAttrVec;
128	// Mapping from old to new argument indices. -1 for promoted or removed
129	// arguments.
130	SmallVector<unsigned> NewArgIndices;
131	AttributeList PAL = F->getAttributes();
132	OptimizationRemarkEmitter ORE(F);
133
134	// First, determine the new argument list
135	unsigned ArgNo = `0`, NewArgNo = `0`;
136	for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
137	++I, ++ArgNo) {
138	auto It = ArgsToPromote.find(Val: &*I);
139	if (It == ArgsToPromote.end()) {
140	// Unchanged argument
141	Params.push_back(x: I->getType());
142	ArgAttrVec.push_back(Elt: PAL.getParamAttrs(ArgNo));
143	NewArgIndices.push_back(Elt: NewArgNo++);
144	} else if (I->use_empty()) {
145	// Dead argument (which are always marked as promotable)
146	++NumArgumentsDead;
147	ORE.emit(RemarkBuilder: [&]() {
148	return OptimizationRemark (DEBUG_TYPE, "ArgumentRemoved", F)
149	<< "eliminating argument " << ore::NV ("ArgName", I->getName())
150	<< "(" << ore::NV ("ArgIndex", ArgNo) << ")";
151	});
152
153	NewArgIndices.push_back(Elt: (unsigned)-`1`);
154	} else {
155	const auto &ArgParts = It ->second;
156	for (const auto &Pair : ArgParts) {
157	Params.push_back(x: Pair.second.Ty);
158	ArgAttrVec.push_back(Elt: AttributeSet ());
159	}
160	++NumArgumentsPromoted;
161	ORE.emit(RemarkBuilder: [&]() {
162	return OptimizationRemark (DEBUG_TYPE, "ArgumentPromoted", F)
163	<< "promoting argument " << ore::NV ("ArgName", I->getName())
164	<< "(" << ore::NV ("ArgIndex", ArgNo) << ")"
165	<< " to pass by value";
166	});
167
168	NewArgIndices.push_back(Elt: (unsigned)-`1`);
169	NewArgNo += ArgParts.size();
170	}
171	}
172
173	Type *RetTy = FTy->getReturnType();
174
175	// Construct the new function type using the new arguments.
176	FunctionType *NFTy = FunctionType::get(Result: RetTy, Params, isVarArg: FTy->isVarArg());
177
178	// Create the new function body and insert it into the module.
179	Function *NF = Function::Create(Ty: NFTy, Linkage: F->getLinkage(), AddrSpace: F->getAddressSpace(),
180	N: F->getName());
181	NF->copyAttributesFrom(Src: F);
182	NF->copyMetadata(Src: F, Offset: `0`);
183
184	// The new function will have the !dbg metadata copied from the original
185	// function. The original function may not be deleted, and dbg metadata need
186	// to be unique, so we need to drop it.
187	F->setSubprogram(nullptr);
188
189	LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
190	<< "From: " << *F);
191
192	uint64_t LargestVectorWidth = `0`;
193	for (auto *I : Params)
194	if (auto *VT = dyn_cast<llvm::VectorType>(Val: I))
195	LargestVectorWidth = std::max(
196	a: LargestVectorWidth, b: VT->getPrimitiveSizeInBits().getKnownMinValue());
197
198	// Recompute the parameter attributes list based on the new arguments for
199	// the function.
200	NF->setAttributes(AttributeList::get(C&: F->getContext(), FnAttrs: PAL.getFnAttrs(),
201	RetAttrs: PAL.getRetAttrs(), ArgAttrs: ArgAttrVec));
202
203	// Remap argument indices in allocsize attribute.
204	if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) {
205	unsigned Arg1 = NewArgIndices [AllocSize ->first];
206	assert(Arg1 != (unsigned)-`1` && "allocsize cannot be promoted argument");
207	std::optional<unsigned> Arg2;
208	if (AllocSize ->second) {
209	Arg2 = NewArgIndices [*AllocSize ->second];
210	assert(Arg2 != (unsigned)-`1` && "allocsize cannot be promoted argument");
211	}
212	NF->addFnAttr(Attr: Attribute::getWithAllocSizeArgs(Context&: F->getContext(), ElemSizeArg: Arg1, NumElemsArg: Arg2));
213	}
214
215	AttributeFuncs::updateMinLegalVectorWidthAttr(Fn&: *NF, Width: LargestVectorWidth);
216	ArgAttrVec.clear();
217
218	F->getParent()->getFunctionList().insert(where: F->getIterator(), New: NF);
219	NF->takeName(V: F);
220
221	// Loop over all the callers of the function, transforming the call sites to
222	// pass in the loaded pointers.
223	SmallVector<Value *, `16`> Args;
224	const DataLayout &DL = F->getDataLayout();
225	SmallVector<WeakTrackingVH, `16`> DeadArgs;
226
227	while (!F->use_empty()) {
228	CallBase &CB = cast<CallBase>(Val&: *F->user_back());
229	assert(CB.getCalledFunction() == F);
230	const AttributeList &CallPAL = CB.getAttributes();
231	IRBuilder<NoFolder> IRB(&CB);
232
233	// Loop over the operands, inserting GEP and loads in the caller as
234	// appropriate.
235	auto *AI = CB.arg_begin();
236	ArgNo = `0`;
237	for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
238	++I, ++AI, ++ArgNo) {
239	auto ArgIt = ArgsToPromote.find(Val: &*I);
240	if (ArgIt == ArgsToPromote.end()) {
241	Args.push_back(Elt: AI); // Unmodified argument*
242	ArgAttrVec.push_back(Elt: CallPAL.getParamAttrs(ArgNo));
243	} else if (!I->use_empty()) {
244	Value V = AI;
245	for (const auto &Pair : ArgIt ->second) {
246	LoadInst *LI = IRB.CreateAlignedLoad(
247	Ty: Pair.second.Ty,
248	Ptr: createByteGEP(IRB, DL, Ptr: V, ResElemTy: Pair.second.Ty, Offset: Pair.first),
249	Align: Pair.second.Alignment, Name: V->getName() + ".val");
250	if (Pair.second.MustExecInstr) {
251	LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
252	LI->copyMetadata(SrcInst: *Pair.second.MustExecInstr,
253	WL: {LLVMContext::MD_dereferenceable,
254	LLVMContext::MD_dereferenceable_or_null,
255	LLVMContext::MD_noundef,
256	LLVMContext::MD_nontemporal});
257	// Only transfer poison-generating metadata if we also have
258	// !noundef.
259	// TODO: Without !noundef, we could merge this metadata across
260	// all promoted loads.
261	if (LI->hasMetadata(KindID: LLVMContext::MD_noundef))
262	LI->copyMetadata(SrcInst: *Pair.second.MustExecInstr,
263	WL: Metadata::PoisonGeneratingIDs);
264	}
265	Args.push_back(Elt: LI);
266	ArgAttrVec.push_back(Elt: AttributeSet ());
267	}
268	} else {
269	assert(I->use_empty());
270	DeadArgs.emplace_back(Args: AI->get());
271	}
272	}
273
274	// Push any varargs arguments on the list.
275	for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
276	Args.push_back(Elt: *AI);
277	ArgAttrVec.push_back(Elt: CallPAL.getParamAttrs(ArgNo));
278	}
279
280	SmallVector<OperandBundleDef, `1`> OpBundles;
281	CB.getOperandBundlesAsDefs(Defs&: OpBundles);
282
283	CallBase NewCS = nullptr*;
284	if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) {
285	NewCS = InvokeInst::Create(Func: NF, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(),
286	Args, Bundles: OpBundles, NameStr: "", InsertBefore: CB.getIterator());
287	} else {
288	auto *NewCall =
289	CallInst::Create(Func: NF, Args, Bundles: OpBundles, NameStr: "", InsertBefore: CB.getIterator());
290	NewCall->setTailCallKind(cast<CallInst>(Val: &CB)->getTailCallKind());
291	NewCS = NewCall;
292	}
293	NewCS->setCallingConv(CB.getCallingConv());
294	NewCS->setAttributes(AttributeList::get(C&: F->getContext(),
295	FnAttrs: CallPAL.getFnAttrs(),
296	RetAttrs: CallPAL.getRetAttrs(), ArgAttrs: ArgAttrVec));
297	NewCS->copyMetadata(SrcInst: CB, WL: {LLVMContext::MD_prof, LLVMContext::MD_dbg});
298	Args.clear();
299	ArgAttrVec.clear();
300
301	AttributeFuncs::updateMinLegalVectorWidthAttr(Fn&: *CB.getCaller(),
302	Width: LargestVectorWidth);
303
304	if (!CB.use_empty()) {
305	CB.replaceAllUsesWith(V: NewCS);
306	NewCS->takeName(V: &CB);
307	}
308
309	// Finally, remove the old call from the program, reducing the use-count of
310	// F.
311	CB.eraseFromParent();
312	}
313
314	RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts&: DeadArgs);
315
316	// Since we have now created the new function, splice the body of the old
317	// function right into the new function, leaving the old rotting hulk of the
318	// function empty.
319	NF->splice(ToIt: NF->begin(), FromF: F);
320
321	// We will collect all the new created allocas to promote them into registers
322	// after the following loop
323	SmallVector<AllocaInst *, `4`> Allocas;
324
325	// Loop over the argument list, transferring uses of the old arguments over to
326	// the new arguments, also transferring over the names as well.
327	Function::arg_iterator I2 = NF->arg_begin();
328	for (Argument &Arg : F->args()) {
329	if (!ArgsToPromote.count(Val: &Arg)) {
330	// If this is an unmodified argument, move the name and users over to the
331	// new version.
332	Arg.replaceAllUsesWith(V: &*I2);
333	I2->takeName(V: &Arg);
334	++I2;
335	continue;
336	}
337
338	// There potentially are metadata uses for things like llvm.dbg.value.
339	// Replace them with poison, after handling the other regular uses.
340	llvm::scope_exit RauwPoisonMetadata(
341	[&]() { Arg.replaceAllUsesWith(V: PoisonValue::get(T: Arg.getType())); });
342
343	if (Arg.use_empty())
344	continue;
345
346	// Otherwise, if we promoted this argument, we have to create an alloca in
347	// the callee for every promotable part and store each of the new incoming
348	// arguments into the corresponding alloca, what lets the old code (the
349	// store instructions if they are allowed especially) a chance to work as
350	// before.
351	assert(Arg.getType()->isPointerTy() &&
352	"Only arguments with a pointer type are promotable");
353
354	IRBuilder<NoFolder> IRB(&NF->begin()->front());
355
356	// Add only the promoted elements, so parts from ArgsToPromote
357	SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
358	for (const auto &Pair : ArgsToPromote.find(Val: &Arg)->second) {
359	int64_t Offset = Pair.first;
360	const ArgPart &Part = Pair.second;
361
362	Argument *NewArg = I2++;
363	NewArg->setName(Arg.getName() + "." + Twine (Offset) + ".val");
364
365	AllocaInst *NewAlloca = IRB.CreateAlloca(
366	Ty: Part.Ty, ArraySize: nullptr, Name: Arg.getName() + "." + Twine (Offset) + ".allc");
367	NewAlloca->setAlignment(Pair.second.Alignment);
368	IRB.CreateAlignedStore(Val: NewArg, Ptr: NewAlloca, Align: Pair.second.Alignment);
369
370	// Collect the alloca to retarget the users to
371	OffsetToAlloca.insert(KV: {Offset, NewAlloca});
372	}
373
374	auto GetAlloca = [&](Value *Ptr) {
375	APInt Offset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), `0`);
376	Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
377	/ AllowNonInbounds / true);
378	assert(Ptr == &Arg && "Not constant offset from arg?");
379	return OffsetToAlloca.lookup(Val: Offset.getSExtValue());
380	};
381
382	// Cleanup the code from the dead instructions: GEPs and BitCasts in between
383	// the original argument and its users: loads and stores. Retarget every
384	// user to the new created alloca.
385	SmallVector<Value *, `16`> Worklist(Arg.users());
386	SmallVector<Instruction *, `16`> DeadInsts;
387	while (!Worklist.empty()) {
388	Value *V = Worklist.pop_back_val();
389	if (isa<GetElementPtrInst>(Val: V)) {
390	DeadInsts.push_back(Elt: cast<Instruction>(Val: V));
391	append_range(C&: Worklist, R: V->users());
392	continue;
393	}
394
395	if (auto *LI = dyn_cast<LoadInst>(Val: V)) {
396	Value *Ptr = LI->getPointerOperand();
397	LI->setOperand(i_nocapture: LoadInst::getPointerOperandIndex(), Val_nocapture: GetAlloca (Ptr));
398	continue;
399	}
400
401	if (auto *SI = dyn_cast<StoreInst>(Val: V)) {
402	assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
403	Value *Ptr = SI->getPointerOperand();
404	SI->setOperand(i_nocapture: StoreInst::getPointerOperandIndex(), Val_nocapture: GetAlloca (Ptr));
405	continue;
406	}
407
408	llvm_unreachable("Unexpected user");
409	}
410
411	for (Instruction *I : DeadInsts) {
412	I->replaceAllUsesWith(V: PoisonValue::get(T: I->getType()));
413	I->eraseFromParent();
414	}
415
416	// Collect the allocas for promotion
417	for (const auto &Pair : OffsetToAlloca) {
418	assert(isAllocaPromotable(Pair.second) &&
419	"By design, only promotable allocas should be produced.");
420	Allocas.push_back(Elt: Pair.second);
421	}
422	}
423
424	LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
425	<< " alloca(s) are promotable by Mem2Reg\n");
426
427	if (!Allocas.empty()) {
428	// And we are able to call the `promoteMemoryToRegister()` function.
429	// Our earlier checks have ensured that PromoteMemToReg() will
430	// succeed.
431	auto &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: *NF);
432	auto &AC = FAM.getResult<AssumptionAnalysis>(IR&: *NF);
433	PromoteMemToReg(Allocas, DT, AC: &AC);
434	}
435
436	// If argument(s) are dead (hence removed) or promoted, the function probably
437	// does not follow the standard calling convention anymore. Add DW_CC_nocall
438	// to DISubroutineType to inform debugger that it may not be safe to call this
439	// function.
440	DISubprogram *SP = NF->getSubprogram();
441	if (SP) {
442	auto Temp = SP->getType()->cloneWithCC(CC: llvm::dwarf::DW_CC_nocall);
443	SP->replaceType(Ty: MDNode::replaceWithPermanent(N: std::move(Temp)));
444	}
445
446	return NF;
447	}
448
449	/// Return true if we can prove that all callees pass in a valid pointer for the
450	/// specified function argument.
451	static bool allCallersPassValidPointerForArgument(
452	Argument Arg, SmallPtrSetImpl<CallBase > &RecursiveCalls,
453	Align NeededAlign, uint64_t NeededDerefBytes) {
454	Function *Callee = Arg->getParent();
455	const DataLayout &DL = Callee->getDataLayout();
456	APInt Bytes(`64`, NeededDerefBytes);
457
458	// Check if the argument itself is marked dereferenceable and aligned.
459	if (isDereferenceableAndAlignedPointer(V: Arg, Alignment: NeededAlign, Size: Bytes, DL))
460	return true;
461
462	// Look at all call sites of the function. At this point we know we only have
463	// direct callees.
464	return all_of(Range: Callee->users(), P: [&](User *U) {
465	CallBase &CB = cast<CallBase>(Val&: *U);
466	// In case of functions with recursive calls, this check
467	// (isDereferenceableAndAlignedPointer) will fail when it tries to look at
468	// the first caller of this function. The caller may or may not have a load,
469	// incase it doesn't load the pointer being passed, this check will fail.
470	// So, it's safe to skip the check incase we know that we are dealing with a
471	// recursive call. For example we have a IR given below.
472	//
473	// def fun(ptr %a) {
474	// ...
475	// %loadres = load i32, ptr %a, align 4
476	// %res = call i32 @fun(ptr %a)
477	// ...
478	// }
479	//
480	// def bar(ptr %x) {
481	// ...
482	// %resbar = call i32 @fun(ptr %x)
483	// ...
484	// }
485	//
486	// Since we record processed recursive calls, we check if the current
487	// CallBase has been processed before. If yes it means that it is a
488	// recursive call and we can skip the check just for this call. So, just
489	// return true.
490	if (RecursiveCalls.contains(Ptr: &CB))
491	return true;
492
493	return isDereferenceableAndAlignedPointer(V: CB.getArgOperand(i: Arg->getArgNo()),
494	Alignment: NeededAlign, Size: Bytes, DL);
495	});
496	}
497
498	// Try to prove that all Calls to F do not modify the memory pointed to by Arg,
499	// using alias analysis local to each caller of F.
500	static bool isArgUnmodifiedByAllCalls(Argument *Arg,
501	FunctionAnalysisManager &FAM) {
502	for (User *U : Arg->getParent()->users()) {
503
504	auto *Call = cast<CallBase>(Val: U);
505
506	MemoryLocation Loc =
507	MemoryLocation::getForArgument(Call, ArgIdx: Arg->getArgNo(), TLI: nullptr);
508
509	AAResults &AAR = FAM.getResult<AAManager>(IR&: *Call->getFunction());
510	// Bail as soon as we find a Call where Arg may be modified.
511	if (isModSet(MRI: AAR.getModRefInfo(I: Call, OptLoc: Loc)))
512	return false;
513	}
514
515	// All Users are Calls which do not modify the Arg.
516	return true;
517	}
518
519	/// Determine that this argument is safe to promote, and find the argument
520	/// parts it can be promoted into.
521	static bool findArgParts(Argument Arg, const* DataLayout &DL, AAResults &AAR,
522	unsigned MaxElements, bool IsRecursive,
523	SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec,
524	FunctionAnalysisManager &FAM) {
525	// Quick exit for unused arguments
526	if (Arg->use_empty())
527	return true;
528
529	// We can only promote this argument if all the uses are loads at known
530	// offsets.
531	//
532	// Promoting the argument causes it to be loaded in the caller
533	// unconditionally. This is only safe if we can prove that either the load
534	// would have happened in the callee anyway (ie, there is a load in the entry
535	// block) or the pointer passed in at every call site is guaranteed to be
536	// valid.
537	// In the former case, invalid loads can happen, but would have happened
538	// anyway, in the latter case, invalid loads won't happen. This prevents us
539	// from introducing an invalid load that wouldn't have happened in the
540	// original code.
541
542	SmallDenseMap<int64_t, ArgPart, `4`> ArgParts;
543	Align NeededAlign(`1`);
544	uint64_t NeededDerefBytes = `0`;
545
546	// And if this is a byval argument we also allow to have store instructions.
547	// Only handle in such way arguments with specified alignment;
548	// if it's unspecified, the actual alignment of the argument is
549	// target-specific.
550	bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
551
552	// An end user of a pointer argument is a load or store instruction.
553	// Returns std::nullopt if this load or store is not based on the argument.
554	// Return true if we can promote the instruction, false otherwise.
555	auto HandleEndUser = [&](auto I, Type Ty,
556	bool GuaranteedToExecute) -> std::optional<bool> {
557	// Don't promote volatile or atomic instructions.
558	if (!I->isSimple())
559	return false;
560
561	Value *Ptr = I->getPointerOperand();
562	APInt Offset(DL.getIndexTypeSizeInBits(Ty: Ptr->getType()), `0`);
563	Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
564	/ AllowNonInbounds / true);
565	if (Ptr != Arg)
566	return std::nullopt;
567
568	if (Offset.getSignificantBits() >= `64`)
569	return false;
570
571	TypeSize Size = DL.getTypeStoreSize(Ty);
572	// Don't try to promote scalable types.
573	if (Size.isScalable())
574	return false;
575
576	// If this is a recursive function and one of the types is a pointer,
577	// then promoting it might lead to recursive promotion.
578	if (IsRecursive && Ty->isPointerTy())
579	return false;
580
581	int64_t Off = Offset.getSExtValue();
582	auto Pair = ArgParts.try_emplace(
583	Key: Off, Args: ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
584	ArgPart &Part = Pair.first ->second;
585	bool OffsetNotSeenBefore = Pair.second;
586
587	// We limit promotion to only promoting up to a fixed number of elements of
588	// the aggregate.
589	if (MaxElements > `0` && ArgParts.size() > MaxElements) {
590	LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
591	<< "more than " << MaxElements << " parts\n");
592	return false;
593	}
594
595	// For now, we only support loading/storing one specific type at a given
596	// offset.
597	if (Part.Ty != Ty) {
598	LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
599	<< "accessed as both " << Part.Ty << " and " << Ty
600	<< " at offset " << Off << "\n");
601	return false;
602	}
603
604	// If this instruction is not guaranteed to execute, and we haven't seen a
605	// load or store at this offset before (or it had lower alignment), then we
606	// need to remember that requirement.
607	// Note that skipping instructions of previously seen offsets is only
608	// correct because we only allow a single type for a given offset, which
609	// also means that the number of accessed bytes will be the same.
610	if (!GuaranteedToExecute &&
611	(OffsetNotSeenBefore \|\| Part.Alignment < I->getAlign())) {
612	// We won't be able to prove dereferenceability for negative offsets.
613	if (Off < `0`)
614	return false;
615
616	// If the offset is not aligned, an aligned base pointer won't help.
617	if (!isAligned(I->getAlign(), Off))
618	return false;
619
620	NeededDerefBytes = std::max(a: NeededDerefBytes, b: Off + Size.getFixedValue());
621	NeededAlign = std::max(NeededAlign, I->getAlign());
622	}
623
624	Part.Alignment = std::max(Part.Alignment, I->getAlign());
625	return true;
626	};
627
628	// Look for loads and stores that are guaranteed to execute on entry.
629	for (Instruction &I : Arg->getParent()->getEntryBlock()) {
630	std::optional<bool> Res{};
631	if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I))
632	Res = HandleEndUser (LI, LI->getType(), / GuaranteedToExecute / true);
633	else if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
634	Res = HandleEndUser (SI, SI->getValueOperand()->getType(),
635	/ GuaranteedToExecute / true);
636	if (Res && !*Res)
637	return false;
638
639	if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
640	break;
641	}
642
643	// Now look at all loads of the argument. Remember the load instructions
644	// for the aliasing check below.
645	SmallVector<const Use *, `16`> Worklist;
646	SmallPtrSet<const Use *, `16`> Visited;
647	SmallVector<LoadInst *, `16`> Loads;
648	SmallPtrSet<CallBase *, `4`> RecursiveCalls;
649	auto AppendUses = [&](const Value *V) {
650	for (const Use &U : V->uses())
651	if (Visited.insert(Ptr: &U).second)
652	Worklist.push_back(Elt: &U);
653	};
654	AppendUses (Arg);
655	while (!Worklist.empty()) {
656	const Use *U = Worklist.pop_back_val();
657	Value *V = U->getUser();
658
659	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: V)) {
660	if (!GEP->hasAllConstantIndices())
661	return false;
662	AppendUses (V);
663	continue;
664	}
665
666	if (auto *LI = dyn_cast<LoadInst>(Val: V)) {
667	if (!HandleEndUser (LI, LI->getType(), /* GuaranteedToExecute / false))
668	return false;
669	Loads.push_back(Elt: LI);
670	continue;
671	}
672
673	// Stores are allowed for byval arguments
674	auto *SI = dyn_cast<StoreInst>(Val: V);
675	if (AreStoresAllowed && SI &&
676	U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
677	if (!*HandleEndUser (SI, SI->getValueOperand()->getType(),
678	/ GuaranteedToExecute / false))
679	return false;
680	continue;
681	// Only stores TO the argument is allowed, all the other stores are
682	// unknown users
683	}
684
685	auto *CB = dyn_cast<CallBase>(Val: V);
686	Value *PtrArg = U->get();
687	if (CB && CB->getCalledFunction() == CB->getFunction()) {
688	if (PtrArg != Arg) {
689	LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
690	<< "pointer offset is not equal to zero\n");
691	return false;
692	}
693
694	unsigned int ArgNo = Arg->getArgNo();
695	if (U->getOperandNo() != ArgNo) {
696	LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
697	<< "arg position is different in callee\n");
698	return false;
699	}
700
701	// We limit promotion to only promoting up to a fixed number of elements
702	// of the aggregate.
703	if (MaxElements > `0` && ArgParts.size() > MaxElements) {
704	LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
705	<< "more than " << MaxElements << " parts\n");
706	return false;
707	}
708
709	RecursiveCalls.insert(Ptr: CB);
710	continue;
711	}
712	// Unknown user.
713	LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
714	<< "unknown user " << *V << "\n");
715	return false;
716	}
717
718	if (NeededDerefBytes \|\| NeededAlign > `1`) {
719	// Try to prove a required deref / aligned requirement.
720	if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign,
721	NeededDerefBytes)) {
722	LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
723	<< "not dereferenceable or aligned\n");
724	return false;
725	}
726	}
727
728	if (ArgParts.empty())
729	return true; // No users, this is a dead argument.
730
731	// Sort parts by offset.
732	append_range(C&: ArgPartsVec, R&: ArgParts);
733	sort(C&: ArgPartsVec, Comp: llvm::less_first ());
734
735	// Make sure the parts are non-overlapping.
736	int64_t Offset = ArgPartsVec [`0`].first;
737	for (const auto &Pair : ArgPartsVec) {
738	if (Pair.first < Offset)
739	return false; // Overlap with previous part.
740
741	Offset = Pair.first + DL.getTypeStoreSize(Ty: Pair.second.Ty);
742	}
743
744	// If store instructions are allowed, the path from the entry of the function
745	// to each load may be not free of instructions that potentially invalidate
746	// the load, and this is an admissible situation.
747	if (AreStoresAllowed)
748	return true;
749
750	// Okay, now we know that the argument is only used by load instructions, and
751	// it is safe to unconditionally perform all of them.
752
753	// If we can determine that no call to the Function modifies the memory region
754	// accessed through Arg, through alias analysis using actual arguments in the
755	// callers, we know that it is guaranteed to be safe to promote the argument.
756	if (isArgUnmodifiedByAllCalls(Arg, FAM))
757	return true;
758
759	// Otherwise, use alias analysis to check if the pointer is guaranteed to not
760	// be modified from entry of the function to each of the load instructions.
761	for (LoadInst *Load : Loads) {
762	// Check to see if the load is invalidated from the start of the block to
763	// the load itself.
764	BasicBlock *BB = Load->getParent();
765
766	MemoryLocation Loc = MemoryLocation::get(LI: Load);
767	if (AAR.canInstructionRangeModRef(I1: BB->front(), I2: *Load, Loc, Mode: ModRefInfo::Mod))
768	return false; // Pointer is invalidated!
769
770	// Now check every path from the entry block to the load for transparency.
771	// To do this, we perform a depth first search on the inverse CFG from the
772	// loading block.
773	for (BasicBlock *P : predecessors(BB)) {
774	for (BasicBlock *TranspBB : inverse_depth_first(G: P))
775	if (AAR.canBasicBlockModify(BB: *TranspBB, Loc))
776	return false;
777	}
778	}
779
780	// If the path from the entry of the function to each load is free of
781	// instructions that potentially invalidate the load, we can make the
782	// transformation!
783	return true;
784	}
785
786	/// Check if callers and callee agree on how promoted arguments would be
787	/// passed.
788	static bool areTypesABICompatible(ArrayRef<Type > Types, const* Function &F,
789	const TargetTransformInfo &TTI) {
790	return all_of(Range: F.uses(), P: [&](const Use &U) {
791	CallBase *CB = dyn_cast<CallBase>(Val: U.getUser());
792	if (!CB)
793	return false;
794
795	const Function *Caller = CB->getCaller();
796	const Function *Callee = CB->getCalledFunction();
797	return TTI.areTypesABICompatible(Caller, Callee, Types);
798	});
799	}
800
801	/// PromoteArguments - This method checks the specified function to see if there
802	/// are any promotable arguments and if it is safe to promote the function (for
803	/// example, all callers are direct). If safe to promote some arguments, it
804	/// calls the DoPromotion method.
805	static Function promoteArguments(Function F, FunctionAnalysisManager &FAM,
806	unsigned MaxElements, bool IsRecursive) {
807	// Don't perform argument promotion for naked functions; otherwise we can end
808	// up removing parameters that are seemingly 'not used' as they are referred
809	// to in the assembly.
810	if (F->hasFnAttribute(Kind: Attribute::Naked))
811	return nullptr;
812
813	// Make sure that it is local to this module.
814	if (!F->hasLocalLinkage())
815	return nullptr;
816
817	// Don't promote arguments for variadic functions. Adding, removing, or
818	// changing non-pack parameters can change the classification of pack
819	// parameters. Frontends encode that classification at the call site in the
820	// IR, while in the callee the classification is determined dynamically based
821	// on the number of registers consumed so far.
822	if (F->isVarArg())
823	return nullptr;
824
825	// Don't transform functions that receive inallocas, as the transformation may
826	// not be safe depending on calling convention.
827	if (F->getAttributes().hasAttrSomewhere(Kind: Attribute::InAlloca))
828	return nullptr;
829
830	// First check: see if there are any pointer arguments! If not, quick exit.
831	SmallVector<Argument *, `16`> PointerArgs;
832	for (Argument &I : F->args())
833	if (I.getType()->isPointerTy())
834	PointerArgs.push_back(Elt: &I);
835	if (PointerArgs.empty())
836	return nullptr;
837
838	// Second check: make sure that all callers are direct callers. We can't
839	// transform functions that have indirect callers. Also see if the function
840	// is self-recursive.
841	for (Use &U : F->uses()) {
842	CallBase *CB = dyn_cast<CallBase>(Val: U.getUser());
843	// Must be a direct call.
844	if (CB == nullptr \|\| !CB->isCallee(U: &U) \|\|
845	CB->getFunctionType() != F->getFunctionType())
846	return nullptr;
847
848	// Can't change signature of musttail callee
849	if (CB->isMustTailCall())
850	return nullptr;
851
852	if (CB->getFunction() == F)
853	IsRecursive = true;
854	}
855
856	// Can't change signature of musttail caller
857	// FIXME: Support promoting whole chain of musttail functions
858	for (BasicBlock &BB : *F)
859	if (BB.getTerminatingMustTailCall())
860	return nullptr;
861
862	const DataLayout &DL = F->getDataLayout();
863	auto &AAR = FAM.getResult<AAManager>(IR&: *F);
864	const auto &TTI = FAM.getResult<TargetIRAnalysis>(IR&: *F);
865
866	// Check to see which arguments are promotable. If an argument is promotable,
867	// add it to ArgsToPromote.
868	DenseMap<Argument *, SmallVector<OffsetAndArgPart, `4`>> ArgsToPromote;
869	unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
870	for (Argument *PtrArg : PointerArgs) {
871	// Replace sret attribute with noalias. This reduces register pressure by
872	// avoiding a register copy.
873	if (PtrArg->hasStructRetAttr()) {
874	unsigned ArgNo = PtrArg->getArgNo();
875	F->removeParamAttr(ArgNo, Kind: Attribute::StructRet);
876	F->addParamAttr(ArgNo, Kind: Attribute::NoAlias);
877	for (Use &U : F->uses()) {
878	CallBase &CB = cast<CallBase>(Val&: *U.getUser());
879	CB.removeParamAttr(ArgNo, Kind: Attribute::StructRet);
880	CB.addParamAttr(ArgNo, Kind: Attribute::NoAlias);
881	}
882	}
883
884	// If we can promote the pointer to its value.
885	SmallVector<OffsetAndArgPart, `4`> ArgParts;
886
887	if (findArgParts(Arg: PtrArg, DL, AAR, MaxElements, IsRecursive, ArgPartsVec&: ArgParts,
888	FAM)) {
889	SmallVector<Type *, `4`> Types;
890	for (const auto &Pair : ArgParts)
891	Types.push_back(Elt: Pair.second.Ty);
892
893	if (areTypesABICompatible(Types, F: *F, TTI)) {
894	NumArgsAfterPromote += ArgParts.size() - `1`;
895	ArgsToPromote.insert(KV: {PtrArg, std::move(ArgParts)});
896	}
897	}
898	}
899
900	// No promotable pointer arguments.
901	if (ArgsToPromote.empty())
902	return nullptr;
903
904	if (NumArgsAfterPromote > TTI.getMaxNumArgs())
905	return nullptr;
906
907	return doPromotion(F, FAM, ArgsToPromote);
908	}
909
910	PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
911	CGSCCAnalysisManager &AM,
912	LazyCallGraph &CG,
913	CGSCCUpdateResult &UR) {
914	bool Changed = false, LocalChange;
915
916	// Iterate until we stop promoting from this SCC.
917	do {
918	LocalChange = false;
919
920	FunctionAnalysisManager &FAM =
921	AM.getResult<FunctionAnalysisManagerCGSCCProxy>(IR&: C, ExtraArgs&: CG).getManager();
922
923	bool IsRecursive = C.size() > `1`;
924	for (LazyCallGraph::Node &N : C) {
925	Function &OldF = N.getFunction();
926	Function *NewF = promoteArguments(F: &OldF, FAM, MaxElements, IsRecursive);
927	if (!NewF)
928	continue;
929	LocalChange = true;
930
931	// Directly substitute the functions in the call graph. Note that this
932	// requires the old function to be completely dead and completely
933	// replaced by the new function. It does no call graph updates, it merely
934	// swaps out the particular function mapped to a particular node in the
935	// graph.
936	C.getOuterRefSCC().replaceNodeFunction(N, NewF&: *NewF);
937	FAM.clear(IR&: OldF, Name: OldF.getName());
938	OldF.eraseFromParent();
939
940	PreservedAnalyses FuncPA;
941	FuncPA.preserveSet<CFGAnalyses>();
942	for (auto *U : NewF->users()) {
943	auto *UserF = cast<CallBase>(Val: U)->getFunction();
944	FAM.invalidate(IR&: *UserF, PA: FuncPA);
945	}
946	}
947
948	Changed \|= LocalChange;
949	} while (LocalChange);
950
951	if (!Changed)
952	return PreservedAnalyses::all();
953
954	PreservedAnalyses PA;
955	// We've cleared out analyses for deleted functions.
956	PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
957	// We've manually invalidated analyses for functions we've modified.
958	PA.preserveSet<AllAnalysesOn<Function>>();
959	return PA;
960	}
961

Browse the source code of llvm_projects/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp