//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is an optimization pass for variadic functions. If called from codegen,
// it can serve as the implementation of variadic functions for a given target.
//
// The strategy is to turn the ... part of a variadic function into a va_list
// and fix up the call sites. The majority of the pass is target independent.
// The exceptions are the va_list type itself and the rules for where to store
// variables in memory such that va_arg can iterate over them given a va_list.
//
// The majority of the plumbing is splitting the variadic function into a
// single basic block that packs the variadic arguments into a va_list and
// a second function that does the work of the original. That packing is
// exactly what is done by va_start. Further, the transform from ... to va_list
// replaces va_start with an operation to copy a va_list from the new argument,
// which is exactly a va_copy. This is useful for reducing target-dependence.
//
// A va_list instance is a forward iterator, where the primary operation va_arg
// is dereference-then-increment. This interface forces significant convergent
// evolution between target specific implementations. The variation in runtime
// data layout is limited to that representable by the iterator, parameterised
// by the type passed to the va_arg instruction.
//
// Therefore the majority of the target specific subtlety is packing arguments
// into a stack allocated buffer such that a va_list can be initialised with it
// and the va_arg expansion for the target will find the arguments at runtime.
//
// The aggregate effect is to unblock other transforms, most critically the
// general purpose inliner. Known calls to variadic functions become zero cost.
//
// Consistency with clang is primarily tested by emitting va_arg using clang
// then expanding the variadic functions using this pass, followed by trying
// to constant fold the functions to no-ops.
//
// Target specific behaviour is tested in IR - mainly checking that values are
// put into positions in call frames that make sense for that particular target.
//
// There is one "clever" invariant in use. va_start intrinsics that are not
// within a variadic function are an error in the IR verifier. When this
// transform moves blocks from a variadic function into a fixed arity one, it
// moves va_start intrinsics along with everything else. That means that the
// va_start intrinsics that need to be rewritten to use the trailing argument
// are exactly those that are in non-variadic functions, so no further state
// is needed to distinguish those that need to be rewritten.
//
//===----------------------------------------------------------------------===//
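
// As an illustrative sketch (C syntax and the ".valist" suffix shown here are
// only for exposition; the pass works on IR), a variadic definition such as
//
//   int sum(int count, ...);
//
// is split into a fixed arity clone that takes an explicit va_list,
//
//   int sum.valist(int count, va_list args);
//
// plus a thin variadic wrapper, still named sum, whose single basic block
// packs its ... into a va_list and forwards to sum.valist. Known call sites
// are then rewritten to build the va_list themselves and call sum.valist
// directly, at which point the general purpose inliner can consume them.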

#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#define DEBUG_TYPE "expand-variadics"

using namespace llvm;

namespace {

cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
    DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
    cl::init(ExpandVariadicsMode::Unspecified),
    cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
                          "Use the implementation defaults"),
               clEnumValN(ExpandVariadicsMode::Disable, "disable",
                          "Disable the pass entirely"),
               clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
                          "Optimise without changing ABI"),
               clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
                          "Change variadic calling convention")));

bool commandLineOverride() {
  return ExpandVariadicsModeOption != ExpandVariadicsMode::Unspecified;
}

// Instances of this class encapsulate the target-dependent behaviour as a
// function of triple. Implementing a new ABI means adding a case to the switch
// in create(llvm::Triple) at the end of this file.
// This class may end up instantiated in TargetMachine instances; it is kept
// here for now until enough targets are implemented for the API to evolve.
class VariadicABIInfo {
protected:
  VariadicABIInfo() = default;

public:
  static std::unique_ptr<VariadicABIInfo> create(const Triple &T);

  // Allow overriding whether the pass runs on a per-target basis
  virtual bool enableForTarget() = 0;

  // Whether a va_list instance is passed by value or by address,
  // i.e. does it need to be alloca'ed and stored into, or can
  // it be passed directly in an SSA register
  virtual bool vaListPassedInSSARegister() = 0;

  // The type of a va_list iterator object
  virtual Type *vaListType(LLVMContext &Ctx) = 0;

  // The type of a va_list as a function argument as lowered by C
  virtual Type *vaListParameterType(Module &M) = 0;

  // Initialize an allocated va_list object to point to an already
  // initialized contiguous memory region.
  // Return the value to pass as the va_list argument
  virtual Value *initializeVaList(Module &M, LLVMContext &Ctx,
                                  IRBuilder<> &Builder, AllocaInst *VaList,
                                  Value *Buffer) = 0;

  struct VAArgSlotInfo {
    Align DataAlign; // With respect to the call frame
    bool Indirect;   // Passed via a pointer
  };
  virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;

  // Targets implemented so far all have the same trivial lowering for these
  bool vaEndIsNop() { return true; }
  bool vaCopyIsMemcpy() { return true; }

  virtual ~VariadicABIInfo() = default;
};
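
// For reference, supporting a new target means implementing this interface and
// adding a case to the switch in VariadicABIInfo::create at the end of this
// file. The sketch below is purely illustrative (the struct name and the
// choice of a pointer-sized va_list are invented); a real implementation must
// agree with the target's existing va_arg lowering.
//
//   struct SomeTarget final : public VariadicABIInfo {
//     bool enableForTarget() override { return true; }
//     bool vaListPassedInSSARegister() override { return true; }
//     Type *vaListType(LLVMContext &Ctx) override {
//       return PointerType::getUnqual(Ctx);
//     }
//     Type *vaListParameterType(Module &M) override {
//       return PointerType::getUnqual(M.getContext());
//     }
//     Value *initializeVaList(Module &M, LLVMContext &Ctx,
//                             IRBuilder<> &Builder, AllocaInst * /*VaList*/,
//                             Value *Buffer) override {
//       return Buffer; // The va_list is a raw pointer into the packed frame
//     }
//     VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
//       return {DL.getABITypeAlign(Parameter), /*Indirect=*/false};
//     }
//   };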

class ExpandVariadics : public ModulePass {

  // The pass construction sets the default to optimize when called from the
  // middle end and to lowering when called from the backend. The command line
  // variable overrides that. This is useful for testing and debugging. It also
  // allows building applications with variadic functions wholly removed if one
  // has sufficient control over the dependencies, e.g. a statically linked
  // clang that has no variadic function calls remaining in the binary.

public:
  static char ID;
  const ExpandVariadicsMode Mode;
  std::unique_ptr<VariadicABIInfo> ABI;

  ExpandVariadics(ExpandVariadicsMode Mode)
      : ModulePass(ID),
        Mode(commandLineOverride() ? ExpandVariadicsModeOption : Mode) {}

  StringRef getPassName() const override { return "Expand variadic functions"; }

  bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }

  bool runOnModule(Module &M) override;

  bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);

  Function *replaceAllUsesWithNewDeclaration(Module &M,
                                             Function *OriginalFunction);

  Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
                                        Function *OriginalFunction);

  Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
                                  Function *VariadicWrapper,
                                  Function *FixedArityReplacement);

  bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
                  Function *NF);

  // The intrinsic functions va_copy and va_end are removed unconditionally.
  // They correspond to a memcpy and a no-op on all implemented targets.
  // The va_start intrinsic is removed from basic blocks that were not created
  // by this pass; some may remain if needed to maintain the external ABI.

  template <Intrinsic::ID ID, typename InstructionType>
  bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
                            PointerType *IntrinsicArgType) {
    bool Changed = false;
    const DataLayout &DL = M.getDataLayout();
    if (Function *Intrinsic =
            Intrinsic::getDeclarationIfExists(&M, ID, {IntrinsicArgType})) {
      for (User *U : make_early_inc_range(Intrinsic->users()))
        if (auto *I = dyn_cast<InstructionType>(U))
          Changed |= expandVAIntrinsicCall(Builder, DL, I);

      if (Intrinsic->use_empty())
        Intrinsic->eraseFromParent();
    }
    return Changed;
  }

  bool expandVAIntrinsicUsersWithAddrspace(Module &M, IRBuilder<> &Builder,
                                           unsigned Addrspace) {
    auto &Ctx = M.getContext();
    PointerType *IntrinsicArgType = PointerType::get(Ctx, Addrspace);
    bool Changed = false;

    // Expand vastart before vacopy, as vastart may introduce a vacopy
    Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(
        M, Builder, IntrinsicArgType);
    Changed |= expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(
        M, Builder, IntrinsicArgType);
    Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(
        M, Builder, IntrinsicArgType);
    return Changed;
  }

  bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
                             VAStartInst *Inst);

  bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
                             VAEndInst *Inst);

  bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
                             VACopyInst *Inst);

  FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
    // The type of "FTy" with the ... removed and a va_list appended
    SmallVector<Type *> ArgTypes(FTy->params());
    ArgTypes.push_back(ABI->vaListParameterType(M));
    return FunctionType::get(FTy->getReturnType(), ArgTypes,
                             /*IsVarArg=*/false);
  }
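
  // For example, on the targets implemented below, where the va_list lowers to
  // a pointer, a variadic i32(i32, ...) is rewritten by the above into the
  // fixed arity i32(i32, ptr), with the trailing parameter carrying the
  // va_list.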

  static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
                                   AllocaInst *Alloced) {
    std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
    uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
    return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
  }

  bool expansionApplicableToFunction(Module &M, Function *F) {
    if (F->isIntrinsic() || !F->isVarArg() ||
        F->hasFnAttribute(Attribute::Naked))
      return false;

    if (F->getCallingConv() != CallingConv::C)
      return false;

    if (rewriteABI())
      return true;

    if (!F->hasExactDefinition())
      return false;

    return true;
  }

  bool expansionApplicableToFunctionCall(CallBase *CB) {
    if (CallInst *CI = dyn_cast<CallInst>(CB)) {
      if (CI->isMustTailCall()) {
        // Cannot expand musttail calls
        return false;
      }

      if (CI->getCallingConv() != CallingConv::C)
        return false;

      return true;
    }

    if (isa<InvokeInst>(CB)) {
      // Invoke not implemented in initial implementation of pass
      return false;
    }

    // Other unimplemented derivative of CallBase
    return false;
  }

  class ExpandedCallFrame {
    // Helper for constructing an alloca instance containing the arguments bound
    // to the variadic ... parameter, rearranged to allow indexing through a
    // va_list iterator
    enum { N = 4 };
    SmallVector<Type *, N> FieldTypes;
    enum Tag { Store, Memcpy, Padding };
    SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;

    template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
      FieldTypes.push_back(FieldType);
      Source.push_back({V, Bytes, tag});
    }

  public:
    void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }

    void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
      append<Memcpy>(T, V, Bytes);
    }

    void padding(LLVMContext &Ctx, uint64_t By) {
      append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
    }

    size_t size() const { return FieldTypes.size(); }
    bool empty() const { return FieldTypes.empty(); }

    StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
      const bool IsPacked = true;
      return StructType::create(Ctx, FieldTypes,
                                (Twine(Name) + ".vararg").str(), IsPacked);
    }

    void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
                                AllocaInst *Alloced) {

      StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());

      for (size_t I = 0; I < size(); I++) {

        auto [V, bytes, tag] = Source[I];

        if (tag == Padding) {
          assert(V == nullptr);
          continue;
        }

        auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);

        assert(V != nullptr);

        if (tag == Store)
          Builder.CreateStore(V, Dst);

        if (tag == Memcpy)
          Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
      }
    }
  };
};

bool ExpandVariadics::runOnModule(Module &M) {
  bool Changed = false;
  if (Mode == ExpandVariadicsMode::Disable)
    return Changed;

  Triple TT(M.getTargetTriple());
  ABI = VariadicABIInfo::create(TT);
  if (!ABI)
    return Changed;

  if (!ABI->enableForTarget())
    return Changed;

  auto &Ctx = M.getContext();
  const DataLayout &DL = M.getDataLayout();
  IRBuilder<> Builder(Ctx);

  // Lowering needs to run on all functions exactly once.
  // Optimize could run on functions containing va_start exactly once.
  for (Function &F : make_early_inc_range(M))
    Changed |= runOnFunction(M, Builder, &F);

  // After runOnFunction, all known calls to known variadic functions have been
  // replaced. va_start intrinsics are presently (and invalidly!) only present
  // in functions that used to be variadic and have now been replaced to take a
  // va_list instead. If lowering as opposed to optimising, calls to unknown
  // variadic functions have also been replaced.

  {
    // 0 and AllocaAddrSpace are sufficient for the targets implemented so far
    unsigned Addrspace = 0;
    Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);

    Addrspace = DL.getAllocaAddrSpace();
    if (Addrspace != 0)
      Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
  }

  if (Mode != ExpandVariadicsMode::Lowering)
    return Changed;

  for (Function &F : make_early_inc_range(M)) {
    if (F.isDeclaration())
      continue;

    // Now need to track down indirect calls. Can't find those by walking uses
    // of variadic functions; need to crawl the instruction stream. Fortunately
    // this is only necessary for the ABI rewrite case.
    for (BasicBlock &BB : F) {
      for (Instruction &I : make_early_inc_range(BB)) {
        if (CallBase *CB = dyn_cast<CallBase>(&I)) {
          if (CB->isIndirectCall()) {
            FunctionType *FTy = CB->getFunctionType();
            if (FTy->isVarArg())
              Changed |= expandCall(M, Builder, CB, FTy, 0);
          }
        }
      }
    }
  }

  return Changed;
}

bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
                                    Function *OriginalFunction) {
  bool Changed = false;

  if (!expansionApplicableToFunction(M, OriginalFunction))
    return Changed;

  [[maybe_unused]] const bool OriginalFunctionIsDeclaration =
      OriginalFunction->isDeclaration();
  assert(rewriteABI() || !OriginalFunctionIsDeclaration);

  // Declare a new function and redirect every use to that new function
  Function *VariadicWrapper =
      replaceAllUsesWithNewDeclaration(M, OriginalFunction);
  assert(VariadicWrapper->isDeclaration());
  assert(OriginalFunction->use_empty());

  // Create a new function taking va_list containing the implementation of the
  // original
  Function *FixedArityReplacement =
      deriveFixedArityReplacement(M, Builder, OriginalFunction);
  assert(OriginalFunction->isDeclaration());
  assert(FixedArityReplacement->isDeclaration() ==
         OriginalFunctionIsDeclaration);
  assert(VariadicWrapper->isDeclaration());

  // Create a single block forwarding wrapper that turns a ... into a va_list
  [[maybe_unused]] Function *VariadicWrapperDefine =
      defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
  assert(VariadicWrapperDefine == VariadicWrapper);
  assert(!VariadicWrapper->isDeclaration());

  // We now have:
  // 1. the original function, now as a declaration with no uses
  // 2. a variadic function that unconditionally calls a fixed arity replacement
  // 3. a fixed arity function equivalent to the original function

  // Replace known calls to the variadic with calls to the va_list equivalent
  for (User *U : make_early_inc_range(VariadicWrapper->users())) {
    if (CallBase *CB = dyn_cast<CallBase>(U)) {
      Value *CalledOperand = CB->getCalledOperand();
      if (VariadicWrapper == CalledOperand)
        Changed |=
            expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
                       FixedArityReplacement);
    }
  }

  // The original function will be erased.
  // One of the two new functions will become a replacement for the original.
  // When preserving the ABI, the other is an internal implementation detail.
  // When rewriting the ABI, the variadic one is RAUW'd away and then erased.
  Function *const ExternallyAccessible =
      rewriteABI() ? FixedArityReplacement : VariadicWrapper;
  Function *const InternalOnly =
      rewriteABI() ? VariadicWrapper : FixedArityReplacement;

  // The external function is the replacement for the original
  ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
  ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
  ExternallyAccessible->setComdat(OriginalFunction->getComdat());
  ExternallyAccessible->takeName(OriginalFunction);

  // Annotate the internal one as internal
  InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
  InternalOnly->setLinkage(GlobalValue::InternalLinkage);

  // The original is unused and obsolete
  OriginalFunction->eraseFromParent();

  InternalOnly->removeDeadConstantUsers();

  if (rewriteABI()) {
    // All known calls to the function have been removed by expandCall.
    // Resolve everything else by replaceAllUsesWith.
    VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
    VariadicWrapper->eraseFromParent();
  }

  return Changed;
}

Function *
ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
                                                  Function *OriginalFunction) {
  auto &Ctx = M.getContext();
  Function &F = *OriginalFunction;
  FunctionType *FTy = F.getFunctionType();
  Function *NF = Function::Create(FTy, F.getLinkage(), F.getAddressSpace());

  NF->setName(F.getName() + ".varargs");

  F.getParent()->getFunctionList().insert(F.getIterator(), NF);

  AttrBuilder ParamAttrs(Ctx);
  AttributeList Attrs = NF->getAttributes();
  Attrs = Attrs.addParamAttributes(Ctx, FTy->getNumParams(), ParamAttrs);
  NF->setAttributes(Attrs);

  OriginalFunction->replaceAllUsesWith(NF);
  return NF;
}

Function *
ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
                                             Function *OriginalFunction) {
  Function &F = *OriginalFunction;
  // The purpose here is to split the variadic function F into two functions.
  // One is a variadic function that bundles the passed arguments into a
  // va_list and passes it to the second function. The second function does
  // whatever the original F does, except that it takes a va_list instead of
  // the ...

  assert(expansionApplicableToFunction(M, &F));

  auto &Ctx = M.getContext();

  // Returned value isDeclaration() is equal to F.isDeclaration()
  // but that property is not invariant throughout this function
  const bool FunctionIsDefinition = !F.isDeclaration();

  FunctionType *FTy = F.getFunctionType();
  SmallVector<Type *> ArgTypes(FTy->params());
  ArgTypes.push_back(ABI->vaListParameterType(M));

  FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
  Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());

  // Note - same attribute handling as DeadArgumentElimination
  NF->copyAttributesFrom(&F);
  NF->setComdat(F.getComdat());
  F.getParent()->getFunctionList().insert(F.getIterator(), NF);
  NF->setName(F.getName() + ".valist");

  AttrBuilder ParamAttrs(Ctx);

  AttributeList Attrs = NF->getAttributes();
  Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
  NF->setAttributes(Attrs);

  // Splice the implementation into the new function with minimal changes
  if (FunctionIsDefinition) {
    NF->splice(NF->begin(), &F);

    auto NewArg = NF->arg_begin();
    for (Argument &Arg : F.args()) {
      Arg.replaceAllUsesWith(NewArg);
      NewArg->setName(Arg.getName()); // takeName without killing the old one
      ++NewArg;
    }
    NewArg->setName("varargs");
  }

  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
  F.getAllMetadata(MDs);
  for (auto [KindID, Node] : MDs)
    NF->addMetadata(KindID, *Node);
  F.clearMetadata();

  return NF;
}

Function *
ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
                                       Function *VariadicWrapper,
                                       Function *FixedArityReplacement) {
  auto &Ctx = Builder.getContext();
  const DataLayout &DL = M.getDataLayout();
  assert(VariadicWrapper->isDeclaration());
  Function &F = *VariadicWrapper;

  assert(F.isDeclaration());
  Type *VaListTy = ABI->vaListType(Ctx);

  auto *BB = BasicBlock::Create(Ctx, "entry", &F);
  Builder.SetInsertPoint(BB);

  AllocaInst *VaListInstance =
      Builder.CreateAlloca(VaListTy, nullptr, "va_start");

  Builder.CreateLifetimeStart(VaListInstance,
                              sizeOfAlloca(Ctx, DL, VaListInstance));

  Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
                          {VaListInstance});

  SmallVector<Value *> Args(llvm::make_pointer_range(F.args()));

  Type *ParameterType = ABI->vaListParameterType(M);
  if (ABI->vaListPassedInSSARegister())
    Args.push_back(Builder.CreateLoad(ParameterType, VaListInstance));
  else
    Args.push_back(Builder.CreateAddrSpaceCast(VaListInstance, ParameterType));

  CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);

  Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
                          {VaListInstance});
  Builder.CreateLifetimeEnd(VaListInstance,
                            sizeOfAlloca(Ctx, DL, VaListInstance));

  if (Result->getType()->isVoidTy())
    Builder.CreateRetVoid();
  else
    Builder.CreateRet(Result);

  return VariadicWrapper;
}
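
// The wrapper built above amounts to, in C-like pseudocode (illustrative only;
// the emitted IR and the exact va_list handling are target specific):
//
//   ret_t wrapper(args..., ...) {
//     va_list va;
//     va_start(va, last_named_arg);
//     ret_t r = fixed_arity_replacement(args..., va);
//     va_end(va);
//     return r;
//   }
//
// Unknown callers keep using this wrapper; known call sites are rewritten by
// expandCall to build the va_list themselves and call the fixed arity
// replacement directly.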

bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
                                 FunctionType *VarargFunctionType,
                                 Function *NF) {
  bool Changed = false;
  const DataLayout &DL = M.getDataLayout();

  if (!expansionApplicableToFunctionCall(CB)) {
    if (rewriteABI())
      report_fatal_error("Cannot lower callbase instruction");
    return Changed;
  }

  // This is tricky. The call instruction's function type might not match
  // the type of the callee. When optimising, we can leave it unchanged.
  // Webassembly detects that inconsistency and repairs it.
  FunctionType *FuncType = CB->getFunctionType();
  if (FuncType != VarargFunctionType) {
    if (!rewriteABI())
      return Changed;
    FuncType = VarargFunctionType;
  }

  auto &Ctx = CB->getContext();

  Align MaxFieldAlign(1);

  // The strategy is to allocate a call frame containing the variadic
  // arguments laid out such that a target specific va_list can be initialized
  // with it, such that target specific va_arg instructions will correctly
  // iterate over it. This means getting the alignment right and sometimes
  // embedding a pointer to the value instead of embedding the value itself.
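  //
  // As a concrete illustration (argument types invented for the example), a
  // call passing an i32 and a double through the ... of a printf-like callee,
  // on a target whose slotInfo requires 8 byte alignment for double, yields a
  // packed frame type named after the calling function, roughly
  //
  //   %caller.vararg = type <{ i32, [4 x i8], double }>
  //
  // i.e. the i32, explicit padding so the double reaches its slot alignment,
  // then the double. The target's va_list is then initialized to point at an
  // alloca of that struct.
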
  Function *CBF = CB->getParent()->getParent();

  ExpandedCallFrame Frame;

  uint64_t CurrentOffset = 0;

  for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
    Value *ArgVal = CB->getArgOperand(I);
    const bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
    const bool IsByRef = CB->paramHasAttr(I, Attribute::ByRef);

    // The type of the value being passed, decoded from byval/byref metadata if
    // required
    Type *const UnderlyingType = IsByVal   ? CB->getParamByValType(I)
                                 : IsByRef ? CB->getParamByRefType(I)
                                           : ArgVal->getType();
    const uint64_t UnderlyingSize =
        DL.getTypeAllocSize(UnderlyingType).getFixedValue();

    // The type to be written into the call frame
    Type *FrameFieldType = UnderlyingType;

    // The value to copy from when initialising the frame alloca
    Value *SourceValue = ArgVal;

    VariadicABIInfo::VAArgSlotInfo SlotInfo = ABI->slotInfo(DL, UnderlyingType);

    if (SlotInfo.Indirect) {
      // The va_arg lowering loads through a pointer. Set up an alloca to aim
      // that pointer at.
      Builder.SetInsertPointPastAllocas(CBF);
      Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
      Value *CallerCopy =
          Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");

      Builder.SetInsertPoint(CB);
      if (IsByVal)
        Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
      else
        Builder.CreateStore(ArgVal, CallerCopy);

      // Indirection now handled, pass the alloca ptr by value
      FrameFieldType = DL.getAllocaPtrType(Ctx);
      SourceValue = CallerCopy;
    }

    // Alignment of the value within the frame
    // This probably needs to be controllable as a function of type
    Align DataAlign = SlotInfo.DataAlign;

    MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);

    uint64_t DataAlignV = DataAlign.value();
    if (uint64_t Rem = CurrentOffset % DataAlignV) {
      // Inject explicit padding to deal with alignment requirements
      uint64_t Padding = DataAlignV - Rem;
      Frame.padding(Ctx, Padding);
      CurrentOffset += Padding;
    }

    if (SlotInfo.Indirect) {
      Frame.store(Ctx, FrameFieldType, SourceValue);
    } else {
      if (IsByVal)
        Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
      else
        Frame.store(Ctx, FrameFieldType, SourceValue);
    }

    CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
  }

  if (Frame.empty()) {
    // Not passing any arguments, hopefully va_arg won't try to read any.
    // Create a single byte frame containing nothing to point the va_list
    // instance at, as that is less special-casey in the compiler and probably
    // easier to interpret in a debugger.
    Frame.padding(Ctx, 1);
  }

  StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());

  // The struct instance needs to be at least MaxFieldAlign for the alignment of
  // the fields to be correct at runtime. Use the native stack alignment instead
  // if that's greater as that tends to give better codegen.
  // This is an awkward way to guess whether there is a known stack alignment
  // without hitting an assert in DL.getStackAlignment; 1024 is an arbitrary
  // number likely to be greater than the natural stack alignment.
  Align AllocaAlign = MaxFieldAlign;
  if (MaybeAlign StackAlign = DL.getStackAlignment();
      StackAlign && *StackAlign > AllocaAlign)
    AllocaAlign = *StackAlign;

  // Put the alloca to hold the variadic args in the entry basic block.
  Builder.SetInsertPointPastAllocas(CBF);

  // Set the debug location explicitly, as SetInsertPointPastAllocas does not.
  Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());

  // The awkward construction here is to set the alignment on the instance
  AllocaInst *Alloced = Builder.Insert(
      new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
      "vararg_buffer");
  Changed = true;
  assert(Alloced->getAllocatedType() == VarargsTy);

  // Initialize the fields in the struct
  Builder.SetInsertPoint(CB);
  Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
  Frame.initializeStructAlloca(DL, Builder, Alloced);

  const unsigned NumArgs = FuncType->getNumParams();
  SmallVector<Value *> Args(CB->arg_begin(), CB->arg_begin() + NumArgs);

  // Initialize a va_list pointing to that struct and pass it as the last
  // argument
  AllocaInst *VaList = nullptr;
  {
    if (!ABI->vaListPassedInSSARegister()) {
      Type *VaListTy = ABI->vaListType(Ctx);
      Builder.SetInsertPointPastAllocas(CBF);
      Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
      VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument");
      Builder.SetInsertPoint(CB);
      Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
    }
    Builder.SetInsertPoint(CB);
    Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced));
  }

  // Attributes excluding any on the vararg arguments
  AttributeList PAL = CB->getAttributes();
  if (!PAL.isEmpty()) {
    SmallVector<AttributeSet, 8> ArgAttrs;
    for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
      ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
    PAL =
        AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
  }

  SmallVector<OperandBundleDef, 1> OpBundles;
  CB->getOperandBundlesAsDefs(OpBundles);

  CallBase *NewCB = nullptr;

  if (CallInst *CI = dyn_cast<CallInst>(CB)) {
    Value *Dst = NF ? NF : CI->getCalledOperand();
    FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);

    NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI->getIterator());

    CallInst::TailCallKind TCK = CI->getTailCallKind();
    assert(TCK != CallInst::TCK_MustTail);

    // Can't tail call a function that is being passed a pointer to an alloca
    if (TCK == CallInst::TCK_Tail)
      TCK = CallInst::TCK_None;
    CI->setTailCallKind(TCK);

  } else {
    llvm_unreachable("Unreachable when !expansionApplicableToFunctionCall()");
  }

  if (VaList)
    Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));

  Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));

  NewCB->setAttributes(PAL);
  NewCB->takeName(CB);
  NewCB->setCallingConv(CB->getCallingConv());
  NewCB->setDebugLoc(DebugLoc());

  // DeadArgElim and ArgPromotion copy exactly this metadata
  NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});

  CB->replaceAllUsesWith(NewCB);
  CB->eraseFromParent();
  return Changed;
}

bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
                                            const DataLayout &DL,
                                            VAStartInst *Inst) {
  // Only removing va_start instructions that are not in variadic functions.
  // Those would be rejected by the IR verifier before this pass.
  // After splicing basic blocks from a variadic function into a fixed arity
  // one, the va_start intrinsics that used to refer to the ... parameter still
  // exist. There are also variadic functions that this pass did not change and
  // va_start instances in the created single block wrapper functions.
  // Replace exactly the instances in non-variadic functions as those are
  // the ones to be fixed up to use the va_list passed as the final argument.

  Function *ContainingFunction = Inst->getFunction();
  if (ContainingFunction->isVarArg()) {
    return false;
  }

  // The last argument is a vaListParameterType, either a va_list
  // or a pointer to one depending on the target.
  bool PassedByValue = ABI->vaListPassedInSSARegister();
  Argument *PassedVaList =
      ContainingFunction->getArg(ContainingFunction->arg_size() - 1);

  // va_start takes a pointer to a va_list, e.g. one on the stack
  Value *VaStartArg = Inst->getArgList();

  Builder.SetInsertPoint(Inst);

  if (PassedByValue) {
    // The general thing to do is create an alloca, store the va_list argument
    // to it, then create a va_copy. When vaCopyIsMemcpy(), this optimises to a
    // store to the VaStartArg.
    assert(ABI->vaCopyIsMemcpy());
    Builder.CreateStore(PassedVaList, VaStartArg);
  } else {

    // Otherwise emit a vacopy to pick up target-specific handling if any
    auto &Ctx = Builder.getContext();

    Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)},
                            {VaStartArg, PassedVaList});
  }

  Inst->eraseFromParent();
  return true;
}

bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
                                            VAEndInst *Inst) {
  assert(ABI->vaEndIsNop());
  Inst->eraseFromParent();
  return true;
}

bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
                                            const DataLayout &DL,
                                            VACopyInst *Inst) {
  assert(ABI->vaCopyIsMemcpy());
  Builder.SetInsertPoint(Inst);

  auto &Ctx = Builder.getContext();
  Type *VaListTy = ABI->vaListType(Ctx);
  uint64_t Size = DL.getTypeAllocSize(VaListTy).getFixedValue();

  Builder.CreateMemCpy(Inst->getDest(), {}, Inst->getSrc(), {},
                       Builder.getInt32(Size));

  Inst->eraseFromParent();
  return true;
}

struct Amdgpu final : public VariadicABIInfo {

  bool enableForTarget() override { return true; }

  bool vaListPassedInSSARegister() override { return true; }

  Type *vaListType(LLVMContext &Ctx) override {
    return PointerType::getUnqual(Ctx);
  }

  Type *vaListParameterType(Module &M) override {
    return PointerType::getUnqual(M.getContext());
  }

  Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
                          AllocaInst * /*va_list*/, Value *Buffer) override {
    // Given Buffer, which is an AllocaInst of vararg_buffer,
    // return something usable as the parameter type
    return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
  }

  VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
    return {Align(4), false};
  }
};

struct NVPTX final : public VariadicABIInfo {

  bool enableForTarget() override { return true; }

  bool vaListPassedInSSARegister() override { return true; }

  Type *vaListType(LLVMContext &Ctx) override {
    return PointerType::getUnqual(Ctx);
  }

  Type *vaListParameterType(Module &M) override {
    return PointerType::getUnqual(M.getContext());
  }

  Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
                          AllocaInst *, Value *Buffer) override {
    return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
  }

  VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
    // NVPTX expects natural alignment in all cases. The variadic call ABI will
    // handle promoting types to their appropriate size and alignment.
    Align A = DL.getABITypeAlign(Parameter);
    return {A, false};
  }
};

struct Wasm final : public VariadicABIInfo {

  bool enableForTarget() override {
    // Currently wasm is only used for testing.
    return commandLineOverride();
  }

  bool vaListPassedInSSARegister() override { return true; }

  Type *vaListType(LLVMContext &Ctx) override {
    return PointerType::getUnqual(Ctx);
  }

  Type *vaListParameterType(Module &M) override {
    return PointerType::getUnqual(M.getContext());
  }

  Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
                          AllocaInst * /*va_list*/, Value *Buffer) override {
    return Buffer;
  }

  VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
    LLVMContext &Ctx = Parameter->getContext();
    const unsigned MinAlign = 4;
    Align A = DL.getABITypeAlign(Parameter);
    if (A < MinAlign)
      A = Align(MinAlign);

    if (auto *S = dyn_cast<StructType>(Parameter)) {
      if (S->getNumElements() > 1) {
        return {DL.getABITypeAlign(PointerType::getUnqual(Ctx)), true};
      }
    }

    return {A, false};
  }
};

std::unique_ptr<VariadicABIInfo> VariadicABIInfo::create(const Triple &T) {
  switch (T.getArch()) {
  case Triple::r600:
  case Triple::amdgcn: {
    return std::make_unique<Amdgpu>();
  }

  case Triple::wasm32: {
    return std::make_unique<Wasm>();
  }

  case Triple::nvptx:
  case Triple::nvptx64: {
    return std::make_unique<NVPTX>();
  }

  default:
    return {};
  }
}

} // namespace

char ExpandVariadics::ID = 0;

INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
                false)

ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
  return new ExpandVariadics(M);
}

PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
  return ExpandVariadics(Mode).runOnModule(M) ? PreservedAnalyses::none()
                                              : PreservedAnalyses::all();
}

ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {}