1//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the lowering of LLVM calls to machine code calls for
11/// GlobalISel.
12///
13//===----------------------------------------------------------------------===//
14
15#include "AArch64CallLowering.h"
16#include "AArch64GlobalISelUtils.h"
17#include "AArch64ISelLowering.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64RegisterInfo.h"
20#include "AArch64SMEAttributes.h"
21#include "AArch64Subtarget.h"
22#include "AArch64TargetMachine.h"
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/Analysis/ObjCARCUtil.h"
26#include "llvm/CodeGen/Analysis.h"
27#include "llvm/CodeGen/CallingConvLower.h"
28#include "llvm/CodeGen/FunctionLoweringInfo.h"
29#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
30#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
31#include "llvm/CodeGen/GlobalISel/Utils.h"
32#include "llvm/CodeGen/LowLevelTypeUtils.h"
33#include "llvm/CodeGen/MachineBasicBlock.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/CodeGen/TargetRegisterInfo.h"
42#include "llvm/CodeGen/TargetSubtargetInfo.h"
43#include "llvm/CodeGen/ValueTypes.h"
44#include "llvm/CodeGenTypes/MachineValueType.h"
45#include "llvm/IR/Argument.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/Value.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53
54#define DEBUG_TYPE "aarch64-call-lowering"
55
56using namespace llvm;
57using namespace AArch64GISelUtils;
58
59extern cl::opt<bool> EnableSVEGISel;
60
61AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
62 : CallLowering(&TLI) {}
63
64static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
65 MVT &LocVT) {
66 // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
67 // hack because the DAG calls the assignment function with pre-legalized
68 // register typed values, not the raw type.
69 //
70 // This hack is not applied to return values which are not passed on the
71 // stack.
72 if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
73 ValVT = LocVT = MVT::i8;
74 else if (OrigVT == MVT::i16)
75 ValVT = LocVT = MVT::i16;
76}
77
78// Account for i1/i8/i16 stack passed value hack
79static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
80 const MVT ValVT = VA.getValVT();
81 return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
82 : LLT(VA.getLocVT());
83}
84
85namespace {
86
87struct AArch64IncomingValueAssigner
88 : public CallLowering::IncomingValueAssigner {
89 AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
90 CCAssignFn *AssignFnVarArg_)
91 : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
92
93 bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
94 CCValAssign::LocInfo LocInfo,
95 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
96 CCState &State) override {
97 applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
98 return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
99 LocInfo, Info, Flags, State);
100 }
101};
102
103struct AArch64OutgoingValueAssigner
104 : public CallLowering::OutgoingValueAssigner {
105 const AArch64Subtarget &Subtarget;
106
107 /// Track if this is used for a return instead of function argument
108 /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
109 /// stack passed returns for them and cannot apply the type adjustment.
110 bool IsReturn;
111
112 AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
113 CCAssignFn *AssignFnVarArg_,
114 const AArch64Subtarget &Subtarget_,
115 bool IsReturn)
116 : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
117 Subtarget(Subtarget_), IsReturn(IsReturn) {}
118
119 bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
120 CCValAssign::LocInfo LocInfo,
121 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
122 CCState &State) override {
123 const Function &F = State.getMachineFunction().getFunction();
124 bool IsCalleeWin =
125 Subtarget.isCallingConvWin64(CC: State.getCallingConv(), IsVarArg: F.isVarArg());
126 bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
127
128 bool Res;
129 if (!Flags.isVarArg() && !UseVarArgsCCForFixed) {
130 if (!IsReturn)
131 applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
132 Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, Info.Ty, State);
133 } else
134 Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, Info.Ty, State);
135
136 StackSize = State.getStackSize();
137 return Res;
138 }
139};
140
141struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
142 IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
143 : IncomingValueHandler(MIRBuilder, MRI) {}
144
145 Register getStackAddress(uint64_t Size, int64_t Offset,
146 MachinePointerInfo &MPO,
147 ISD::ArgFlagsTy Flags) override {
148 auto &MFI = MIRBuilder.getMF().getFrameInfo();
149
150 // Byval is assumed to be writable memory, but other stack passed arguments
151 // are not.
152 const bool IsImmutable = !Flags.isByVal();
153
154 int FI = MFI.CreateFixedObject(Size, SPOffset: Offset, IsImmutable);
155 MPO = MachinePointerInfo::getFixedStack(MF&: MIRBuilder.getMF(), FI);
156 auto AddrReg = MIRBuilder.buildFrameIndex(Res: LLT::pointer(AddressSpace: 0, SizeInBits: 64), Idx: FI);
157 return AddrReg.getReg(Idx: 0);
158 }
159
160 LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
161 ISD::ArgFlagsTy Flags) const override {
162 // For pointers, we just need to fixup the integer types reported in the
163 // CCValAssign.
164 if (Flags.isPointer())
165 return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
166 return getStackValueStoreTypeHack(VA);
167 }
168
169 void assignValueToReg(Register ValVReg, Register PhysReg,
170 const CCValAssign &VA,
171 ISD::ArgFlagsTy Flags = {}) override {
172 markRegUsed(Reg: PhysReg);
173 IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
174 }
175
176 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
177 const MachinePointerInfo &MPO,
178 const CCValAssign &VA) override {
179 MachineFunction &MF = MIRBuilder.getMF();
180
181 LLT ValTy(VA.getValVT());
182 LLT LocTy(VA.getLocVT());
183
184 // Fixup the types for the DAG compatibility hack.
185 if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
186 std::swap(a&: ValTy, b&: LocTy);
187 else {
188 // The calling code knows if this is a pointer or not, we're only touching
189 // the LocTy for the i8/i16 hack.
190 assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
191 LocTy = MemTy;
192 }
193
194 auto MMO = MF.getMachineMemOperand(
195 PtrInfo: MPO, f: MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy: LocTy,
196 base_alignment: inferAlignFromPtrInfo(MF, MPO));
197
198 switch (VA.getLocInfo()) {
199 case CCValAssign::LocInfo::ZExt:
200 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_ZEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO);
201 return;
202 case CCValAssign::LocInfo::SExt:
203 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO);
204 return;
205 default:
206 MIRBuilder.buildLoad(Res: ValVReg, Addr, MMO&: *MMO);
207 return;
208 }
209 }
210
211 /// How the physical register gets marked varies between formal
212 /// parameters (it's a basic-block live-in), and a call instruction
213 /// (it's an implicit-def of the BL).
214 virtual void markRegUsed(Register Reg) = 0;
215};
216
217struct FormalArgHandler : public IncomingArgHandler {
218 FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
219 : IncomingArgHandler(MIRBuilder, MRI) {}
220
221 void markRegUsed(Register Reg) override {
222 MIRBuilder.getMRI()->addLiveIn(Reg: Reg.asMCReg());
223 MIRBuilder.getMBB().addLiveIn(PhysReg: Reg.asMCReg());
224 }
225};
226
227struct CallReturnHandler : public IncomingArgHandler {
228 CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
229 MachineInstrBuilder MIB)
230 : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
231
232 void markRegUsed(Register Reg) override {
233 MIB.addDef(RegNo: Reg, Flags: RegState::Implicit);
234 }
235
236 MachineInstrBuilder MIB;
237};
238
239/// A special return arg handler for "returned" attribute arg calls.
240struct ReturnedArgCallReturnHandler : public CallReturnHandler {
241 ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
242 MachineRegisterInfo &MRI,
243 MachineInstrBuilder MIB)
244 : CallReturnHandler(MIRBuilder, MRI, MIB) {}
245
246 void markRegUsed(Register Reg) override {}
247};
248
249struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
250 OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
251 MachineInstrBuilder MIB, bool IsTailCall = false,
252 int FPDiff = 0)
253 : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
254 FPDiff(FPDiff),
255 Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
256
257 Register getStackAddress(uint64_t Size, int64_t Offset,
258 MachinePointerInfo &MPO,
259 ISD::ArgFlagsTy Flags) override {
260 MachineFunction &MF = MIRBuilder.getMF();
261 LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
262 LLT s64 = LLT::integer(SizeInBits: 64);
263
264 if (IsTailCall) {
265 assert(!Flags.isByVal() && "byval unhandled with tail calls");
266
267 Offset += FPDiff;
268 int FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: Offset, IsImmutable: true);
269 auto FIReg = MIRBuilder.buildFrameIndex(Res: p0, Idx: FI);
270 MPO = MachinePointerInfo::getFixedStack(MF, FI);
271 return FIReg.getReg(Idx: 0);
272 }
273
274 if (!SPReg)
275 SPReg = MIRBuilder.buildCopy(Res: p0, Op: Register(AArch64::SP)).getReg(Idx: 0);
276
277 auto OffsetReg = MIRBuilder.buildConstant(Res: s64, Val: Offset);
278
279 auto AddrReg = MIRBuilder.buildPtrAdd(Res: p0, Op0: SPReg, Op1: OffsetReg);
280
281 MPO = MachinePointerInfo::getStack(MF, Offset);
282 return AddrReg.getReg(Idx: 0);
283 }
284
285 /// We need to fixup the reported store size for certain value types because
286 /// we invert the interpretation of ValVT and LocVT in certain cases. This is
287 /// for compatibility with the DAG call lowering implementation, which we're
288 /// currently building on top of.
289 LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
290 ISD::ArgFlagsTy Flags) const override {
291 if (Flags.isPointer())
292 return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
293 return getStackValueStoreTypeHack(VA);
294 }
295
296 void assignValueToReg(Register ValVReg, Register PhysReg,
297 const CCValAssign &VA, ISD::ArgFlagsTy Flags) override {
298 MIB.addUse(RegNo: PhysReg, Flags: RegState::Implicit);
299 Register ExtReg = extendRegister(ValReg: ValVReg, VA);
300 MIRBuilder.buildCopy(Res: PhysReg, Op: ExtReg);
301 }
302
303 /// Check whether a stack argument requires lowering in a tail call.
304 static bool shouldLowerTailCallStackArg(const MachineFunction &MF,
305 const CCValAssign &VA,
306 Register ValVReg,
307 Register StoreAddr) {
308 const MachineRegisterInfo &MRI = MF.getRegInfo();
309 // Print the defining instruction for the value.
310 auto *DefMI = MRI.getVRegDef(Reg: ValVReg);
311 assert(DefMI && "No defining instruction");
312 for (;;) {
313 // Look through nodes that don't alter the bits of the incoming value.
314 unsigned Op = DefMI->getOpcode();
315 if (Op == TargetOpcode::G_ZEXT || Op == TargetOpcode::G_ANYEXT ||
316 Op == TargetOpcode::G_BITCAST || isAssertMI(MI: *DefMI)) {
317 DefMI = MRI.getVRegDef(Reg: DefMI->getOperand(i: 1).getReg());
318 continue;
319 }
320 break;
321 }
322
323 auto *Load = dyn_cast<GLoad>(Val: DefMI);
324 if (!Load)
325 return true;
326 Register LoadReg = Load->getPointerReg();
327 auto *LoadAddrDef = MRI.getVRegDef(Reg: LoadReg);
328 if (LoadAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
329 return true;
330 const MachineFrameInfo &MFI = MF.getFrameInfo();
331 int LoadFI = LoadAddrDef->getOperand(i: 1).getIndex();
332
333 auto *StoreAddrDef = MRI.getVRegDef(Reg: StoreAddr);
334 if (StoreAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
335 return true;
336 int StoreFI = StoreAddrDef->getOperand(i: 1).getIndex();
337
338 if (!MFI.isImmutableObjectIndex(ObjectIdx: LoadFI))
339 return true;
340 if (MFI.getObjectOffset(ObjectIdx: LoadFI) != MFI.getObjectOffset(ObjectIdx: StoreFI))
341 return true;
342 if (Load->getMemSize() != MFI.getObjectSize(ObjectIdx: StoreFI))
343 return true;
344
345 return false;
346 }
347
348 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
349 const MachinePointerInfo &MPO,
350 const CCValAssign &VA) override {
351 MachineFunction &MF = MIRBuilder.getMF();
352 if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, ValVReg, StoreAddr: Addr))
353 return;
354 auto MMO = MF.getMachineMemOperand(PtrInfo: MPO, f: MachineMemOperand::MOStore, MemTy,
355 base_alignment: inferAlignFromPtrInfo(MF, MPO));
356 MIRBuilder.buildStore(Val: ValVReg, Addr, MMO&: *MMO);
357 }
358
359 void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
360 Register Addr, LLT MemTy,
361 const MachinePointerInfo &MPO,
362 const CCValAssign &VA) override {
363 unsigned MaxSize = MemTy.getSizeInBytes() * 8;
364 // For varargs, we always want to extend them to 8 bytes, in which case
365 // we disable setting a max.
366 if (Arg.Flags[0].isVarArg())
367 MaxSize = 0;
368
369 Register ValVReg = Arg.Regs[RegIndex];
370 if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
371 MVT LocVT = VA.getLocVT();
372 MVT ValVT = VA.getValVT();
373
374 if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
375 std::swap(a&: ValVT, b&: LocVT);
376 MemTy = LLT(VA.getValVT());
377 }
378
379 ValVReg = extendRegister(ValReg: ValVReg, VA, MaxSizeBits: MaxSize);
380 } else {
381 // The store does not cover the full allocated stack slot.
382 MemTy = LLT(VA.getValVT());
383 }
384
385 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
386 }
387
388 MachineInstrBuilder MIB;
389
390 bool IsTailCall;
391
392 /// For tail calls, the byte offset of the call's argument area from the
393 /// callee's. Unused elsewhere.
394 int FPDiff;
395
396 // Cache the SP register vreg if we need it more than once in this call site.
397 Register SPReg;
398
399 const AArch64Subtarget &Subtarget;
400};
401} // namespace
402
403static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
404 return (CallConv == CallingConv::Fast && TailCallOpt) ||
405 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
406}
407
408bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
409 const Value *Val,
410 ArrayRef<Register> VRegs,
411 FunctionLoweringInfo &FLI,
412 Register SwiftErrorVReg) const {
413 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: AArch64::RET_ReallyLR);
414 assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
415 "Return value without a vreg");
416
417 bool Success = true;
418 if (!FLI.CanLowerReturn) {
419 insertSRetStores(MIRBuilder, RetTy: Val->getType(), VRegs, DemoteReg: FLI.DemoteRegister);
420 } else if (!VRegs.empty()) {
421 MachineFunction &MF = MIRBuilder.getMF();
422 const Function &F = MF.getFunction();
423 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
424
425 MachineRegisterInfo &MRI = MF.getRegInfo();
426 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
427 CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC: F.getCallingConv());
428 auto &DL = F.getDataLayout();
429 LLVMContext &Ctx = Val->getType()->getContext();
430
431 SmallVector<EVT, 4> SplitEVTs;
432 ComputeValueVTs(TLI, DL, Ty: Val->getType(), ValueVTs&: SplitEVTs);
433 assert(VRegs.size() == SplitEVTs.size() &&
434 "For each split Type there should be exactly one VReg.");
435
436 SmallVector<ArgInfo, 8> SplitArgs;
437 CallingConv::ID CC = F.getCallingConv();
438
439 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
440 Register CurVReg = VRegs[i];
441 ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Context&: Ctx), 0};
442 setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F);
443
444 // i1 is a special case because SDAG i1 true is naturally zero extended
445 // when widened using ANYEXT. We need to do it explicitly here.
446 auto &Flags = CurArgInfo.Flags[0];
447 if (MRI.getType(Reg: CurVReg).getSizeInBits() == TypeSize::getFixed(ExactSize: 1) &&
448 !Flags.isSExt() && !Flags.isZExt()) {
449 CurVReg = MIRBuilder.buildZExt(Res: LLT::integer(SizeInBits: 8), Op: CurVReg).getReg(Idx: 0);
450 } else if (TLI.getNumRegistersForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]) ==
451 1) {
452 // Some types will need extending as specified by the CC.
453 MVT NewVT = TLI.getRegisterTypeForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]);
454 if (EVT(NewVT) != SplitEVTs[i]) {
455 unsigned ExtendOp = TargetOpcode::G_ANYEXT;
456 if (F.getAttributes().hasRetAttr(Kind: Attribute::SExt))
457 ExtendOp = TargetOpcode::G_SEXT;
458 else if (F.getAttributes().hasRetAttr(Kind: Attribute::ZExt))
459 ExtendOp = TargetOpcode::G_ZEXT;
460
461 LLT NewLLT(NewVT);
462 LLT OldLLT = getLLTForType(Ty&: *CurArgInfo.Ty, DL);
463 CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Context&: Ctx);
464 // Instead of an extend, we might have a vector type which needs
465 // padding with more elements, e.g. <2 x half> -> <4 x half>.
466 if (NewVT.isVector()) {
467 if (OldLLT.isVector()) {
468 if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
469 CurVReg =
470 MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg)
471 .getReg(Idx: 0);
472 } else {
473 // Just do a vector extend.
474 CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg})
475 .getReg(Idx: 0);
476 }
477 } else if (NewLLT.getNumElements() >= 2 &&
478 NewLLT.getNumElements() <= 8) {
479 // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't
480 // have <1 x S> vector types in GISel we use a build_vector
481 // instead of a vector merge/concat.
482 CurVReg =
483 MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg)
484 .getReg(Idx: 0);
485 } else {
486 LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
487 return false;
488 }
489 } else {
490 // If the split EVT was a <1 x T> vector, and NewVT is T, then we
491 // don't have to do anything since we don't distinguish between the
492 // two.
493 if (NewLLT.getScalarSizeInBits() !=
494 MRI.getType(Reg: CurVReg).getScalarSizeInBits()) {
495 // A scalar extend.
496 CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg})
497 .getReg(Idx: 0);
498 }
499 }
500 }
501 }
502 if (CurVReg != CurArgInfo.Regs[0]) {
503 CurArgInfo.Regs[0] = CurVReg;
504 // Reset the arg flags after modifying CurVReg.
505 setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F);
506 }
507 splitToValueTypes(OrigArgInfo: CurArgInfo, SplitArgs, DL, CallConv: CC);
508 }
509
510 AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
511 /*IsReturn*/ true);
512 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
513 Success = determineAndHandleAssignments(Handler, Assigner, Args&: SplitArgs,
514 MIRBuilder, CallConv: CC, IsVarArg: F.isVarArg());
515 }
516
517 if (SwiftErrorVReg) {
518 MIB.addUse(RegNo: AArch64::X21, Flags: RegState::Implicit);
519 MIRBuilder.buildCopy(Res: AArch64::X21, Op: SwiftErrorVReg);
520 }
521
522 MIRBuilder.insertInstr(MIB);
523 return Success;
524}
525
526bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
527 CallingConv::ID CallConv,
528 SmallVectorImpl<BaseArgInfo> &Outs,
529 bool IsVarArg) const {
530 SmallVector<CCValAssign, 16> ArgLocs;
531 const auto &TLI = *getTLI<AArch64TargetLowering>();
532 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
533 MF.getFunction().getContext());
534
535 return checkReturn(CCInfo, Outs, Fn: TLI.CCAssignFnForReturn(CC: CallConv));
536}
537
538/// Helper function to compute forwarded registers for musttail calls. Computes
539/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
540/// can be used to save + restore registers later.
541static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
542 CCAssignFn *AssignFn) {
543 MachineBasicBlock &MBB = MIRBuilder.getMBB();
544 MachineFunction &MF = MIRBuilder.getMF();
545 MachineFrameInfo &MFI = MF.getFrameInfo();
546
547 if (!MFI.hasMustTailInVarArgFunc())
548 return;
549
550 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
551 const Function &F = MF.getFunction();
552 assert(F.isVarArg() && "Expected F to be vararg?");
553
554 // Compute the set of forwarded registers. The rest are scratch.
555 SmallVector<CCValAssign, 16> ArgLocs;
556 CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
557 F.getContext());
558 SmallVector<MVT, 2> RegParmTypes;
559 RegParmTypes.push_back(Elt: MVT::i64);
560 RegParmTypes.push_back(Elt: MVT::f128);
561
562 // Later on, we can use this vector to restore the registers if necessary.
563 SmallVectorImpl<ForwardedRegister> &Forwards =
564 FuncInfo->getForwardedMustTailRegParms();
565 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: AssignFn);
566
567 // Conservatively forward X8, since it might be used for an aggregate
568 // return.
569 if (!CCInfo.isAllocated(Reg: AArch64::X8)) {
570 Register X8VReg = MF.addLiveIn(PReg: AArch64::X8, RC: &AArch64::GPR64RegClass);
571 Forwards.push_back(Elt: ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
572 }
573
574 // Add the forwards to the MachineBasicBlock and MachineFunction.
575 for (const auto &F : Forwards) {
576 MBB.addLiveIn(PhysReg: F.PReg);
577 MIRBuilder.buildCopy(Res: Register(F.VReg), Op: Register(F.PReg));
578 }
579}
580
581bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
582 auto &F = MF.getFunction();
583 const auto &TM = static_cast<const AArch64TargetMachine &>(MF.getTarget());
584
585 if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
586 llvm::any_of(Range: F.args(), P: [](const Argument &A) {
587 return A.getType()->isScalableTy();
588 })))
589 return true;
590 const auto &ST = MF.getSubtarget<AArch64Subtarget>();
591 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
592 LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
593 return true;
594 }
595
596 SMEAttrs Attrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
597 if (Attrs.hasZAState() || Attrs.hasZT0State() ||
598 Attrs.hasStreamingInterfaceOrBody() ||
599 Attrs.hasStreamingCompatibleInterface())
600 return true;
601
602 auto OptLevel = MF.getTarget().getOptLevel();
603 bool IsGlobalISelPreferred =
604 getCGPassBuilderOption().EnableGlobalISelOption ==
605 cl::boolOrDefault::BOU_TRUE ||
606 static_cast<unsigned>(OptLevel) <= TM.getEnableGlobalISelAtO() ||
607 F.hasOptNone();
608 return !IsGlobalISelPreferred;
609}
610
611void AArch64CallLowering::saveVarArgRegisters(
612 MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
613 CCState &CCInfo) const {
614 auto GPRArgRegs = AArch64::getGPRArgRegs();
615 auto FPRArgRegs = AArch64::getFPRArgRegs();
616
617 MachineFunction &MF = MIRBuilder.getMF();
618 MachineRegisterInfo &MRI = MF.getRegInfo();
619 MachineFrameInfo &MFI = MF.getFrameInfo();
620 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
621 auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
622 bool IsWin64CC = Subtarget.isCallingConvWin64(CC: CCInfo.getCallingConv(),
623 IsVarArg: MF.getFunction().isVarArg());
624 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
625 const LLT s64 = LLT::integer(SizeInBits: 64);
626
627 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(Regs: GPRArgRegs);
628 unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;
629
630 unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
631 int GPRIdx = 0;
632 if (GPRSaveSize != 0) {
633 if (IsWin64CC) {
634 GPRIdx = MFI.CreateFixedObject(Size: GPRSaveSize,
635 SPOffset: -static_cast<int>(GPRSaveSize), IsImmutable: false);
636 if (GPRSaveSize & 15)
637 // The extra size here, if triggered, will always be 8.
638 MFI.CreateFixedObject(Size: 16 - (GPRSaveSize & 15),
639 SPOffset: -static_cast<int>(alignTo(Value: GPRSaveSize, Align: 16)),
640 IsImmutable: false);
641 } else
642 GPRIdx = MFI.CreateStackObject(Size: GPRSaveSize, Alignment: Align(8), isSpillSlot: false);
643
644 auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: GPRIdx);
645 auto Offset =
646 MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 8);
647
648 for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
649 Register Val = MRI.createGenericVirtualRegister(Ty: s64);
650 Handler.assignValueToReg(
651 ValVReg: Val, PhysReg: GPRArgRegs[i],
652 VA: CCValAssign::getReg(ValNo: i + MF.getFunction().getNumOperands(), ValVT: MVT::i64,
653 Reg: GPRArgRegs[i], LocVT: MVT::i64, HTP: CCValAssign::Full));
654 auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
655 MF, FI: GPRIdx, Offset: (i - FirstVariadicGPR) * 8)
656 : MachinePointerInfo::getStack(MF, Offset: i * 8);
657 MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO));
658
659 FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0),
660 Op0: FIN.getReg(Idx: 0), Op1: Offset);
661 }
662 }
663 FuncInfo->setVarArgsGPRIndex(GPRIdx);
664 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
665
666 if (Subtarget.hasFPARMv8() && !IsWin64CC) {
667 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(Regs: FPRArgRegs);
668
669 unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
670 int FPRIdx = 0;
671 if (FPRSaveSize != 0) {
672 FPRIdx = MFI.CreateStackObject(Size: FPRSaveSize, Alignment: Align(16), isSpillSlot: false);
673
674 auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: FPRIdx);
675 auto Offset =
676 MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 16);
677
678 for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
679 Register Val = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 128));
680 Handler.assignValueToReg(
681 ValVReg: Val, PhysReg: FPRArgRegs[i],
682 VA: CCValAssign::getReg(
683 ValNo: i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
684 ValVT: MVT::f128, Reg: FPRArgRegs[i], LocVT: MVT::f128, HTP: CCValAssign::Full));
685
686 auto MPO = MachinePointerInfo::getStack(MF, Offset: i * 16);
687 MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO));
688
689 FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0),
690 Op0: FIN.getReg(Idx: 0), Op1: Offset);
691 }
692 }
693 FuncInfo->setVarArgsFPRIndex(FPRIdx);
694 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
695 }
696}
697
698bool AArch64CallLowering::lowerFormalArguments(
699 MachineIRBuilder &MIRBuilder, const Function &F,
700 ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
701 MachineFunction &MF = MIRBuilder.getMF();
702 MachineBasicBlock &MBB = MIRBuilder.getMBB();
703 MachineRegisterInfo &MRI = MF.getRegInfo();
704 auto &DL = F.getDataLayout();
705 auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
706
707 // Arm64EC has extra requirements for varargs calls which are only implemented
708 // in SelectionDAG; bail out for now.
709 if (F.isVarArg() && Subtarget.isWindowsArm64EC())
710 return false;
711
712 // Arm64EC thunks have a special calling convention which is only implemented
713 // in SelectionDAG; bail out for now.
714 if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native ||
715 F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
716 return false;
717
718 bool IsWin64 =
719 Subtarget.isCallingConvWin64(CC: F.getCallingConv(), IsVarArg: F.isVarArg()) &&
720 !Subtarget.isWindowsArm64EC();
721
722 SmallVector<ArgInfo, 8> SplitArgs;
723 SmallVector<std::pair<Register, Register>> BoolArgs;
724
725 // Insert the hidden sret parameter if the return value won't fit in the
726 // return registers.
727 if (!FLI.CanLowerReturn)
728 insertSRetIncomingArgument(F, SplitArgs, DemoteReg&: FLI.DemoteRegister, MRI, DL);
729
730 unsigned i = 0;
731 for (auto &Arg : F.args()) {
732 if (DL.getTypeStoreSize(Ty: Arg.getType()).isZero())
733 continue;
734
735 ArgInfo OrigArg{VRegs[i], Arg, i};
736 setArgFlags(Arg&: OrigArg, OpIdx: i + AttributeList::FirstArgIndex, DL, FuncInfo: F);
737
738 // i1 arguments are zero-extended to i8 by the caller. Emit a
739 // hint to reflect this.
740 if (OrigArg.Ty->isIntegerTy(BitWidth: 1)) {
741 assert(OrigArg.Regs.size() == 1 &&
742 MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
743 "Unexpected registers used for i1 arg");
744
745 auto &Flags = OrigArg.Flags[0];
746 if (!Flags.isZExt() && !Flags.isSExt()) {
747 // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
748 Register OrigReg = OrigArg.Regs[0];
749 Register WideReg = MRI.createGenericVirtualRegister(Ty: LLT::integer(SizeInBits: 8));
750 OrigArg.Regs[0] = WideReg;
751 BoolArgs.push_back(Elt: {OrigReg, WideReg});
752 }
753 }
754
755 if (Arg.hasAttribute(Kind: Attribute::SwiftAsync))
756 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
757
758 splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs, DL, CallConv: F.getCallingConv());
759 ++i;
760 }
761
762 if (!MBB.empty())
763 MIRBuilder.setInstr(*MBB.begin());
764
765 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
766 CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC: F.getCallingConv(), IsVarArg: IsWin64 && F.isVarArg());
767
768 AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
769 FormalArgHandler Handler(MIRBuilder, MRI);
770 SmallVector<CCValAssign, 16> ArgLocs;
771 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
772 if (!determineAssignments(Assigner, Args&: SplitArgs, CCInfo) ||
773 !handleAssignments(Handler, Args&: SplitArgs, CCState&: CCInfo, ArgLocs, MIRBuilder))
774 return false;
775
776 if (!BoolArgs.empty()) {
777 for (auto &KV : BoolArgs) {
778 Register OrigReg = KV.first;
779 Register WideReg = KV.second;
780 LLT WideTy = MRI.getType(Reg: WideReg);
781 assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
782 "Unexpected bit size of a bool arg");
783 MIRBuilder.buildTrunc(
784 Res: OrigReg, Op: MIRBuilder.buildAssertZExt(Res: WideTy, Op: WideReg, Size: 1).getReg(Idx: 0));
785 }
786 }
787
788 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
789 uint64_t StackSize = Assigner.StackSize;
790 if (F.isVarArg()) {
791 if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) {
792 // The AAPCS variadic function ABI is identical to the non-variadic
793 // one. As a result there may be more arguments in registers and we should
794 // save them for future reference.
795 // Win64 variadic functions also pass arguments in registers, but all
796 // float arguments are passed in integer registers.
797 saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
798 } else if (Subtarget.isWindowsArm64EC()) {
799 return false;
800 }
801
802 // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
803 StackSize = alignTo(Value: Assigner.StackSize, Align: Subtarget.isTargetILP32() ? 4 : 8);
804
805 auto &MFI = MIRBuilder.getMF().getFrameInfo();
806 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(Size: 4, SPOffset: StackSize, IsImmutable: true));
807 }
808
809 if (doesCalleeRestoreStack(CallConv: F.getCallingConv(),
810 TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) {
811 // We have a non-standard ABI, so why not make full use of the stack that
812 // we're going to pop? It must be aligned to 16 B in any case.
813 StackSize = alignTo(Value: StackSize, Align: 16);
814
815 // If we're expected to restore the stack (e.g. fastcc), then we'll be
816 // adding a multiple of 16.
817 FuncInfo->setArgumentStackToRestore(StackSize);
818
819 // Our own callers will guarantee that the space is free by giving an
820 // aligned value to CALLSEQ_START.
821 }
822
823 // When we tail call, we need to check if the callee's arguments
824 // will fit on the caller's stack. So, whenever we lower formal arguments,
825 // we should keep track of this information, since we might lower a tail call
826 // in this function later.
827 FuncInfo->setBytesInStackArgArea(StackSize);
828
829 if (Subtarget.hasCustomCallingConv())
830 Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
831
832 handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
833
834 // Move back to the end of the basic block.
835 MIRBuilder.setMBB(MBB);
836
837 return true;
838}
839
840/// Return true if the calling convention is one that we can guarantee TCO for.
841static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
842 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
843 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
844}
845
846/// Return true if we might ever do TCO for calls with this calling convention.
847static bool mayTailCallThisCC(CallingConv::ID CC) {
848 switch (CC) {
849 case CallingConv::C:
850 case CallingConv::PreserveMost:
851 case CallingConv::PreserveAll:
852 case CallingConv::PreserveNone:
853 case CallingConv::Swift:
854 case CallingConv::SwiftTail:
855 case CallingConv::Tail:
856 case CallingConv::Fast:
857 return true;
858 default:
859 return false;
860 }
861}
862
863/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
864/// CC.
865static std::pair<CCAssignFn *, CCAssignFn *>
866getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
867 return {TLI.CCAssignFnForCall(CC, IsVarArg: false), TLI.CCAssignFnForCall(CC, IsVarArg: true)};
868}
869
870bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
871 CallLoweringInfo &Info, MachineFunction &MF,
872 SmallVectorImpl<ArgInfo> &InArgs) const {
873 const Function &CallerF = MF.getFunction();
874 CallingConv::ID CalleeCC = Info.CallConv;
875 CallingConv::ID CallerCC = CallerF.getCallingConv();
876
877 // If the calling conventions match, then everything must be the same.
878 if (CalleeCC == CallerCC)
879 return true;
880
881 // Check if the caller and callee will handle arguments in the same way.
882 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
883 CCAssignFn *CalleeAssignFnFixed;
884 CCAssignFn *CalleeAssignFnVarArg;
885 std::tie(args&: CalleeAssignFnFixed, args&: CalleeAssignFnVarArg) =
886 getAssignFnsForCC(CC: CalleeCC, TLI);
887
888 CCAssignFn *CallerAssignFnFixed;
889 CCAssignFn *CallerAssignFnVarArg;
890 std::tie(args&: CallerAssignFnFixed, args&: CallerAssignFnVarArg) =
891 getAssignFnsForCC(CC: CallerCC, TLI);
892
893 AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
894 CalleeAssignFnVarArg);
895 AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
896 CallerAssignFnVarArg);
897
898 if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
899 return false;
900
901 // Make sure that the caller and callee preserve all of the same registers.
902 auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
903 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
904 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
905 if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
906 TRI->UpdateCustomCallPreservedMask(MF, Mask: &CallerPreserved);
907 TRI->UpdateCustomCallPreservedMask(MF, Mask: &CalleePreserved);
908 }
909
910 return TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved);
911}
912
913bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
914 CallLoweringInfo &Info, MachineFunction &MF,
915 SmallVectorImpl<ArgInfo> &OrigOutArgs) const {
916 // If there are no outgoing arguments, then we are done.
917 if (OrigOutArgs.empty())
918 return true;
919
920 const Function &CallerF = MF.getFunction();
921 LLVMContext &Ctx = CallerF.getContext();
922 CallingConv::ID CalleeCC = Info.CallConv;
923 CallingConv::ID CallerCC = CallerF.getCallingConv();
924 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
925 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
926
927 CCAssignFn *AssignFnFixed;
928 CCAssignFn *AssignFnVarArg;
929 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI);
930
931 // We have outgoing arguments. Make sure that we can tail call with them.
932 SmallVector<CCValAssign, 16> OutLocs;
933 CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
934
935 AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
936 Subtarget, /*IsReturn*/ false);
937 // determineAssignments() may modify argument flags, so make a copy.
938 SmallVector<ArgInfo, 8> OutArgs;
939 append_range(C&: OutArgs, R&: OrigOutArgs);
940 if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo)) {
941 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
942 return false;
943 }
944
945 // Make sure that they can fit on the caller's stack.
946 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
947 if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
948 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
949 return false;
950 }
951
952 // Verify that the parameters in callee-saved registers match.
953 // TODO: Port this over to CallLowering as general code once swiftself is
954 // supported.
955 auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
956 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
957 MachineRegisterInfo &MRI = MF.getRegInfo();
958
959 if (Info.IsVarArg) {
960 // Be conservative and disallow variadic memory operands to match SDAG's
961 // behaviour.
962 // FIXME: If the caller's calling convention is C, then we can
963 // potentially use its argument area. However, for cases like fastcc,
964 // we can't do anything.
965 for (unsigned i = 0; i < OutLocs.size(); ++i) {
966 auto &ArgLoc = OutLocs[i];
967 if (ArgLoc.isRegLoc())
968 continue;
969
970 LLVM_DEBUG(
971 dbgs()
972 << "... Cannot tail call vararg function with stack arguments\n");
973 return false;
974 }
975 }
976
977 return parametersInCSRMatch(MRI, CallerPreservedMask, ArgLocs: OutLocs, OutVals: OutArgs);
978}
979
980bool AArch64CallLowering::isEligibleForTailCallOptimization(
981 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
982 SmallVectorImpl<ArgInfo> &InArgs,
983 SmallVectorImpl<ArgInfo> &OutArgs) const {
984
985 // Must pass all target-independent checks in order to tail call optimize.
986 if (!Info.IsTailCall)
987 return false;
988
989 CallingConv::ID CalleeCC = Info.CallConv;
990 MachineFunction &MF = MIRBuilder.getMF();
991 const Function &CallerF = MF.getFunction();
992
993 LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
994
995 if (Info.SwiftErrorVReg) {
996 // TODO: We should handle this.
997 // Note that this is also handled by the check for no outgoing arguments.
998 // Proactively disabling this though, because the swifterror handling in
999 // lowerCall inserts a COPY *after* the location of the call.
1000 LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
1001 return false;
1002 }
1003
1004 if (!mayTailCallThisCC(CC: CalleeCC)) {
1005 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
1006 return false;
1007 }
1008
1009 // Byval parameters hand the function a pointer directly into the stack area
1010 // we want to reuse during a tail call. Working around this *is* possible (see
1011 // X86).
1012 //
1013 // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
1014 // it?
1015 //
1016 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
1017 // In this case, it is necessary to save/restore X0 in the callee. Tail
1018 // call opt interferes with this. So we disable tail call opt when the
1019 // caller has an argument with "inreg" attribute.
1020 //
1021 // FIXME: Check whether the callee also has an "inreg" argument.
1022 //
1023 // When the caller has a swifterror argument, we don't want to tail call
1024 // because would have to move into the swifterror register before the
1025 // tail call.
1026 if (any_of(Range: CallerF.args(), P: [](const Argument &A) {
1027 return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
1028 })) {
1029 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
1030 "inreg, or swifterror arguments\n");
1031 return false;
1032 }
1033
1034 // Externally-defined functions with weak linkage should not be
1035 // tail-called on AArch64 when the OS does not support dynamic
1036 // pre-emption of symbols, as the AAELF spec requires normal calls
1037 // to undefined weak functions to be replaced with a NOP or jump to the
1038 // next instruction. The behaviour of branch instructions in this
1039 // situation (as used for tail calls) is implementation-defined, so we
1040 // cannot rely on the linker replacing the tail call with a return.
1041 if (Info.Callee.isGlobal()) {
1042 const GlobalValue *GV = Info.Callee.getGlobal();
1043 const Triple &TT = MF.getTarget().getTargetTriple();
1044 if (GV->hasExternalWeakLinkage() &&
1045 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
1046 TT.isOSBinFormatMachO())) {
1047 LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
1048 "with weak linkage for this OS.\n");
1049 return false;
1050 }
1051 }
1052
1053 // If we have -tailcallopt, then we're done.
1054 if (canGuaranteeTCO(CC: CalleeCC, GuaranteeTailCalls: MF.getTarget().Options.GuaranteedTailCallOpt))
1055 return CalleeCC == CallerF.getCallingConv();
1056
1057 // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
1058 // Try to find cases where we can do that.
1059
1060 // I want anyone implementing a new calling convention to think long and hard
1061 // about this assert.
1062 assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
1063 "Unexpected variadic calling convention");
1064
1065 // Verify that the incoming and outgoing arguments from the callee are
1066 // safe to tail call.
1067 if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1068 LLVM_DEBUG(
1069 dbgs()
1070 << "... Caller and callee have incompatible calling conventions.\n");
1071 return false;
1072 }
1073
1074 if (!areCalleeOutgoingArgsTailCallable(Info, MF, OrigOutArgs&: OutArgs))
1075 return false;
1076
1077 LLVM_DEBUG(
1078 dbgs() << "... Call is eligible for tail call optimization.\n");
1079 return true;
1080}
1081
1082static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
1083 bool IsTailCall,
1084 std::optional<CallLowering::PtrAuthInfo> &PAI,
1085 MachineRegisterInfo &MRI) {
1086 const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>();
1087
1088 if (!IsTailCall) {
1089 if (!PAI)
1090 return IsIndirect ? getBLRCallOpcode(MF: CallerF) : (unsigned)AArch64::BL;
1091
1092 assert(IsIndirect && "Direct call should not be authenticated");
1093 assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) &&
1094 "Invalid auth call key");
1095 return AArch64::BLRA;
1096 }
1097
1098 if (!IsIndirect)
1099 return AArch64::TCRETURNdi;
1100
1101 // When BTI or PAuthLR are enabled, there are restrictions on using x16 and
1102 // x17 to hold the function pointer.
1103 if (FuncInfo->branchTargetEnforcement()) {
1104 if (FuncInfo->branchProtectionPAuthLR()) {
1105 assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1106 return AArch64::TCRETURNrix17;
1107 }
1108 if (PAI)
1109 return AArch64::AUTH_TCRETURN_BTI;
1110 return AArch64::TCRETURNrix16x17;
1111 }
1112
1113 if (FuncInfo->branchProtectionPAuthLR()) {
1114 assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1115 return AArch64::TCRETURNrinotx16;
1116 }
1117
1118 if (PAI)
1119 return AArch64::AUTH_TCRETURN;
1120 return AArch64::TCRETURNri;
1121}
1122
1123static const uint32_t *
1124getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
1125 AArch64CallLowering::CallLoweringInfo &Info,
1126 const AArch64RegisterInfo &TRI, MachineFunction &MF) {
1127 const uint32_t *Mask;
1128 if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
1129 // For 'this' returns, use the X0-preserving mask if applicable
1130 Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
1131 if (!Mask) {
1132 OutArgs[0].Flags[0].setReturned(false);
1133 Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1134 }
1135 } else {
1136 Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1137 }
1138 return Mask;
1139}
1140
1141bool AArch64CallLowering::lowerTailCall(
1142 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1143 SmallVectorImpl<ArgInfo> &OutArgs) const {
1144 MachineFunction &MF = MIRBuilder.getMF();
1145 const Function &F = MF.getFunction();
1146 MachineRegisterInfo &MRI = MF.getRegInfo();
1147 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1148 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1149
1150 // True when we're tail calling, but without -tailcallopt.
1151 bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
1152 Info.CallConv != CallingConv::Tail &&
1153 Info.CallConv != CallingConv::SwiftTail;
1154
1155 // Find out which ABI gets to decide where things go.
1156 CallingConv::ID CalleeCC = Info.CallConv;
1157 CCAssignFn *AssignFnFixed;
1158 CCAssignFn *AssignFnVarArg;
1159 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI);
1160
1161 MachineInstrBuilder CallSeqStart;
1162 if (!IsSibCall)
1163 CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN);
1164
1165 unsigned Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: true, PAI&: Info.PAI, MRI);
1166 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc);
1167 MIB.add(MO: Info.Callee);
1168
1169 // Tell the call which registers are clobbered.
1170 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1171 auto TRI = Subtarget.getRegisterInfo();
1172
1173 // Byte offset for the tail call. When we are sibcalling, this will always
1174 // be 0.
1175 MIB.addImm(Val: 0);
1176
1177 // Authenticated tail calls always take key/discriminator arguments.
1178 if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) {
1179 assert((Info.PAI->Key == AArch64PACKey::IA ||
1180 Info.PAI->Key == AArch64PACKey::IB) &&
1181 "Invalid auth call key");
1182 MIB.addImm(Val: Info.PAI->Key);
1183
1184 Register AddrDisc = 0;
1185 uint16_t IntDisc = 0;
1186 std::tie(args&: IntDisc, args&: AddrDisc) =
1187 extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI);
1188
1189 MIB.addImm(Val: IntDisc);
1190 MIB.addUse(RegNo: AddrDisc);
1191 if (AddrDisc != AArch64::NoRegister) {
1192 MIB->getOperand(i: 4).setReg(constrainOperandRegClass(
1193 MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1194 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(),
1195 RegMO&: MIB->getOperand(i: 4), OpIdx: 4));
1196 }
1197 }
1198
1199 // Tell the call which registers are clobbered.
1200 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1201 if (Subtarget.hasCustomCallingConv())
1202 TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask);
1203 MIB.addRegMask(Mask);
1204
1205 if (Info.CFIType)
1206 MIB->setCFIType(MF, Type: Info.CFIType->getZExtValue());
1207
1208 if (TRI->isAnyArgRegReserved(MF))
1209 TRI->emitReservedArgRegCallError(MF);
1210
1211 // FPDiff is the byte offset of the call's argument area from the callee's.
1212 // Stores to callee stack arguments will be placed in FixedStackSlots offset
1213 // by this amount for a tail call. In a sibling call it must be 0 because the
1214 // caller will deallocate the entire stack and the callee still expects its
1215 // arguments to begin at SP+0.
1216 int FPDiff = 0;
1217
1218 // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1219 // by -tailcallopt. For sibcalls, the memory operands for the call are
1220 // already available in the caller's incoming argument space.
1221 unsigned NumBytes = 0;
1222 if (!IsSibCall) {
1223 // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1224 // before handling assignments, because FPDiff must be known for memory
1225 // arguments.
1226 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1227 SmallVector<CCValAssign, 16> OutLocs;
1228 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1229
1230 AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
1231 Subtarget, /*IsReturn*/ false);
1232 if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo))
1233 return false;
1234
1235 // The callee will pop the argument stack as a tail call. Thus, we must
1236 // keep it 16-byte aligned.
1237 NumBytes = alignTo(Value: OutInfo.getStackSize(), Align: 16);
1238
1239 // FPDiff will be negative if this tail call requires more space than we
1240 // would automatically have in our incoming argument space. Positive if we
1241 // actually shrink the stack.
1242 FPDiff = NumReusableBytes - NumBytes;
1243
1244 // Update the required reserved area if this is the tail call requiring the
1245 // most argument stack space.
1246 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
1247 FuncInfo->setTailCallReservedStack(-FPDiff);
1248
1249 // The stack pointer must be 16-byte aligned at all times it's used for a
1250 // memory operation, which in practice means at *all* times and in
1251 // particular across call boundaries. Therefore our own arguments started at
1252 // a 16-byte aligned SP and the delta applied for the tail call should
1253 // satisfy the same constraint.
1254 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1255 }
1256
1257 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1258
1259 AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1260 Subtarget, /*IsReturn*/ false);
1261
1262 // Do the actual argument marshalling.
1263 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1264 /*IsTailCall*/ true, FPDiff);
1265 if (!determineAndHandleAssignments(Handler, Assigner, Args&: OutArgs, MIRBuilder,
1266 CallConv: CalleeCC, IsVarArg: Info.IsVarArg))
1267 return false;
1268
1269 Mask = getMaskForArgs(OutArgs, Info, TRI: *TRI, MF);
1270
1271 if (Info.IsVarArg && Info.IsMustTailCall) {
1272 // Now we know what's being passed to the function. Add uses to the call for
1273 // the forwarded registers that we *aren't* passing as parameters. This will
1274 // preserve the copies we build earlier.
1275 for (const auto &F : Forwards) {
1276 Register ForwardedReg = F.PReg;
1277 // If the register is already passed, or aliases a register which is
1278 // already being passed, then skip it.
1279 if (any_of(Range: MIB->uses(), P: [&ForwardedReg, &TRI](const MachineOperand &Use) {
1280 if (!Use.isReg())
1281 return false;
1282 return TRI->regsOverlap(RegA: Use.getReg(), RegB: ForwardedReg);
1283 }))
1284 continue;
1285
1286 // We aren't passing it already, so we should add it to the call.
1287 MIRBuilder.buildCopy(Res: ForwardedReg, Op: Register(F.VReg));
1288 MIB.addReg(RegNo: ForwardedReg, Flags: RegState::Implicit);
1289 }
1290 }
1291
1292 // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1293 // sequence start and end here.
1294 if (!IsSibCall) {
1295 MIB->getOperand(i: 1).setImm(FPDiff);
1296 CallSeqStart.addImm(Val: 0).addImm(Val: 0);
1297 // End the call sequence *before* emitting the call. Normally, we would
1298 // tidy the frame up after the call. However, here, we've laid out the
1299 // parameters so that when SP is reset, they will be in the correct
1300 // location.
1301 MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKUP).addImm(Val: 0).addImm(Val: 0);
1302 }
1303
1304 // Now we can add the actual call instruction to the correct basic block.
1305 MIRBuilder.insertInstr(MIB);
1306
1307 // If Callee is a reg, since it is used by a target specific instruction,
1308 // it must have a register class matching the constraint of that instruction.
1309 if (MIB->getOperand(i: 0).isReg())
1310 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1311 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB,
1312 II: MIB->getDesc(), RegMO&: MIB->getOperand(i: 0), OpIdx: 0);
1313
1314 MF.getFrameInfo().setHasTailCall();
1315 Info.LoweredTailCall = true;
1316 return true;
1317}
1318
1319bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1320 CallLoweringInfo &Info) const {
1321 MachineFunction &MF = MIRBuilder.getMF();
1322 const Function &F = MF.getFunction();
1323 MachineRegisterInfo &MRI = MF.getRegInfo();
1324 auto &DL = F.getDataLayout();
1325 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1326 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1327
1328 // Arm64EC has extra requirements for varargs calls; bail out for now.
1329 //
1330 // Arm64EC has special mangling rules for calls; bail out on all calls for
1331 // now.
1332 if (Subtarget.isWindowsArm64EC())
1333 return false;
1334
1335 // Arm64EC thunks have a special calling convention which is only implemented
1336 // in SelectionDAG; bail out for now.
1337 if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
1338 Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
1339 return false;
1340
1341 SmallVector<ArgInfo, 8> OutArgs;
1342 for (auto &OrigArg : Info.OrigArgs) {
1343 splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs&: OutArgs, DL, CallConv: Info.CallConv);
1344 // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
1345 auto &Flags = OrigArg.Flags[0];
1346 if (OrigArg.Ty->isIntegerTy(BitWidth: 1) && !Flags.isSExt() && !Flags.isZExt()) {
1347 ArgInfo &OutArg = OutArgs.back();
1348 assert(OutArg.Regs.size() == 1 &&
1349 MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1350 "Unexpected registers used for i1 arg");
1351
1352 // We cannot use a ZExt ArgInfo flag here, because it will
1353 // zero-extend the argument to i32 instead of just i8.
1354 OutArg.Regs[0] =
1355 MIRBuilder.buildZExt(Res: LLT::integer(SizeInBits: 8), Op: OutArg.Regs[0]).getReg(Idx: 0);
1356 LLVMContext &Ctx = MF.getFunction().getContext();
1357 OutArg.Ty = Type::getInt8Ty(C&: Ctx);
1358 }
1359 }
1360
1361 SmallVector<ArgInfo, 8> InArgs;
1362 if (!Info.OrigRet.Ty->isVoidTy())
1363 splitToValueTypes(OrigArgInfo: Info.OrigRet, SplitArgs&: InArgs, DL, CallConv: Info.CallConv);
1364
1365 // If we can lower as a tail call, do that instead.
1366 bool CanTailCallOpt =
1367 isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1368
1369 // We must emit a tail call if we have musttail.
1370 if (Info.IsMustTailCall && !CanTailCallOpt) {
1371 // There are types of incoming/outgoing arguments we can't handle yet, so
1372 // it doesn't make sense to actually die here like in ISelLowering. Instead,
1373 // fall back to SelectionDAG and let it try to handle this.
1374 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1375 return false;
1376 }
1377
1378 Info.IsTailCall = CanTailCallOpt;
1379 if (CanTailCallOpt)
1380 return lowerTailCall(MIRBuilder, Info, OutArgs);
1381
1382 // Find out which ABI gets to decide where things go.
1383 CCAssignFn *AssignFnFixed;
1384 CCAssignFn *AssignFnVarArg;
1385 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) =
1386 getAssignFnsForCC(CC: Info.CallConv, TLI);
1387
1388 MachineInstrBuilder CallSeqStart;
1389 CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN);
1390
1391 // Create a temporarily-floating call instruction so we can add the implicit
1392 // uses of arg registers.
1393
1394 unsigned Opc = 0;
1395 // Calls with operand bundle "clang.arc.attachedcall" are special. They should
1396 // be expanded to the call, directly followed by a special marker sequence and
1397 // a call to an ObjC library function.
1398 if (Info.CB && objcarc::hasAttachedCallOpBundle(CB: Info.CB))
1399 Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
1400 // A call to a returns twice function like setjmp must be followed by a bti
1401 // instruction.
1402 else if (Info.CB && Info.CB->hasFnAttr(Kind: Attribute::ReturnsTwice) &&
1403 !Subtarget.noBTIAtReturnTwice() &&
1404 MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1405 Opc = AArch64::BLR_BTI;
1406 else {
1407 // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
1408 // is set.
1409 if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
1410 auto MIB = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_GLOBAL_VALUE);
1411 DstOp(getLLTForType(Ty&: *F.getType(), DL)).addDefToMIB(MRI, MIB);
1412 MIB.addExternalSymbol(FnName: Info.Callee.getSymbolName(), TargetFlags: AArch64II::MO_GOT);
1413 Info.Callee = MachineOperand::CreateReg(Reg: MIB.getReg(Idx: 0), isDef: false);
1414 }
1415 Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: false, PAI&: Info.PAI, MRI);
1416 }
1417
1418 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc);
1419 unsigned CalleeOpNo = 0;
1420
1421 if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
1422 // Add a target global address for the retainRV/claimRV runtime function
1423 // just before the call target.
1424 Function *ARCFn = *objcarc::getAttachedARCFunction(CB: Info.CB);
1425 MIB.addGlobalAddress(GV: ARCFn);
1426 ++CalleeOpNo;
1427
1428 // We may or may not need to emit both the marker and the retain/claim call.
1429 // Tell the pseudo expansion using an additional boolean op.
1430 MIB.addImm(Val: objcarc::attachedCallOpBundleNeedsMarker(CB: Info.CB));
1431 ++CalleeOpNo;
1432 } else if (Info.CFIType) {
1433 MIB->setCFIType(MF, Type: Info.CFIType->getZExtValue());
1434 }
1435 MIB->setDeactivationSymbol(MF, DS: Info.DeactivationSymbol);
1436
1437 MIB.add(MO: Info.Callee);
1438
1439 // Tell the call which registers are clobbered.
1440 const uint32_t *Mask;
1441 const auto *TRI = Subtarget.getRegisterInfo();
1442
1443 AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1444 Subtarget, /*IsReturn*/ false);
1445 // Do the actual argument marshalling.
1446 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsReturn*/ false);
1447 if (!determineAndHandleAssignments(Handler, Assigner, Args&: OutArgs, MIRBuilder,
1448 CallConv: Info.CallConv, IsVarArg: Info.IsVarArg))
1449 return false;
1450
1451 Mask = getMaskForArgs(OutArgs, Info, TRI: *TRI, MF);
1452
1453 if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
1454 assert((Info.PAI->Key == AArch64PACKey::IA ||
1455 Info.PAI->Key == AArch64PACKey::IB) &&
1456 "Invalid auth call key");
1457 MIB.addImm(Val: Info.PAI->Key);
1458
1459 Register AddrDisc = 0;
1460 uint16_t IntDisc = 0;
1461 std::tie(args&: IntDisc, args&: AddrDisc) =
1462 extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI);
1463
1464 MIB.addImm(Val: IntDisc);
1465 MIB.addUse(RegNo: AddrDisc);
1466 if (AddrDisc != AArch64::NoRegister) {
1467 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1468 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB,
1469 II: MIB->getDesc(), RegMO&: MIB->getOperand(i: CalleeOpNo + 3),
1470 OpIdx: CalleeOpNo + 3);
1471 }
1472 }
1473
1474 // Tell the call which registers are clobbered.
1475 if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1476 TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask);
1477 MIB.addRegMask(Mask);
1478
1479 if (TRI->isAnyArgRegReserved(MF))
1480 TRI->emitReservedArgRegCallError(MF);
1481
1482 // Now we can add the actual call instruction to the correct basic block.
1483 MIRBuilder.insertInstr(MIB);
1484
1485 uint64_t CalleePopBytes =
1486 doesCalleeRestoreStack(CallConv: Info.CallConv,
1487 TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)
1488 ? alignTo(Value: Assigner.StackSize, Align: 16)
1489 : 0;
1490
1491 CallSeqStart.addImm(Val: Assigner.StackSize).addImm(Val: 0);
1492 MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKUP)
1493 .addImm(Val: Assigner.StackSize)
1494 .addImm(Val: CalleePopBytes);
1495
1496 // If Callee is a reg, since it is used by a target specific
1497 // instruction, it must have a register class matching the
1498 // constraint of that instruction.
1499 if (MIB->getOperand(i: CalleeOpNo).isReg())
1500 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *Subtarget.getInstrInfo(),
1501 RBI: *Subtarget.getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(),
1502 RegMO&: MIB->getOperand(i: CalleeOpNo), OpIdx: CalleeOpNo);
1503
1504 // Finally we can copy the returned value back into its virtual-register. In
1505 // symmetry with the arguments, the physical register must be an
1506 // implicit-define of the call instruction.
1507 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1508 CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(CC: Info.CallConv);
1509 CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1510 bool UsingReturnedArg =
1511 !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
1512
1513 AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1514 /*IsReturn*/ false);
1515 ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
1516 if (!determineAndHandleAssignments(
1517 Handler&: UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, Args&: InArgs,
1518 MIRBuilder, CallConv: Info.CallConv, IsVarArg: Info.IsVarArg,
1519 ThisReturnRegs: UsingReturnedArg ? ArrayRef(OutArgs[0].Regs)
1520 : ArrayRef<Register>()))
1521 return false;
1522 }
1523
1524 if (Info.SwiftErrorVReg) {
1525 MIB.addDef(RegNo: AArch64::X21, Flags: RegState::Implicit);
1526 MIRBuilder.buildCopy(Res: Info.SwiftErrorVReg, Op: Register(AArch64::X21));
1527 }
1528
1529 if (!Info.CanLowerReturn) {
1530 insertSRetLoads(MIRBuilder, RetTy: Info.OrigRet.Ty, VRegs: Info.OrigRet.Regs,
1531 DemoteReg: Info.DemoteRegister, FI: Info.DemoteStackIndex);
1532 }
1533 return true;
1534}
1535
1536bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
1537 return Ty.getSizeInBits() == 64;
1538}
1539