1//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the lowering of LLVM calls to machine code calls for
11/// GlobalISel.
12///
13//===----------------------------------------------------------------------===//
14
15#include "AArch64CallLowering.h"
16#include "AArch64GlobalISelUtils.h"
17#include "AArch64ISelLowering.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64RegisterInfo.h"
20#include "AArch64Subtarget.h"
21#include "Utils/AArch64SMEAttributes.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/ObjCARCUtil.h"
25#include "llvm/CodeGen/Analysis.h"
26#include "llvm/CodeGen/CallingConvLower.h"
27#include "llvm/CodeGen/FunctionLoweringInfo.h"
28#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/LowLevelTypeUtils.h"
32#include "llvm/CodeGen/MachineBasicBlock.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineMemOperand.h"
37#include "llvm/CodeGen/MachineOperand.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/TargetOpcodes.h"
40#include "llvm/CodeGen/TargetRegisterInfo.h"
41#include "llvm/CodeGen/TargetSubtargetInfo.h"
42#include "llvm/CodeGen/ValueTypes.h"
43#include "llvm/CodeGenTypes/MachineValueType.h"
44#include "llvm/IR/Argument.h"
45#include "llvm/IR/Attributes.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/Value.h"
49#include <algorithm>
50#include <cassert>
51#include <cstdint>
52
53#define DEBUG_TYPE "aarch64-call-lowering"
54
55using namespace llvm;
56using namespace AArch64GISelUtils;
57
58extern cl::opt<bool> EnableSVEGISel;
59
60AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
61 : CallLowering(&TLI) {}
62
63static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
64 MVT &LocVT) {
65 // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
66 // hack because the DAG calls the assignment function with pre-legalized
67 // register typed values, not the raw type.
68 //
  // This hack is not applied to return values, which are not passed on the
  // stack.
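  //
  // For example, a stack-passed i1 or i8 value is treated as an i8 here, so
  // the store type matches what SelectionDAG produces for the same signature.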
71 if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
72 ValVT = LocVT = MVT::i8;
73 else if (OrigVT == MVT::i16)
74 ValVT = LocVT = MVT::i16;
75}
76
77// Account for i1/i8/i16 stack passed value hack
78static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
79 const MVT ValVT = VA.getValVT();
80 return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
81 : LLT(VA.getLocVT());
82}
83
84namespace {
85
86struct AArch64IncomingValueAssigner
87 : public CallLowering::IncomingValueAssigner {
88 AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
89 CCAssignFn *AssignFnVarArg_)
90 : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
91
92 bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
93 CCValAssign::LocInfo LocInfo,
94 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
95 CCState &State) override {
96 applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
97 return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
98 LocInfo, Info, Flags, State);
99 }
100};
101
102struct AArch64OutgoingValueAssigner
103 : public CallLowering::OutgoingValueAssigner {
104 const AArch64Subtarget &Subtarget;
105
106 /// Track if this is used for a return instead of function argument
107 /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
108 /// stack passed returns for them and cannot apply the type adjustment.
109 bool IsReturn;
110
111 AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
112 CCAssignFn *AssignFnVarArg_,
113 const AArch64Subtarget &Subtarget_,
114 bool IsReturn)
115 : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
116 Subtarget(Subtarget_), IsReturn(IsReturn) {}
117
118 bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
119 CCValAssign::LocInfo LocInfo,
120 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
121 CCState &State) override {
122 const Function &F = State.getMachineFunction().getFunction();
123 bool IsCalleeWin =
124 Subtarget.isCallingConvWin64(CC: State.getCallingConv(), IsVarArg: F.isVarArg());
125 bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
126
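    // On Win64, even the fixed arguments of a variadic callee are laid out
    // using the varargs convention, so route them through AssignFnVarArg.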
127 bool Res;
128 if (Info.IsFixed && !UseVarArgsCCForFixed) {
129 if (!IsReturn)
130 applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
131 Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
132 } else
133 Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
134
135 StackSize = State.getStackSize();
136 return Res;
137 }
138};
139
140struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
141 IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
142 : IncomingValueHandler(MIRBuilder, MRI) {}
143
144 Register getStackAddress(uint64_t Size, int64_t Offset,
145 MachinePointerInfo &MPO,
146 ISD::ArgFlagsTy Flags) override {
147 auto &MFI = MIRBuilder.getMF().getFrameInfo();
148
149 // Byval is assumed to be writable memory, but other stack passed arguments
150 // are not.
151 const bool IsImmutable = !Flags.isByVal();
152
153 int FI = MFI.CreateFixedObject(Size, SPOffset: Offset, IsImmutable);
154 MPO = MachinePointerInfo::getFixedStack(MF&: MIRBuilder.getMF(), FI);
155 auto AddrReg = MIRBuilder.buildFrameIndex(Res: LLT::pointer(AddressSpace: 0, SizeInBits: 64), Idx: FI);
156 return AddrReg.getReg(Idx: 0);
157 }
158
159 LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
160 ISD::ArgFlagsTy Flags) const override {
161 // For pointers, we just need to fixup the integer types reported in the
162 // CCValAssign.
163 if (Flags.isPointer())
164 return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
165 return getStackValueStoreTypeHack(VA);
166 }
167
168 void assignValueToReg(Register ValVReg, Register PhysReg,
169 const CCValAssign &VA) override {
170 markRegUsed(Reg: PhysReg);
171 IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
172 }
173
174 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
175 const MachinePointerInfo &MPO,
176 const CCValAssign &VA) override {
177 MachineFunction &MF = MIRBuilder.getMF();
178
179 LLT ValTy(VA.getValVT());
180 LLT LocTy(VA.getLocVT());
181
182 // Fixup the types for the DAG compatibility hack.
183 if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
184 std::swap(a&: ValTy, b&: LocTy);
185 else {
      // The calling code knows if this is a pointer or not; we're only
      // touching the LocTy for the i8/i16 hack.
188 assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
189 LocTy = MemTy;
190 }
191
192 auto MMO = MF.getMachineMemOperand(
193 PtrInfo: MPO, f: MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy: LocTy,
194 base_alignment: inferAlignFromPtrInfo(MF, MPO));
195
196 switch (VA.getLocInfo()) {
197 case CCValAssign::LocInfo::ZExt:
198 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_ZEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO);
199 return;
200 case CCValAssign::LocInfo::SExt:
201 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO);
202 return;
203 default:
204 MIRBuilder.buildLoad(Res: ValVReg, Addr, MMO&: *MMO);
205 return;
206 }
207 }
208
209 /// How the physical register gets marked varies between formal
210 /// parameters (it's a basic-block live-in), and a call instruction
211 /// (it's an implicit-def of the BL).
212 virtual void markRegUsed(Register Reg) = 0;
213};
214
215struct FormalArgHandler : public IncomingArgHandler {
216 FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
217 : IncomingArgHandler(MIRBuilder, MRI) {}
218
219 void markRegUsed(Register Reg) override {
220 MIRBuilder.getMRI()->addLiveIn(Reg: Reg.asMCReg());
221 MIRBuilder.getMBB().addLiveIn(PhysReg: Reg.asMCReg());
222 }
223};
224
225struct CallReturnHandler : public IncomingArgHandler {
226 CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
227 MachineInstrBuilder MIB)
228 : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
229
230 void markRegUsed(Register Reg) override {
231 MIB.addDef(RegNo: Reg, Flags: RegState::Implicit);
232 }
233
234 MachineInstrBuilder MIB;
235};
236
/// A special return-value handler for calls whose first argument carries the
/// "returned" attribute.
238struct ReturnedArgCallReturnHandler : public CallReturnHandler {
239 ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
240 MachineRegisterInfo &MRI,
241 MachineInstrBuilder MIB)
242 : CallReturnHandler(MIRBuilder, MRI, MIB) {}
243
244 void markRegUsed(Register Reg) override {}
245};
246
247struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
248 OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
249 MachineInstrBuilder MIB, bool IsTailCall = false,
250 int FPDiff = 0)
251 : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
252 FPDiff(FPDiff),
253 Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
254
255 Register getStackAddress(uint64_t Size, int64_t Offset,
256 MachinePointerInfo &MPO,
257 ISD::ArgFlagsTy Flags) override {
258 MachineFunction &MF = MIRBuilder.getMF();
259 LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
260 LLT s64 = LLT::scalar(SizeInBits: 64);
261
262 if (IsTailCall) {
263 assert(!Flags.isByVal() && "byval unhandled with tail calls");
264
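      // The argument is stored directly into the caller's own incoming
      // argument area (a fixed frame object), shifted by FPDiff.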
265 Offset += FPDiff;
266 int FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: Offset, IsImmutable: true);
267 auto FIReg = MIRBuilder.buildFrameIndex(Res: p0, Idx: FI);
268 MPO = MachinePointerInfo::getFixedStack(MF, FI);
269 return FIReg.getReg(Idx: 0);
270 }
271
272 if (!SPReg)
273 SPReg = MIRBuilder.buildCopy(Res: p0, Op: Register(AArch64::SP)).getReg(Idx: 0);
274
275 auto OffsetReg = MIRBuilder.buildConstant(Res: s64, Val: Offset);
276
277 auto AddrReg = MIRBuilder.buildPtrAdd(Res: p0, Op0: SPReg, Op1: OffsetReg);
278
279 MPO = MachinePointerInfo::getStack(MF, Offset);
280 return AddrReg.getReg(Idx: 0);
281 }
282
283 /// We need to fixup the reported store size for certain value types because
284 /// we invert the interpretation of ValVT and LocVT in certain cases. This is
285 /// for compatibility with the DAG call lowering implementation, which we're
286 /// currently building on top of.
287 LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
288 ISD::ArgFlagsTy Flags) const override {
289 if (Flags.isPointer())
290 return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
291 return getStackValueStoreTypeHack(VA);
292 }
293
294 void assignValueToReg(Register ValVReg, Register PhysReg,
295 const CCValAssign &VA) override {
296 MIB.addUse(RegNo: PhysReg, Flags: RegState::Implicit);
297 Register ExtReg = extendRegister(ValReg: ValVReg, VA);
298 MIRBuilder.buildCopy(Res: PhysReg, Op: ExtReg);
299 }
300
301 /// Check whether a stack argument requires lowering in a tail call.
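  /// Returns false when the outgoing value is just a reload of the matching
  /// (immutable) incoming stack slot with the same offset and size; in that
  /// case the store would only rewrite bytes that are already in place and
  /// can be skipped.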
302 static bool shouldLowerTailCallStackArg(const MachineFunction &MF,
303 const CCValAssign &VA,
304 Register ValVReg,
305 Register StoreAddr) {
306 const MachineRegisterInfo &MRI = MF.getRegInfo();
    // Get the defining instruction for the value.
    auto *DefMI = MRI.getVRegDef(ValVReg);
309 assert(DefMI && "No defining instruction");
310 for (;;) {
311 // Look through nodes that don't alter the bits of the incoming value.
312 unsigned Op = DefMI->getOpcode();
313 if (Op == TargetOpcode::G_ZEXT || Op == TargetOpcode::G_ANYEXT ||
314 Op == TargetOpcode::G_BITCAST || isAssertMI(MI: *DefMI)) {
315 DefMI = MRI.getVRegDef(Reg: DefMI->getOperand(i: 1).getReg());
316 continue;
317 }
318 break;
319 }
320
321 auto *Load = dyn_cast<GLoad>(Val: DefMI);
322 if (!Load)
323 return true;
324 Register LoadReg = Load->getPointerReg();
325 auto *LoadAddrDef = MRI.getVRegDef(Reg: LoadReg);
326 if (LoadAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
327 return true;
328 const MachineFrameInfo &MFI = MF.getFrameInfo();
329 int LoadFI = LoadAddrDef->getOperand(i: 1).getIndex();
330
331 auto *StoreAddrDef = MRI.getVRegDef(Reg: StoreAddr);
332 if (StoreAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
333 return true;
334 int StoreFI = StoreAddrDef->getOperand(i: 1).getIndex();
335
336 if (!MFI.isImmutableObjectIndex(ObjectIdx: LoadFI))
337 return true;
338 if (MFI.getObjectOffset(ObjectIdx: LoadFI) != MFI.getObjectOffset(ObjectIdx: StoreFI))
339 return true;
340 if (Load->getMemSize() != MFI.getObjectSize(ObjectIdx: StoreFI))
341 return true;
342
343 return false;
344 }
345
346 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
347 const MachinePointerInfo &MPO,
348 const CCValAssign &VA) override {
349 MachineFunction &MF = MIRBuilder.getMF();
350 if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, ValVReg, StoreAddr: Addr))
351 return;
352 auto MMO = MF.getMachineMemOperand(PtrInfo: MPO, f: MachineMemOperand::MOStore, MemTy,
353 base_alignment: inferAlignFromPtrInfo(MF, MPO));
354 MIRBuilder.buildStore(Val: ValVReg, Addr, MMO&: *MMO);
355 }
356
357 void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
358 Register Addr, LLT MemTy,
359 const MachinePointerInfo &MPO,
360 const CCValAssign &VA) override {
361 unsigned MaxSize = MemTy.getSizeInBytes() * 8;
362 // For varargs, we always want to extend them to 8 bytes, in which case
363 // we disable setting a max.
364 if (!Arg.IsFixed)
365 MaxSize = 0;
366
367 Register ValVReg = Arg.Regs[RegIndex];
368 if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
369 MVT LocVT = VA.getLocVT();
370 MVT ValVT = VA.getValVT();
371
372 if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
373 std::swap(a&: ValVT, b&: LocVT);
374 MemTy = LLT(VA.getValVT());
375 }
376
377 ValVReg = extendRegister(ValReg: ValVReg, VA, MaxSizeBits: MaxSize);
378 } else {
379 // The store does not cover the full allocated stack slot.
380 MemTy = LLT(VA.getValVT());
381 }
382
383 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
384 }
385
386 MachineInstrBuilder MIB;
387
388 bool IsTailCall;
389
390 /// For tail calls, the byte offset of the call's argument area from the
391 /// callee's. Unused elsewhere.
392 int FPDiff;
393
394 // Cache the SP register vreg if we need it more than once in this call site.
395 Register SPReg;
396
397 const AArch64Subtarget &Subtarget;
398};
399} // namespace
400
401static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
402 return (CallConv == CallingConv::Fast && TailCallOpt) ||
403 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
404}
405
406bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
407 const Value *Val,
408 ArrayRef<Register> VRegs,
409 FunctionLoweringInfo &FLI,
410 Register SwiftErrorVReg) const {
411 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: AArch64::RET_ReallyLR);
412 assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
413 "Return value without a vreg");
414
415 bool Success = true;
416 if (!FLI.CanLowerReturn) {
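    // The return value does not fit in the return registers; store it through
    // the hidden sret pointer that was recorded when the formal arguments
    // were lowered.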
417 insertSRetStores(MIRBuilder, RetTy: Val->getType(), VRegs, DemoteReg: FLI.DemoteRegister);
418 } else if (!VRegs.empty()) {
419 MachineFunction &MF = MIRBuilder.getMF();
420 const Function &F = MF.getFunction();
421 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
422
423 MachineRegisterInfo &MRI = MF.getRegInfo();
424 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
425 CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC: F.getCallingConv());
426 auto &DL = F.getDataLayout();
427 LLVMContext &Ctx = Val->getType()->getContext();
428
429 SmallVector<EVT, 4> SplitEVTs;
430 ComputeValueVTs(TLI, DL, Ty: Val->getType(), ValueVTs&: SplitEVTs);
431 assert(VRegs.size() == SplitEVTs.size() &&
432 "For each split Type there should be exactly one VReg.");
433
434 SmallVector<ArgInfo, 8> SplitArgs;
435 CallingConv::ID CC = F.getCallingConv();
436
437 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
438 Register CurVReg = VRegs[i];
439 ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Context&: Ctx), 0};
440 setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F);
441
442 // i1 is a special case because SDAG i1 true is naturally zero extended
443 // when widened using ANYEXT. We need to do it explicitly here.
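      // For example, returning i1 true becomes an i8 value of 1 with the high
      // bits cleared, rather than an anyext with undefined high bits.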
444 auto &Flags = CurArgInfo.Flags[0];
445 if (MRI.getType(Reg: CurVReg).getSizeInBits() == TypeSize::getFixed(ExactSize: 1) &&
446 !Flags.isSExt() && !Flags.isZExt()) {
447 CurVReg = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 8), Op: CurVReg).getReg(Idx: 0);
448 } else if (TLI.getNumRegistersForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]) ==
449 1) {
450 // Some types will need extending as specified by the CC.
451 MVT NewVT = TLI.getRegisterTypeForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]);
452 if (EVT(NewVT) != SplitEVTs[i]) {
453 unsigned ExtendOp = TargetOpcode::G_ANYEXT;
454 if (F.getAttributes().hasRetAttr(Kind: Attribute::SExt))
455 ExtendOp = TargetOpcode::G_SEXT;
456 else if (F.getAttributes().hasRetAttr(Kind: Attribute::ZExt))
457 ExtendOp = TargetOpcode::G_ZEXT;
458
459 LLT NewLLT(NewVT);
460 LLT OldLLT = getLLTForType(Ty&: *CurArgInfo.Ty, DL);
461 CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Context&: Ctx);
462 // Instead of an extend, we might have a vector type which needs
463 // padding with more elements, e.g. <2 x half> -> <4 x half>.
464 if (NewVT.isVector()) {
465 if (OldLLT.isVector()) {
466 if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
467 CurVReg =
468 MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg)
469 .getReg(Idx: 0);
470 } else {
471 // Just do a vector extend.
472 CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg})
473 .getReg(Idx: 0);
474 }
475 } else if (NewLLT.getNumElements() >= 2 &&
476 NewLLT.getNumElements() <= 8) {
477 // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't
478 // have <1 x S> vector types in GISel we use a build_vector
479 // instead of a vector merge/concat.
480 CurVReg =
481 MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg)
482 .getReg(Idx: 0);
483 } else {
484 LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
485 return false;
486 }
487 } else {
488 // If the split EVT was a <1 x T> vector, and NewVT is T, then we
489 // don't have to do anything since we don't distinguish between the
490 // two.
491 if (NewLLT != MRI.getType(Reg: CurVReg)) {
492 // A scalar extend.
493 CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg})
494 .getReg(Idx: 0);
495 }
496 }
497 }
498 }
499 if (CurVReg != CurArgInfo.Regs[0]) {
500 CurArgInfo.Regs[0] = CurVReg;
501 // Reset the arg flags after modifying CurVReg.
502 setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F);
503 }
504 splitToValueTypes(OrigArgInfo: CurArgInfo, SplitArgs, DL, CallConv: CC);
505 }
506
507 AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
508 /*IsReturn*/ true);
509 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
510 Success = determineAndHandleAssignments(Handler, Assigner, Args&: SplitArgs,
511 MIRBuilder, CallConv: CC, IsVarArg: F.isVarArg());
512 }
513
514 if (SwiftErrorVReg) {
515 MIB.addUse(RegNo: AArch64::X21, Flags: RegState::Implicit);
516 MIRBuilder.buildCopy(Res: AArch64::X21, Op: SwiftErrorVReg);
517 }
518
519 MIRBuilder.insertInstr(MIB);
520 return Success;
521}
522
523bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
524 CallingConv::ID CallConv,
525 SmallVectorImpl<BaseArgInfo> &Outs,
526 bool IsVarArg) const {
527 SmallVector<CCValAssign, 16> ArgLocs;
528 const auto &TLI = *getTLI<AArch64TargetLowering>();
529 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
530 MF.getFunction().getContext());
531
532 return checkReturn(CCInfo, Outs, Fn: TLI.CCAssignFnForReturn(CC: CallConv));
533}
534
535/// Helper function to compute forwarded registers for musttail calls. Computes
536/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
537/// can be used to save + restore registers later.
538static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
539 CCAssignFn *AssignFn) {
540 MachineBasicBlock &MBB = MIRBuilder.getMBB();
541 MachineFunction &MF = MIRBuilder.getMF();
542 MachineFrameInfo &MFI = MF.getFrameInfo();
543
544 if (!MFI.hasMustTailInVarArgFunc())
545 return;
546
547 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
548 const Function &F = MF.getFunction();
549 assert(F.isVarArg() && "Expected F to be vararg?");
550
551 // Compute the set of forwarded registers. The rest are scratch.
552 SmallVector<CCValAssign, 16> ArgLocs;
553 CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
554 F.getContext());
555 SmallVector<MVT, 2> RegParmTypes;
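  // Between them, i64 and f128 cover all of the argument registers (X0-X7 and
  // Q0-Q7), so analyzing these two types captures every register a variadic
  // argument could arrive in.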
556 RegParmTypes.push_back(Elt: MVT::i64);
557 RegParmTypes.push_back(Elt: MVT::f128);
558
559 // Later on, we can use this vector to restore the registers if necessary.
560 SmallVectorImpl<ForwardedRegister> &Forwards =
561 FuncInfo->getForwardedMustTailRegParms();
562 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: AssignFn);
563
564 // Conservatively forward X8, since it might be used for an aggregate
565 // return.
566 if (!CCInfo.isAllocated(Reg: AArch64::X8)) {
567 Register X8VReg = MF.addLiveIn(PReg: AArch64::X8, RC: &AArch64::GPR64RegClass);
568 Forwards.push_back(Elt: ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
569 }
570
571 // Add the forwards to the MachineBasicBlock and MachineFunction.
572 for (const auto &F : Forwards) {
573 MBB.addLiveIn(PhysReg: F.PReg);
574 MIRBuilder.buildCopy(Res: Register(F.VReg), Op: Register(F.PReg));
575 }
576}
577
578bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
579 auto &F = MF.getFunction();
580 if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
581 llvm::any_of(Range: F.args(), P: [](const Argument &A) {
582 return A.getType()->isScalableTy();
583 })))
584 return true;
585 const auto &ST = MF.getSubtarget<AArch64Subtarget>();
586 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(
        dbgs() << "Falling back to SDAG because NEON/FP is not available\n");
588 return true;
589 }
590
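  // Functions with ZA/ZT0 state or a streaming(-compatible) interface need
  // SME-specific lowering (e.g. lazy saves and streaming-mode changes around
  // calls) that GlobalISel does not handle yet; let SelectionDAG do it.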
591 SMEAttrs Attrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
592 if (Attrs.hasZAState() || Attrs.hasZT0State() ||
593 Attrs.hasStreamingInterfaceOrBody() ||
594 Attrs.hasStreamingCompatibleInterface())
595 return true;
596
597 return false;
598}
599
600void AArch64CallLowering::saveVarArgRegisters(
601 MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
602 CCState &CCInfo) const {
603 auto GPRArgRegs = AArch64::getGPRArgRegs();
604 auto FPRArgRegs = AArch64::getFPRArgRegs();
605
606 MachineFunction &MF = MIRBuilder.getMF();
607 MachineRegisterInfo &MRI = MF.getRegInfo();
608 MachineFrameInfo &MFI = MF.getFrameInfo();
609 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
610 auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
611 bool IsWin64CC = Subtarget.isCallingConvWin64(CC: CCInfo.getCallingConv(),
612 IsVarArg: MF.getFunction().isVarArg());
613 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
614 const LLT s64 = LLT::scalar(SizeInBits: 64);
615
616 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(Regs: GPRArgRegs);
617 unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;
618
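  // Spill the remaining, unallocated GPR argument registers to a save area so
  // va_arg can walk them. On Win64 this area is a fixed object in the
  // callee's frame, immediately below the incoming stack arguments.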
619 unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
620 int GPRIdx = 0;
621 if (GPRSaveSize != 0) {
622 if (IsWin64CC) {
623 GPRIdx = MFI.CreateFixedObject(Size: GPRSaveSize,
624 SPOffset: -static_cast<int>(GPRSaveSize), IsImmutable: false);
625 if (GPRSaveSize & 15)
626 // The extra size here, if triggered, will always be 8.
627 MFI.CreateFixedObject(Size: 16 - (GPRSaveSize & 15),
628 SPOffset: -static_cast<int>(alignTo(Value: GPRSaveSize, Align: 16)),
629 IsImmutable: false);
630 } else
631 GPRIdx = MFI.CreateStackObject(Size: GPRSaveSize, Alignment: Align(8), isSpillSlot: false);
632
633 auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: GPRIdx);
634 auto Offset =
635 MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 8);
636
637 for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
638 Register Val = MRI.createGenericVirtualRegister(Ty: s64);
639 Handler.assignValueToReg(
640 ValVReg: Val, PhysReg: GPRArgRegs[i],
641 VA: CCValAssign::getReg(ValNo: i + MF.getFunction().getNumOperands(), ValVT: MVT::i64,
642 Reg: GPRArgRegs[i], LocVT: MVT::i64, HTP: CCValAssign::Full));
643 auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
644 MF, FI: GPRIdx, Offset: (i - FirstVariadicGPR) * 8)
645 : MachinePointerInfo::getStack(MF, Offset: i * 8);
646 MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO));
647
648 FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0),
649 Op0: FIN.getReg(Idx: 0), Op1: Offset);
650 }
651 }
652 FuncInfo->setVarArgsGPRIndex(GPRIdx);
653 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
654
655 if (Subtarget.hasFPARMv8() && !IsWin64CC) {
656 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(Regs: FPRArgRegs);
657
658 unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
659 int FPRIdx = 0;
660 if (FPRSaveSize != 0) {
661 FPRIdx = MFI.CreateStackObject(Size: FPRSaveSize, Alignment: Align(16), isSpillSlot: false);
662
663 auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: FPRIdx);
664 auto Offset =
665 MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 16);
666
667 for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
668 Register Val = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 128));
669 Handler.assignValueToReg(
670 ValVReg: Val, PhysReg: FPRArgRegs[i],
671 VA: CCValAssign::getReg(
672 ValNo: i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
673 ValVT: MVT::f128, Reg: FPRArgRegs[i], LocVT: MVT::f128, HTP: CCValAssign::Full));
674
675 auto MPO = MachinePointerInfo::getStack(MF, Offset: i * 16);
676 MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO));
677
678 FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0),
679 Op0: FIN.getReg(Idx: 0), Op1: Offset);
680 }
681 }
682 FuncInfo->setVarArgsFPRIndex(FPRIdx);
683 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
684 }
685}
686
687bool AArch64CallLowering::lowerFormalArguments(
688 MachineIRBuilder &MIRBuilder, const Function &F,
689 ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
690 MachineFunction &MF = MIRBuilder.getMF();
691 MachineBasicBlock &MBB = MIRBuilder.getMBB();
692 MachineRegisterInfo &MRI = MF.getRegInfo();
693 auto &DL = F.getDataLayout();
694 auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
695
  // Arm64EC has extra requirements for varargs calls which are only
  // implemented in SelectionDAG; bail out for now.
698 if (F.isVarArg() && Subtarget.isWindowsArm64EC())
699 return false;
700
701 // Arm64EC thunks have a special calling convention which is only implemented
702 // in SelectionDAG; bail out for now.
703 if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native ||
704 F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
705 return false;
706
707 bool IsWin64 =
708 Subtarget.isCallingConvWin64(CC: F.getCallingConv(), IsVarArg: F.isVarArg()) &&
709 !Subtarget.isWindowsArm64EC();
710
711 SmallVector<ArgInfo, 8> SplitArgs;
712 SmallVector<std::pair<Register, Register>> BoolArgs;
713
714 // Insert the hidden sret parameter if the return value won't fit in the
715 // return registers.
716 if (!FLI.CanLowerReturn)
717 insertSRetIncomingArgument(F, SplitArgs, DemoteReg&: FLI.DemoteRegister, MRI, DL);
718
719 unsigned i = 0;
720 for (auto &Arg : F.args()) {
721 if (DL.getTypeStoreSize(Ty: Arg.getType()).isZero())
722 continue;
723
724 ArgInfo OrigArg{VRegs[i], Arg, i};
725 setArgFlags(Arg&: OrigArg, OpIdx: i + AttributeList::FirstArgIndex, DL, FuncInfo: F);
726
727 // i1 arguments are zero-extended to i8 by the caller. Emit a
728 // hint to reflect this.
729 if (OrigArg.Ty->isIntegerTy(Bitwidth: 1)) {
730 assert(OrigArg.Regs.size() == 1 &&
731 MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
732 "Unexpected registers used for i1 arg");
733
734 auto &Flags = OrigArg.Flags[0];
735 if (!Flags.isZExt() && !Flags.isSExt()) {
736 // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
737 Register OrigReg = OrigArg.Regs[0];
738 Register WideReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 8));
739 OrigArg.Regs[0] = WideReg;
740 BoolArgs.push_back(Elt: {OrigReg, WideReg});
741 }
742 }
743
744 if (Arg.hasAttribute(Kind: Attribute::SwiftAsync))
745 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
746
747 splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs, DL, CallConv: F.getCallingConv());
748 ++i;
749 }
750
751 if (!MBB.empty())
752 MIRBuilder.setInstr(*MBB.begin());
753
754 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg());
756
757 AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
758 FormalArgHandler Handler(MIRBuilder, MRI);
759 SmallVector<CCValAssign, 16> ArgLocs;
760 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
761 if (!determineAssignments(Assigner, Args&: SplitArgs, CCInfo) ||
762 !handleAssignments(Handler, Args&: SplitArgs, CCState&: CCInfo, ArgLocs, MIRBuilder))
763 return false;
764
765 if (!BoolArgs.empty()) {
766 for (auto &KV : BoolArgs) {
767 Register OrigReg = KV.first;
768 Register WideReg = KV.second;
769 LLT WideTy = MRI.getType(Reg: WideReg);
770 assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
771 "Unexpected bit size of a bool arg");
772 MIRBuilder.buildTrunc(
773 Res: OrigReg, Op: MIRBuilder.buildAssertZExt(Res: WideTy, Op: WideReg, Size: 1).getReg(Idx: 0));
774 }
775 }
776
777 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
778 uint64_t StackSize = Assigner.StackSize;
779 if (F.isVarArg()) {
    if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) ||
        IsWin64) {
781 // The AAPCS variadic function ABI is identical to the non-variadic
782 // one. As a result there may be more arguments in registers and we should
783 // save them for future reference.
784 // Win64 variadic functions also pass arguments in registers, but all
785 // float arguments are passed in integer registers.
786 saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
787 } else if (Subtarget.isWindowsArm64EC()) {
788 return false;
789 }
790
791 // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
792 StackSize = alignTo(Value: Assigner.StackSize, Align: Subtarget.isTargetILP32() ? 4 : 8);
793
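    // Record where the first stack-passed vararg lives so va_start knows
    // where the on-stack arguments begin.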
794 auto &MFI = MIRBuilder.getMF().getFrameInfo();
795 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(Size: 4, SPOffset: StackSize, IsImmutable: true));
796 }
797
798 if (doesCalleeRestoreStack(CallConv: F.getCallingConv(),
799 TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) {
800 // We have a non-standard ABI, so why not make full use of the stack that
801 // we're going to pop? It must be aligned to 16 B in any case.
802 StackSize = alignTo(Value: StackSize, Align: 16);
803
804 // If we're expected to restore the stack (e.g. fastcc), then we'll be
805 // adding a multiple of 16.
806 FuncInfo->setArgumentStackToRestore(StackSize);
807
808 // Our own callers will guarantee that the space is free by giving an
809 // aligned value to CALLSEQ_START.
810 }
811
812 // When we tail call, we need to check if the callee's arguments
813 // will fit on the caller's stack. So, whenever we lower formal arguments,
814 // we should keep track of this information, since we might lower a tail call
815 // in this function later.
816 FuncInfo->setBytesInStackArgArea(StackSize);
817
818 if (Subtarget.hasCustomCallingConv())
819 Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
820
821 handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
822
823 // Move back to the end of the basic block.
824 MIRBuilder.setMBB(MBB);
825
826 return true;
827}
828
829/// Return true if the calling convention is one that we can guarantee TCO for.
830static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
831 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
832 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
833}
834
835/// Return true if we might ever do TCO for calls with this calling convention.
836static bool mayTailCallThisCC(CallingConv::ID CC) {
837 switch (CC) {
838 case CallingConv::C:
839 case CallingConv::PreserveMost:
840 case CallingConv::PreserveAll:
841 case CallingConv::PreserveNone:
842 case CallingConv::Swift:
843 case CallingConv::SwiftTail:
844 case CallingConv::Tail:
845 case CallingConv::Fast:
846 return true;
847 default:
848 return false;
849 }
850}
851
/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn
/// for CC.
854static std::pair<CCAssignFn *, CCAssignFn *>
855getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
856 return {TLI.CCAssignFnForCall(CC, IsVarArg: false), TLI.CCAssignFnForCall(CC, IsVarArg: true)};
857}
858
859bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
860 CallLoweringInfo &Info, MachineFunction &MF,
861 SmallVectorImpl<ArgInfo> &InArgs) const {
862 const Function &CallerF = MF.getFunction();
863 CallingConv::ID CalleeCC = Info.CallConv;
864 CallingConv::ID CallerCC = CallerF.getCallingConv();
865
866 // If the calling conventions match, then everything must be the same.
867 if (CalleeCC == CallerCC)
868 return true;
869
870 // Check if the caller and callee will handle arguments in the same way.
871 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
872 CCAssignFn *CalleeAssignFnFixed;
873 CCAssignFn *CalleeAssignFnVarArg;
874 std::tie(args&: CalleeAssignFnFixed, args&: CalleeAssignFnVarArg) =
875 getAssignFnsForCC(CC: CalleeCC, TLI);
876
877 CCAssignFn *CallerAssignFnFixed;
878 CCAssignFn *CallerAssignFnVarArg;
879 std::tie(args&: CallerAssignFnFixed, args&: CallerAssignFnVarArg) =
880 getAssignFnsForCC(CC: CallerCC, TLI);
881
882 AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
883 CalleeAssignFnVarArg);
884 AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
885 CallerAssignFnVarArg);
886
887 if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
888 return false;
889
890 // Make sure that the caller and callee preserve all of the same registers.
891 auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
892 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
893 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
894 if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
895 TRI->UpdateCustomCallPreservedMask(MF, Mask: &CallerPreserved);
896 TRI->UpdateCustomCallPreservedMask(MF, Mask: &CalleePreserved);
897 }
898
899 return TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved);
900}
901
902bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
903 CallLoweringInfo &Info, MachineFunction &MF,
904 SmallVectorImpl<ArgInfo> &OrigOutArgs) const {
905 // If there are no outgoing arguments, then we are done.
906 if (OrigOutArgs.empty())
907 return true;
908
909 const Function &CallerF = MF.getFunction();
910 LLVMContext &Ctx = CallerF.getContext();
911 CallingConv::ID CalleeCC = Info.CallConv;
912 CallingConv::ID CallerCC = CallerF.getCallingConv();
913 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
914 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
915
916 CCAssignFn *AssignFnFixed;
917 CCAssignFn *AssignFnVarArg;
918 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI);
919
920 // We have outgoing arguments. Make sure that we can tail call with them.
921 SmallVector<CCValAssign, 16> OutLocs;
922 CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
923
924 AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
925 Subtarget, /*IsReturn*/ false);
926 // determineAssignments() may modify argument flags, so make a copy.
927 SmallVector<ArgInfo, 8> OutArgs;
928 append_range(C&: OutArgs, R&: OrigOutArgs);
929 if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo)) {
930 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
931 return false;
932 }
933
934 // Make sure that they can fit on the caller's stack.
935 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
936 if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
937 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
938 return false;
939 }
940
941 // Verify that the parameters in callee-saved registers match.
942 // TODO: Port this over to CallLowering as general code once swiftself is
943 // supported.
944 auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
945 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
946 MachineRegisterInfo &MRI = MF.getRegInfo();
947
948 if (Info.IsVarArg) {
949 // Be conservative and disallow variadic memory operands to match SDAG's
950 // behaviour.
951 // FIXME: If the caller's calling convention is C, then we can
952 // potentially use its argument area. However, for cases like fastcc,
953 // we can't do anything.
954 for (unsigned i = 0; i < OutLocs.size(); ++i) {
955 auto &ArgLoc = OutLocs[i];
956 if (ArgLoc.isRegLoc())
957 continue;
958
959 LLVM_DEBUG(
960 dbgs()
961 << "... Cannot tail call vararg function with stack arguments\n");
962 return false;
963 }
964 }
965
966 return parametersInCSRMatch(MRI, CallerPreservedMask, ArgLocs: OutLocs, OutVals: OutArgs);
967}
968
969bool AArch64CallLowering::isEligibleForTailCallOptimization(
970 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
971 SmallVectorImpl<ArgInfo> &InArgs,
972 SmallVectorImpl<ArgInfo> &OutArgs) const {
973
974 // Must pass all target-independent checks in order to tail call optimize.
975 if (!Info.IsTailCall)
976 return false;
977
978 CallingConv::ID CalleeCC = Info.CallConv;
979 MachineFunction &MF = MIRBuilder.getMF();
980 const Function &CallerF = MF.getFunction();
981
982 LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
983
984 if (Info.SwiftErrorVReg) {
985 // TODO: We should handle this.
986 // Note that this is also handled by the check for no outgoing arguments.
987 // Proactively disabling this though, because the swifterror handling in
988 // lowerCall inserts a COPY *after* the location of the call.
989 LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
990 return false;
991 }
992
993 if (!mayTailCallThisCC(CC: CalleeCC)) {
994 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
995 return false;
996 }
997
998 // Byval parameters hand the function a pointer directly into the stack area
999 // we want to reuse during a tail call. Working around this *is* possible (see
1000 // X86).
1001 //
1002 // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
1003 // it?
1004 //
1005 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
1006 // In this case, it is necessary to save/restore X0 in the callee. Tail
1007 // call opt interferes with this. So we disable tail call opt when the
1008 // caller has an argument with "inreg" attribute.
1009 //
1010 // FIXME: Check whether the callee also has an "inreg" argument.
1011 //
1012 // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
  // tail call.
1015 if (any_of(Range: CallerF.args(), P: [](const Argument &A) {
1016 return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
1017 })) {
1018 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
1019 "inreg, or swifterror arguments\n");
1020 return false;
1021 }
1022
1023 // Externally-defined functions with weak linkage should not be
1024 // tail-called on AArch64 when the OS does not support dynamic
1025 // pre-emption of symbols, as the AAELF spec requires normal calls
1026 // to undefined weak functions to be replaced with a NOP or jump to the
1027 // next instruction. The behaviour of branch instructions in this
1028 // situation (as used for tail calls) is implementation-defined, so we
1029 // cannot rely on the linker replacing the tail call with a return.
1030 if (Info.Callee.isGlobal()) {
1031 const GlobalValue *GV = Info.Callee.getGlobal();
1032 const Triple &TT = MF.getTarget().getTargetTriple();
1033 if (GV->hasExternalWeakLinkage() &&
1034 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
1035 TT.isOSBinFormatMachO())) {
1036 LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
1037 "with weak linkage for this OS.\n");
1038 return false;
1039 }
1040 }
1041
1042 // If we have -tailcallopt, then we're done.
1043 if (canGuaranteeTCO(CC: CalleeCC, GuaranteeTailCalls: MF.getTarget().Options.GuaranteedTailCallOpt))
1044 return CalleeCC == CallerF.getCallingConv();
1045
1046 // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
1047 // Try to find cases where we can do that.
1048
1049 // I want anyone implementing a new calling convention to think long and hard
1050 // about this assert.
1051 assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
1052 "Unexpected variadic calling convention");
1053
1054 // Verify that the incoming and outgoing arguments from the callee are
1055 // safe to tail call.
1056 if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1057 LLVM_DEBUG(
1058 dbgs()
1059 << "... Caller and callee have incompatible calling conventions.\n");
1060 return false;
1061 }
1062
1063 if (!areCalleeOutgoingArgsTailCallable(Info, MF, OrigOutArgs&: OutArgs))
1064 return false;
1065
1066 LLVM_DEBUG(
1067 dbgs() << "... Call is eligible for tail call optimization.\n");
1068 return true;
1069}
1070
1071static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
1072 bool IsTailCall,
1073 std::optional<CallLowering::PtrAuthInfo> &PAI,
1074 MachineRegisterInfo &MRI) {
1075 const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>();
1076
1077 if (!IsTailCall) {
1078 if (!PAI)
1079 return IsIndirect ? getBLRCallOpcode(MF: CallerF) : (unsigned)AArch64::BL;
1080
1081 assert(IsIndirect && "Direct call should not be authenticated");
1082 assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) &&
1083 "Invalid auth call key");
1084 return AArch64::BLRA;
1085 }
1086
1087 if (!IsIndirect)
1088 return AArch64::TCRETURNdi;
1089
1090 // When BTI or PAuthLR are enabled, there are restrictions on using x16 and
1091 // x17 to hold the function pointer.
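  // With BTI, an indirect tail call must branch through x16 or x17: a BR via
  // any other register would not be accepted by a "BTI c" landing pad at the
  // callee.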
1092 if (FuncInfo->branchTargetEnforcement()) {
1093 if (FuncInfo->branchProtectionPAuthLR()) {
1094 assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1095 return AArch64::TCRETURNrix17;
1096 }
1097 if (PAI)
1098 return AArch64::AUTH_TCRETURN_BTI;
1099 return AArch64::TCRETURNrix16x17;
1100 }
1101
1102 if (FuncInfo->branchProtectionPAuthLR()) {
1103 assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1104 return AArch64::TCRETURNrinotx16;
1105 }
1106
1107 if (PAI)
1108 return AArch64::AUTH_TCRETURN;
1109 return AArch64::TCRETURNri;
1110}
1111
1112static const uint32_t *
1113getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
1114 AArch64CallLowering::CallLoweringInfo &Info,
1115 const AArch64RegisterInfo &TRI, MachineFunction &MF) {
1116 const uint32_t *Mask;
1117 if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
1118 // For 'this' returns, use the X0-preserving mask if applicable
1119 Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
1120 if (!Mask) {
1121 OutArgs[0].Flags[0].setReturned(false);
1122 Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1123 }
1124 } else {
1125 Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1126 }
1127 return Mask;
1128}
1129
1130bool AArch64CallLowering::lowerTailCall(
1131 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1132 SmallVectorImpl<ArgInfo> &OutArgs) const {
1133 MachineFunction &MF = MIRBuilder.getMF();
1134 const Function &F = MF.getFunction();
1135 MachineRegisterInfo &MRI = MF.getRegInfo();
1136 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1137 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1138
1139 // True when we're tail calling, but without -tailcallopt.
1140 bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
1141 Info.CallConv != CallingConv::Tail &&
1142 Info.CallConv != CallingConv::SwiftTail;
1143
1144 // Find out which ABI gets to decide where things go.
1145 CallingConv::ID CalleeCC = Info.CallConv;
1146 CCAssignFn *AssignFnFixed;
1147 CCAssignFn *AssignFnVarArg;
1148 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI);
1149
1150 MachineInstrBuilder CallSeqStart;
1151 if (!IsSibCall)
1152 CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN);
1153
1154 unsigned Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: true, PAI&: Info.PAI, MRI);
1155 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc);
1156 MIB.add(MO: Info.Callee);
1157
1158 // Tell the call which registers are clobbered.
1159 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1160 auto TRI = Subtarget.getRegisterInfo();
1161
1162 // Byte offset for the tail call. When we are sibcalling, this will always
1163 // be 0.
1164 MIB.addImm(Val: 0);
1165
1166 // Authenticated tail calls always take key/discriminator arguments.
1167 if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) {
1168 assert((Info.PAI->Key == AArch64PACKey::IA ||
1169 Info.PAI->Key == AArch64PACKey::IB) &&
1170 "Invalid auth call key");
1171 MIB.addImm(Val: Info.PAI->Key);
1172
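    // The discriminator may be a blend of a constant and an address; split it
    // into the immediate and register operands the AUTH_TCRETURN pseudos
    // expect.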
1173 Register AddrDisc = 0;
1174 uint16_t IntDisc = 0;
1175 std::tie(args&: IntDisc, args&: AddrDisc) =
1176 extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI);
1177
1178 MIB.addImm(Val: IntDisc);
1179 MIB.addUse(RegNo: AddrDisc);
1180 if (AddrDisc != AArch64::NoRegister) {
1181 MIB->getOperand(i: 4).setReg(constrainOperandRegClass(
1182 MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1183 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(),
1184 RegMO&: MIB->getOperand(i: 4), OpIdx: 4));
1185 }
1186 }
1187
1188 // Tell the call which registers are clobbered.
1189 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1190 if (Subtarget.hasCustomCallingConv())
1191 TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask);
1192 MIB.addRegMask(Mask);
1193
1194 if (Info.CFIType)
1195 MIB->setCFIType(MF, Type: Info.CFIType->getZExtValue());
1196
1197 if (TRI->isAnyArgRegReserved(MF))
1198 TRI->emitReservedArgRegCallError(MF);
1199
1200 // FPDiff is the byte offset of the call's argument area from the callee's.
1201 // Stores to callee stack arguments will be placed in FixedStackSlots offset
1202 // by this amount for a tail call. In a sibling call it must be 0 because the
1203 // caller will deallocate the entire stack and the callee still expects its
1204 // arguments to begin at SP+0.
1205 int FPDiff = 0;
1206
1207 // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1208 // by -tailcallopt. For sibcalls, the memory operands for the call are
1209 // already available in the caller's incoming argument space.
1210 unsigned NumBytes = 0;
1211 if (!IsSibCall) {
1212 // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1213 // before handling assignments, because FPDiff must be known for memory
1214 // arguments.
1215 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1216 SmallVector<CCValAssign, 16> OutLocs;
1217 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1218
1219 AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
1220 Subtarget, /*IsReturn*/ false);
1221 if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo))
1222 return false;
1223
1224 // The callee will pop the argument stack as a tail call. Thus, we must
1225 // keep it 16-byte aligned.
1226 NumBytes = alignTo(Value: OutInfo.getStackSize(), Align: 16);
1227
1228 // FPDiff will be negative if this tail call requires more space than we
1229 // would automatically have in our incoming argument space. Positive if we
1230 // actually shrink the stack.
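    // For example, with 32 bytes of reusable incoming argument space and 48
    // bytes of outgoing arguments, FPDiff is -16 and an extra 16 bytes must
    // be reserved for tail calls.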
1231 FPDiff = NumReusableBytes - NumBytes;
1232
1233 // Update the required reserved area if this is the tail call requiring the
1234 // most argument stack space.
1235 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
1236 FuncInfo->setTailCallReservedStack(-FPDiff);
1237
1238 // The stack pointer must be 16-byte aligned at all times it's used for a
1239 // memory operation, which in practice means at *all* times and in
1240 // particular across call boundaries. Therefore our own arguments started at
1241 // a 16-byte aligned SP and the delta applied for the tail call should
1242 // satisfy the same constraint.
1243 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1244 }
1245
1246 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1247
1248 AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1249 Subtarget, /*IsReturn*/ false);
1250
1251 // Do the actual argument marshalling.
1252 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1253 /*IsTailCall*/ true, FPDiff);
1254 if (!determineAndHandleAssignments(Handler, Assigner, Args&: OutArgs, MIRBuilder,
1255 CallConv: CalleeCC, IsVarArg: Info.IsVarArg))
1256 return false;
1257
1258 Mask = getMaskForArgs(OutArgs, Info, TRI: *TRI, MF);
1259
1260 if (Info.IsVarArg && Info.IsMustTailCall) {
1261 // Now we know what's being passed to the function. Add uses to the call for
1262 // the forwarded registers that we *aren't* passing as parameters. This will
    // preserve the copies we built earlier.
1264 for (const auto &F : Forwards) {
1265 Register ForwardedReg = F.PReg;
1266 // If the register is already passed, or aliases a register which is
1267 // already being passed, then skip it.
1268 if (any_of(Range: MIB->uses(), P: [&ForwardedReg, &TRI](const MachineOperand &Use) {
1269 if (!Use.isReg())
1270 return false;
1271 return TRI->regsOverlap(RegA: Use.getReg(), RegB: ForwardedReg);
1272 }))
1273 continue;
1274
1275 // We aren't passing it already, so we should add it to the call.
1276 MIRBuilder.buildCopy(Res: ForwardedReg, Op: Register(F.VReg));
1277 MIB.addReg(RegNo: ForwardedReg, flags: RegState::Implicit);
1278 }
1279 }
1280
1281 // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1282 // sequence start and end here.
1283 if (!IsSibCall) {
1284 MIB->getOperand(i: 1).setImm(FPDiff);
1285 CallSeqStart.addImm(Val: 0).addImm(Val: 0);
1286 // End the call sequence *before* emitting the call. Normally, we would
1287 // tidy the frame up after the call. However, here, we've laid out the
1288 // parameters so that when SP is reset, they will be in the correct
1289 // location.
1290 MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKUP).addImm(Val: 0).addImm(Val: 0);
1291 }
1292
1293 // Now we can add the actual call instruction to the correct basic block.
1294 MIRBuilder.insertInstr(MIB);
1295
1296 // If Callee is a reg, since it is used by a target specific instruction,
1297 // it must have a register class matching the constraint of that instruction.
1298 if (MIB->getOperand(i: 0).isReg())
1299 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1300 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB,
1301 II: MIB->getDesc(), RegMO&: MIB->getOperand(i: 0), OpIdx: 0);
1302
1303 MF.getFrameInfo().setHasTailCall();
1304 Info.LoweredTailCall = true;
1305 return true;
1306}
1307
1308bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1309 CallLoweringInfo &Info) const {
1310 MachineFunction &MF = MIRBuilder.getMF();
1311 const Function &F = MF.getFunction();
1312 MachineRegisterInfo &MRI = MF.getRegInfo();
1313 auto &DL = F.getDataLayout();
1314 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1315 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1316
1317 // Arm64EC has extra requirements for varargs calls; bail out for now.
1318 //
1319 // Arm64EC has special mangling rules for calls; bail out on all calls for
1320 // now.
1321 if (Subtarget.isWindowsArm64EC())
1322 return false;
1323
1324 // Arm64EC thunks have a special calling convention which is only implemented
1325 // in SelectionDAG; bail out for now.
1326 if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
1327 Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
1328 return false;
1329
1330 SmallVector<ArgInfo, 8> OutArgs;
1331 for (auto &OrigArg : Info.OrigArgs) {
1332 splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs&: OutArgs, DL, CallConv: Info.CallConv);
1333 // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
1334 auto &Flags = OrigArg.Flags[0];
1335 if (OrigArg.Ty->isIntegerTy(Bitwidth: 1) && !Flags.isSExt() && !Flags.isZExt()) {
1336 ArgInfo &OutArg = OutArgs.back();
1337 assert(OutArg.Regs.size() == 1 &&
1338 MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1339 "Unexpected registers used for i1 arg");
1340
1341 // We cannot use a ZExt ArgInfo flag here, because it will
1342 // zero-extend the argument to i32 instead of just i8.
1343 OutArg.Regs[0] =
1344 MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 8), Op: OutArg.Regs[0]).getReg(Idx: 0);
1345 LLVMContext &Ctx = MF.getFunction().getContext();
1346 OutArg.Ty = Type::getInt8Ty(C&: Ctx);
1347 }
1348 }
1349
1350 SmallVector<ArgInfo, 8> InArgs;
1351 if (!Info.OrigRet.Ty->isVoidTy())
1352 splitToValueTypes(OrigArgInfo: Info.OrigRet, SplitArgs&: InArgs, DL, CallConv: Info.CallConv);
1353
1354 // If we can lower as a tail call, do that instead.
1355 bool CanTailCallOpt =
1356 isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1357
1358 // We must emit a tail call if we have musttail.
1359 if (Info.IsMustTailCall && !CanTailCallOpt) {
1360 // There are types of incoming/outgoing arguments we can't handle yet, so
1361 // it doesn't make sense to actually die here like in ISelLowering. Instead,
1362 // fall back to SelectionDAG and let it try to handle this.
1363 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1364 return false;
1365 }
1366
1367 Info.IsTailCall = CanTailCallOpt;
1368 if (CanTailCallOpt)
1369 return lowerTailCall(MIRBuilder, Info, OutArgs);
1370
1371 // Find out which ABI gets to decide where things go.
1372 CCAssignFn *AssignFnFixed;
1373 CCAssignFn *AssignFnVarArg;
1374 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) =
1375 getAssignFnsForCC(CC: Info.CallConv, TLI);
1376
1377 MachineInstrBuilder CallSeqStart;
1378 CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN);
1379
1380 // Create a temporarily-floating call instruction so we can add the implicit
1381 // uses of arg registers.
1382
1383 unsigned Opc = 0;
1384 // Calls with operand bundle "clang.arc.attachedcall" are special. They should
1385 // be expanded to the call, directly followed by a special marker sequence and
1386 // a call to an ObjC library function.
1387 if (Info.CB && objcarc::hasAttachedCallOpBundle(CB: Info.CB))
1388 Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
  // A call to a function that returns twice (e.g. setjmp) must be followed by
  // a BTI instruction.
1391 else if (Info.CB && Info.CB->hasFnAttr(Kind: Attribute::ReturnsTwice) &&
1392 !Subtarget.noBTIAtReturnTwice() &&
1393 MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1394 Opc = AArch64::BLR_BTI;
1395 else {
1396 // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
1397 // is set.
1398 if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
1399 auto MIB = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_GLOBAL_VALUE);
1400 DstOp(getLLTForType(Ty&: *F.getType(), DL)).addDefToMIB(MRI, MIB);
1401 MIB.addExternalSymbol(FnName: Info.Callee.getSymbolName(), TargetFlags: AArch64II::MO_GOT);
1402 Info.Callee = MachineOperand::CreateReg(Reg: MIB.getReg(Idx: 0), isDef: false);
1403 }
1404 Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: false, PAI&: Info.PAI, MRI);
1405 }
1406
1407 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc);
1408 unsigned CalleeOpNo = 0;
1409
1410 if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
1411 // Add a target global address for the retainRV/claimRV runtime function
1412 // just before the call target.
1413 Function *ARCFn = *objcarc::getAttachedARCFunction(CB: Info.CB);
1414 MIB.addGlobalAddress(GV: ARCFn);
1415 ++CalleeOpNo;
1416
1417 // We may or may not need to emit both the marker and the retain/claim call.
1418 // Tell the pseudo expansion using an additional boolean op.
1419 MIB.addImm(Val: objcarc::attachedCallOpBundleNeedsMarker(CB: Info.CB));
1420 ++CalleeOpNo;
1421 } else if (Info.CFIType) {
1422 MIB->setCFIType(MF, Type: Info.CFIType->getZExtValue());
1423 }
1424
1425 MIB.add(MO: Info.Callee);
1426
1427 // Tell the call which registers are clobbered.
1428 const uint32_t *Mask;
1429 const auto *TRI = Subtarget.getRegisterInfo();
1430
1431 AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1432 Subtarget, /*IsReturn*/ false);
1433 // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall=*/false);
1435 if (!determineAndHandleAssignments(Handler, Assigner, Args&: OutArgs, MIRBuilder,
1436 CallConv: Info.CallConv, IsVarArg: Info.IsVarArg))
1437 return false;
1438
1439 Mask = getMaskForArgs(OutArgs, Info, TRI: *TRI, MF);
1440
1441 if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
1442 assert((Info.PAI->Key == AArch64PACKey::IA ||
1443 Info.PAI->Key == AArch64PACKey::IB) &&
1444 "Invalid auth call key");
1445 MIB.addImm(Val: Info.PAI->Key);
1446
1447 Register AddrDisc = 0;
1448 uint16_t IntDisc = 0;
1449 std::tie(args&: IntDisc, args&: AddrDisc) =
1450 extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI);
1451
1452 MIB.addImm(Val: IntDisc);
1453 MIB.addUse(RegNo: AddrDisc);
1454 if (AddrDisc != AArch64::NoRegister) {
1455 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1456 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB,
1457 II: MIB->getDesc(), RegMO&: MIB->getOperand(i: CalleeOpNo + 3),
1458 OpIdx: CalleeOpNo + 3);
1459 }
1460 }
1461
1462 // Tell the call which registers are clobbered.
1463 if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1464 TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask);
1465 MIB.addRegMask(Mask);
1466
1467 if (TRI->isAnyArgRegReserved(MF))
1468 TRI->emitReservedArgRegCallError(MF);
1469
1470 // Now we can add the actual call instruction to the correct basic block.
1471 MIRBuilder.insertInstr(MIB);
1472
1473 uint64_t CalleePopBytes =
1474 doesCalleeRestoreStack(CallConv: Info.CallConv,
1475 TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)
1476 ? alignTo(Value: Assigner.StackSize, Align: 16)
1477 : 0;
1478
1479 CallSeqStart.addImm(Val: Assigner.StackSize).addImm(Val: 0);
1480 MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKUP)
1481 .addImm(Val: Assigner.StackSize)
1482 .addImm(Val: CalleePopBytes);
1483
1484 // If Callee is a reg, since it is used by a target specific
1485 // instruction, it must have a register class matching the
1486 // constraint of that instruction.
1487 if (MIB->getOperand(i: CalleeOpNo).isReg())
1488 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *Subtarget.getInstrInfo(),
1489 RBI: *Subtarget.getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(),
1490 RegMO&: MIB->getOperand(i: CalleeOpNo), OpIdx: CalleeOpNo);
1491
  // Finally we can copy the returned value back into its virtual register. In
1493 // symmetry with the arguments, the physical register must be an
1494 // implicit-define of the call instruction.
1495 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1496 CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(CC: Info.CallConv);
1497 CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1498 bool UsingReturnedArg =
1499 !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
1500
1501 AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1502 /*IsReturn*/ false);
1503 ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
1504 if (!determineAndHandleAssignments(
1505 Handler&: UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, Args&: InArgs,
1506 MIRBuilder, CallConv: Info.CallConv, IsVarArg: Info.IsVarArg,
1507 ThisReturnRegs: UsingReturnedArg ? ArrayRef(OutArgs[0].Regs)
1508 : ArrayRef<Register>()))
1509 return false;
1510 }
1511
1512 if (Info.SwiftErrorVReg) {
1513 MIB.addDef(RegNo: AArch64::X21, Flags: RegState::Implicit);
1514 MIRBuilder.buildCopy(Res: Info.SwiftErrorVReg, Op: Register(AArch64::X21));
1515 }
1516
1517 if (!Info.CanLowerReturn) {
1518 insertSRetLoads(MIRBuilder, RetTy: Info.OrigRet.Ty, VRegs: Info.OrigRet.Regs,
1519 DemoteReg: Info.DemoteRegister, FI: Info.DemoteStackIndex);
1520 }
1521 return true;
1522}
1523
1524bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
1525 return Ty.getSizeInBits() == 64;
1526}
1527