1//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the lowering of LLVM calls to machine code calls for
11/// GlobalISel.
12///
13//===----------------------------------------------------------------------===//
14
15#include "AArch64CallLowering.h"
16#include "AArch64GlobalISelUtils.h"
17#include "AArch64ISelLowering.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64RegisterInfo.h"
20#include "AArch64SMEAttributes.h"
21#include "AArch64Subtarget.h"
22#include "AArch64TargetMachine.h"
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/Analysis/ObjCARCUtil.h"
26#include "llvm/CodeGen/Analysis.h"
27#include "llvm/CodeGen/CallingConvLower.h"
28#include "llvm/CodeGen/FunctionLoweringInfo.h"
29#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
30#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
31#include "llvm/CodeGen/GlobalISel/Utils.h"
32#include "llvm/CodeGen/LowLevelTypeUtils.h"
33#include "llvm/CodeGen/MachineBasicBlock.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/CodeGen/TargetRegisterInfo.h"
42#include "llvm/CodeGen/TargetSubtargetInfo.h"
43#include "llvm/CodeGen/ValueTypes.h"
44#include "llvm/CodeGenTypes/MachineValueType.h"
45#include "llvm/IR/Argument.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/Value.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53
54#define DEBUG_TYPE "aarch64-call-lowering"
55
56using namespace llvm;
57using namespace AArch64GISelUtils;
58
59extern cl::opt<bool> EnableSVEGISel;
60
61AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
62 : CallLowering(&TLI) {}
63
64static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
65 MVT &LocVT) {
66 // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
67 // hack because the DAG calls the assignment function with pre-legalized
68 // register typed values, not the raw type.
69 //
  // This hack is not applied to return values, which are not passed on the
  // stack.
72 if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
73 ValVT = LocVT = MVT::i8;
74 else if (OrigVT == MVT::i16)
75 ValVT = LocVT = MVT::i16;
76}
77
78// Account for i1/i8/i16 stack passed value hack
79static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
80 const MVT ValVT = VA.getValVT();
81 return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
82 : LLT(VA.getLocVT());
83}
84
85namespace {
86
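/// Assigner used for incoming values, i.e. formal arguments. Applies the
/// i1/i8/i16 stack-passed small-type hack before delegating to the generic
/// IncomingValueAssigner.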
87struct AArch64IncomingValueAssigner
88 : public CallLowering::IncomingValueAssigner {
89 AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
90 CCAssignFn *AssignFnVarArg_)
91 : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
92
93 bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
94 CCValAssign::LocInfo LocInfo,
95 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
96 CCState &State) override {
97 applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
98 return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
99 LocInfo, Info, Flags, State);
100 }
101};
102
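/// Assigner used for outgoing values (call arguments and return values).
/// Win64 varargs callees use the vararg convention even for fixed arguments,
/// and the small-type hack is skipped for returns, which are never passed on
/// the stack.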
103struct AArch64OutgoingValueAssigner
104 : public CallLowering::OutgoingValueAssigner {
105 const AArch64Subtarget &Subtarget;
106
107 /// Track if this is used for a return instead of function argument
108 /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
109 /// stack passed returns for them and cannot apply the type adjustment.
110 bool IsReturn;
111
112 AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
113 CCAssignFn *AssignFnVarArg_,
114 const AArch64Subtarget &Subtarget_,
115 bool IsReturn)
116 : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
117 Subtarget(Subtarget_), IsReturn(IsReturn) {}
118
119 bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
120 CCValAssign::LocInfo LocInfo,
121 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
122 CCState &State) override {
123 const Function &F = State.getMachineFunction().getFunction();
124 bool IsCalleeWin =
125 Subtarget.isCallingConvWin64(CC: State.getCallingConv(), IsVarArg: F.isVarArg());
126 bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
127
128 bool Res;
129 if (!Flags.isVarArg() && !UseVarArgsCCForFixed) {
130 if (!IsReturn)
131 applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
132 Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, Info.Ty, State);
133 } else
134 Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, Info.Ty, State);
135
136 StackSize = State.getStackSize();
137 return Res;
138 }
139};
140
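/// Common handler for incoming values: creates fixed stack objects for
/// stack-passed arguments and emits the loads and copies that bring values
/// into virtual registers.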
141struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
142 IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
143 : IncomingValueHandler(MIRBuilder, MRI) {}
144
145 Register getStackAddress(uint64_t Size, int64_t Offset,
146 MachinePointerInfo &MPO,
147 ISD::ArgFlagsTy Flags) override {
148 auto &MFI = MIRBuilder.getMF().getFrameInfo();
149
150 // Byval is assumed to be writable memory, but other stack passed arguments
151 // are not.
152 const bool IsImmutable = !Flags.isByVal();
153
154 int FI = MFI.CreateFixedObject(Size, SPOffset: Offset, IsImmutable);
155 MPO = MachinePointerInfo::getFixedStack(MF&: MIRBuilder.getMF(), FI);
156 auto AddrReg = MIRBuilder.buildFrameIndex(Res: LLT::pointer(AddressSpace: 0, SizeInBits: 64), Idx: FI);
157 return AddrReg.getReg(Idx: 0);
158 }
159
160 LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
161 ISD::ArgFlagsTy Flags) const override {
162 // For pointers, we just need to fixup the integer types reported in the
163 // CCValAssign.
164 if (Flags.isPointer())
165 return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
166 return getStackValueStoreTypeHack(VA);
167 }
168
169 void assignValueToReg(Register ValVReg, Register PhysReg,
170 const CCValAssign &VA) override {
171 markRegUsed(Reg: PhysReg);
172 IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
173 }
174
175 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
176 const MachinePointerInfo &MPO,
177 const CCValAssign &VA) override {
178 MachineFunction &MF = MIRBuilder.getMF();
179
180 LLT ValTy(VA.getValVT());
181 LLT LocTy(VA.getLocVT());
182
183 // Fixup the types for the DAG compatibility hack.
184 if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
185 std::swap(a&: ValTy, b&: LocTy);
186 else {
187 // The calling code knows if this is a pointer or not, we're only touching
188 // the LocTy for the i8/i16 hack.
189 assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
190 LocTy = MemTy;
191 }
192
193 auto MMO = MF.getMachineMemOperand(
194 PtrInfo: MPO, f: MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy: LocTy,
195 base_alignment: inferAlignFromPtrInfo(MF, MPO));
196
197 switch (VA.getLocInfo()) {
198 case CCValAssign::LocInfo::ZExt:
199 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_ZEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO);
200 return;
201 case CCValAssign::LocInfo::SExt:
202 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO);
203 return;
204 default:
205 MIRBuilder.buildLoad(Res: ValVReg, Addr, MMO&: *MMO);
206 return;
207 }
208 }
209
210 /// How the physical register gets marked varies between formal
211 /// parameters (it's a basic-block live-in), and a call instruction
212 /// (it's an implicit-def of the BL).
213 virtual void markRegUsed(Register Reg) = 0;
214};
215
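/// Incoming handler for formal arguments; argument registers are recorded as
/// function and basic-block live-ins.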
216struct FormalArgHandler : public IncomingArgHandler {
217 FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
218 : IncomingArgHandler(MIRBuilder, MRI) {}
219
220 void markRegUsed(Register Reg) override {
221 MIRBuilder.getMRI()->addLiveIn(Reg: Reg.asMCReg());
222 MIRBuilder.getMBB().addLiveIn(PhysReg: Reg.asMCReg());
223 }
224};
225
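/// Incoming handler for call results; result registers become implicit defs
/// of the call instruction.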
226struct CallReturnHandler : public IncomingArgHandler {
227 CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
228 MachineInstrBuilder MIB)
229 : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
230
231 void markRegUsed(Register Reg) override {
232 MIB.addDef(RegNo: Reg, Flags: RegState::Implicit);
233 }
234
235 MachineInstrBuilder MIB;
236};
237
238/// A special return arg handler for "returned" attribute arg calls.
239struct ReturnedArgCallReturnHandler : public CallReturnHandler {
240 ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
241 MachineRegisterInfo &MRI,
242 MachineInstrBuilder MIB)
243 : CallReturnHandler(MIRBuilder, MRI, MIB) {}
244
245 void markRegUsed(Register Reg) override {}
246};
247
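/// Handler for outgoing values: copies arguments into their physical
/// registers and stores stack-passed arguments, either relative to SP or, for
/// tail calls, into the caller's incoming argument area.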
248struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
249 OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
250 MachineInstrBuilder MIB, bool IsTailCall = false,
251 int FPDiff = 0)
252 : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
253 FPDiff(FPDiff),
254 Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
255
256 Register getStackAddress(uint64_t Size, int64_t Offset,
257 MachinePointerInfo &MPO,
258 ISD::ArgFlagsTy Flags) override {
259 MachineFunction &MF = MIRBuilder.getMF();
260 LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
261 LLT s64 = LLT::scalar(SizeInBits: 64);
262
263 if (IsTailCall) {
264 assert(!Flags.isByVal() && "byval unhandled with tail calls");
265
266 Offset += FPDiff;
267 int FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: Offset, IsImmutable: true);
268 auto FIReg = MIRBuilder.buildFrameIndex(Res: p0, Idx: FI);
269 MPO = MachinePointerInfo::getFixedStack(MF, FI);
270 return FIReg.getReg(Idx: 0);
271 }
272
273 if (!SPReg)
274 SPReg = MIRBuilder.buildCopy(Res: p0, Op: Register(AArch64::SP)).getReg(Idx: 0);
275
276 auto OffsetReg = MIRBuilder.buildConstant(Res: s64, Val: Offset);
277
278 auto AddrReg = MIRBuilder.buildPtrAdd(Res: p0, Op0: SPReg, Op1: OffsetReg);
279
280 MPO = MachinePointerInfo::getStack(MF, Offset);
281 return AddrReg.getReg(Idx: 0);
282 }
283
284 /// We need to fixup the reported store size for certain value types because
285 /// we invert the interpretation of ValVT and LocVT in certain cases. This is
286 /// for compatibility with the DAG call lowering implementation, which we're
287 /// currently building on top of.
288 LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
289 ISD::ArgFlagsTy Flags) const override {
290 if (Flags.isPointer())
291 return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
292 return getStackValueStoreTypeHack(VA);
293 }
294
295 void assignValueToReg(Register ValVReg, Register PhysReg,
296 const CCValAssign &VA) override {
297 MIB.addUse(RegNo: PhysReg, Flags: RegState::Implicit);
298 Register ExtReg = extendRegister(ValReg: ValVReg, VA);
299 MIRBuilder.buildCopy(Res: PhysReg, Op: ExtReg);
300 }
301
302 /// Check whether a stack argument requires lowering in a tail call.
303 static bool shouldLowerTailCallStackArg(const MachineFunction &MF,
304 const CCValAssign &VA,
305 Register ValVReg,
306 Register StoreAddr) {
307 const MachineRegisterInfo &MRI = MF.getRegInfo();
    // Look up the defining instruction for the value.
    auto *DefMI = MRI.getVRegDef(ValVReg);
310 assert(DefMI && "No defining instruction");
311 for (;;) {
312 // Look through nodes that don't alter the bits of the incoming value.
313 unsigned Op = DefMI->getOpcode();
314 if (Op == TargetOpcode::G_ZEXT || Op == TargetOpcode::G_ANYEXT ||
315 Op == TargetOpcode::G_BITCAST || isAssertMI(MI: *DefMI)) {
316 DefMI = MRI.getVRegDef(Reg: DefMI->getOperand(i: 1).getReg());
317 continue;
318 }
319 break;
320 }
321
322 auto *Load = dyn_cast<GLoad>(Val: DefMI);
323 if (!Load)
324 return true;
325 Register LoadReg = Load->getPointerReg();
326 auto *LoadAddrDef = MRI.getVRegDef(Reg: LoadReg);
327 if (LoadAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
328 return true;
329 const MachineFrameInfo &MFI = MF.getFrameInfo();
330 int LoadFI = LoadAddrDef->getOperand(i: 1).getIndex();
331
332 auto *StoreAddrDef = MRI.getVRegDef(Reg: StoreAddr);
333 if (StoreAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
334 return true;
335 int StoreFI = StoreAddrDef->getOperand(i: 1).getIndex();
336
337 if (!MFI.isImmutableObjectIndex(ObjectIdx: LoadFI))
338 return true;
339 if (MFI.getObjectOffset(ObjectIdx: LoadFI) != MFI.getObjectOffset(ObjectIdx: StoreFI))
340 return true;
341 if (Load->getMemSize() != MFI.getObjectSize(ObjectIdx: StoreFI))
342 return true;
343
344 return false;
345 }
346
347 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
348 const MachinePointerInfo &MPO,
349 const CCValAssign &VA) override {
350 MachineFunction &MF = MIRBuilder.getMF();
351 if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, ValVReg, StoreAddr: Addr))
352 return;
353 auto MMO = MF.getMachineMemOperand(PtrInfo: MPO, f: MachineMemOperand::MOStore, MemTy,
354 base_alignment: inferAlignFromPtrInfo(MF, MPO));
355 MIRBuilder.buildStore(Val: ValVReg, Addr, MMO&: *MMO);
356 }
357
358 void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
359 Register Addr, LLT MemTy,
360 const MachinePointerInfo &MPO,
361 const CCValAssign &VA) override {
362 unsigned MaxSize = MemTy.getSizeInBytes() * 8;
363 // For varargs, we always want to extend them to 8 bytes, in which case
364 // we disable setting a max.
365 if (Arg.Flags[0].isVarArg())
366 MaxSize = 0;
367
368 Register ValVReg = Arg.Regs[RegIndex];
369 if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
370 MVT LocVT = VA.getLocVT();
371 MVT ValVT = VA.getValVT();
372
373 if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
374 std::swap(a&: ValVT, b&: LocVT);
375 MemTy = LLT(VA.getValVT());
376 }
377
378 ValVReg = extendRegister(ValReg: ValVReg, VA, MaxSizeBits: MaxSize);
379 } else {
380 // The store does not cover the full allocated stack slot.
381 MemTy = LLT(VA.getValVT());
382 }
383
384 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
385 }
386
387 MachineInstrBuilder MIB;
388
389 bool IsTailCall;
390
391 /// For tail calls, the byte offset of the call's argument area from the
392 /// callee's. Unused elsewhere.
393 int FPDiff;
394
395 // Cache the SP register vreg if we need it more than once in this call site.
396 Register SPReg;
397
398 const AArch64Subtarget &Subtarget;
399};
400} // namespace
401
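/// Return true if the callee pops its own stack arguments: fastcc with
/// -tailcallopt, plus the tail and swifttail conventions.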
402static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
403 return (CallConv == CallingConv::Fast && TailCallOpt) ||
404 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
405}
406
407bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
408 const Value *Val,
409 ArrayRef<Register> VRegs,
410 FunctionLoweringInfo &FLI,
411 Register SwiftErrorVReg) const {
412 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: AArch64::RET_ReallyLR);
413 assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
414 "Return value without a vreg");
415
416 bool Success = true;
417 if (!FLI.CanLowerReturn) {
418 insertSRetStores(MIRBuilder, RetTy: Val->getType(), VRegs, DemoteReg: FLI.DemoteRegister);
419 } else if (!VRegs.empty()) {
420 MachineFunction &MF = MIRBuilder.getMF();
421 const Function &F = MF.getFunction();
422 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
423
424 MachineRegisterInfo &MRI = MF.getRegInfo();
425 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
426 CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC: F.getCallingConv());
427 auto &DL = F.getDataLayout();
428 LLVMContext &Ctx = Val->getType()->getContext();
429
430 SmallVector<EVT, 4> SplitEVTs;
431 ComputeValueVTs(TLI, DL, Ty: Val->getType(), ValueVTs&: SplitEVTs);
432 assert(VRegs.size() == SplitEVTs.size() &&
433 "For each split Type there should be exactly one VReg.");
434
435 SmallVector<ArgInfo, 8> SplitArgs;
436 CallingConv::ID CC = F.getCallingConv();
437
438 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
439 Register CurVReg = VRegs[i];
440 ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Context&: Ctx), 0};
441 setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F);
442
443 // i1 is a special case because SDAG i1 true is naturally zero extended
444 // when widened using ANYEXT. We need to do it explicitly here.
445 auto &Flags = CurArgInfo.Flags[0];
446 if (MRI.getType(Reg: CurVReg).getSizeInBits() == TypeSize::getFixed(ExactSize: 1) &&
447 !Flags.isSExt() && !Flags.isZExt()) {
448 CurVReg = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 8), Op: CurVReg).getReg(Idx: 0);
449 } else if (TLI.getNumRegistersForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]) ==
450 1) {
451 // Some types will need extending as specified by the CC.
452 MVT NewVT = TLI.getRegisterTypeForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]);
453 if (EVT(NewVT) != SplitEVTs[i]) {
454 unsigned ExtendOp = TargetOpcode::G_ANYEXT;
455 if (F.getAttributes().hasRetAttr(Kind: Attribute::SExt))
456 ExtendOp = TargetOpcode::G_SEXT;
457 else if (F.getAttributes().hasRetAttr(Kind: Attribute::ZExt))
458 ExtendOp = TargetOpcode::G_ZEXT;
459
460 LLT NewLLT(NewVT);
461 LLT OldLLT = getLLTForType(Ty&: *CurArgInfo.Ty, DL);
462 CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Context&: Ctx);
463 // Instead of an extend, we might have a vector type which needs
464 // padding with more elements, e.g. <2 x half> -> <4 x half>.
465 if (NewVT.isVector()) {
466 if (OldLLT.isVector()) {
467 if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
468 CurVReg =
469 MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg)
470 .getReg(Idx: 0);
471 } else {
472 // Just do a vector extend.
473 CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg})
474 .getReg(Idx: 0);
475 }
476 } else if (NewLLT.getNumElements() >= 2 &&
477 NewLLT.getNumElements() <= 8) {
478 // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't
479 // have <1 x S> vector types in GISel we use a build_vector
480 // instead of a vector merge/concat.
481 CurVReg =
482 MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg)
483 .getReg(Idx: 0);
484 } else {
485 LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
486 return false;
487 }
488 } else {
489 // If the split EVT was a <1 x T> vector, and NewVT is T, then we
490 // don't have to do anything since we don't distinguish between the
491 // two.
492 if (NewLLT != MRI.getType(Reg: CurVReg)) {
493 // A scalar extend.
494 CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg})
495 .getReg(Idx: 0);
496 }
497 }
498 }
499 }
500 if (CurVReg != CurArgInfo.Regs[0]) {
501 CurArgInfo.Regs[0] = CurVReg;
502 // Reset the arg flags after modifying CurVReg.
503 setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F);
504 }
505 splitToValueTypes(OrigArgInfo: CurArgInfo, SplitArgs, DL, CallConv: CC);
506 }
507
508 AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
509 /*IsReturn*/ true);
510 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
511 Success = determineAndHandleAssignments(Handler, Assigner, Args&: SplitArgs,
512 MIRBuilder, CallConv: CC, IsVarArg: F.isVarArg());
513 }
514
515 if (SwiftErrorVReg) {
516 MIB.addUse(RegNo: AArch64::X21, Flags: RegState::Implicit);
517 MIRBuilder.buildCopy(Res: AArch64::X21, Op: SwiftErrorVReg);
518 }
519
520 MIRBuilder.insertInstr(MIB);
521 return Success;
522}
523
524bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
525 CallingConv::ID CallConv,
526 SmallVectorImpl<BaseArgInfo> &Outs,
527 bool IsVarArg) const {
528 SmallVector<CCValAssign, 16> ArgLocs;
529 const auto &TLI = *getTLI<AArch64TargetLowering>();
530 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
531 MF.getFunction().getContext());
532
533 return checkReturn(CCInfo, Outs, Fn: TLI.CCAssignFnForReturn(CC: CallConv));
534}
535
/// Helper for musttail calls: compute the set of forwarded registers, set MBB
/// liveness, and emit COPY instructions that can later be used to save and
/// restore them.
539static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
540 CCAssignFn *AssignFn) {
541 MachineBasicBlock &MBB = MIRBuilder.getMBB();
542 MachineFunction &MF = MIRBuilder.getMF();
543 MachineFrameInfo &MFI = MF.getFrameInfo();
544
545 if (!MFI.hasMustTailInVarArgFunc())
546 return;
547
548 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
549 const Function &F = MF.getFunction();
550 assert(F.isVarArg() && "Expected F to be vararg?");
551
552 // Compute the set of forwarded registers. The rest are scratch.
553 SmallVector<CCValAssign, 16> ArgLocs;
554 CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
555 F.getContext());
556 SmallVector<MVT, 2> RegParmTypes;
557 RegParmTypes.push_back(Elt: MVT::i64);
558 RegParmTypes.push_back(Elt: MVT::f128);
559
560 // Later on, we can use this vector to restore the registers if necessary.
561 SmallVectorImpl<ForwardedRegister> &Forwards =
562 FuncInfo->getForwardedMustTailRegParms();
563 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: AssignFn);
564
565 // Conservatively forward X8, since it might be used for an aggregate
566 // return.
567 if (!CCInfo.isAllocated(Reg: AArch64::X8)) {
568 Register X8VReg = MF.addLiveIn(PReg: AArch64::X8, RC: &AArch64::GPR64RegClass);
569 Forwards.push_back(Elt: ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
570 }
571
572 // Add the forwards to the MachineBasicBlock and MachineFunction.
573 for (const auto &F : Forwards) {
574 MBB.addLiveIn(PhysReg: F.PReg);
575 MIRBuilder.buildCopy(Res: Register(F.VReg), Op: Register(F.PReg));
576 }
577}
578
579bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
580 auto &F = MF.getFunction();
581 const auto &TM = static_cast<const AArch64TargetMachine &>(MF.getTarget());
582
583 const bool GlobalISelFlag =
584 getCGPassBuilderOption().EnableGlobalISelOption.value_or(u: false);
585
586 auto OptLevel = MF.getTarget().getOptLevel();
587 auto EnableGlobalISelAtO = TM.getEnableGlobalISelAtO();
588
  // GlobalISel is only enabled when the optimization level is at most
  // EnableGlobalISelAtO, or when it was explicitly requested on the command
  // line. If neither holds and we still got here, GlobalISel must be running
  // as part of the SDAG pipeline, which only uses it for optnone functions,
  // so fall back for everything else.
594 if (static_cast<unsigned>(OptLevel) > EnableGlobalISelAtO && !GlobalISelFlag)
595 return !F.hasOptNone();
596
597 if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
598 llvm::any_of(Range: F.args(), P: [](const Argument &A) {
599 return A.getType()->isScalableTy();
600 })))
601 return true;
602 const auto &ST = MF.getSubtarget<AArch64Subtarget>();
603 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(
        dbgs() << "Falling back to SDAG: GlobalISel requires NEON and FP\n");
605 return true;
606 }
607
608 SMEAttrs Attrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
609 if (Attrs.hasZAState() || Attrs.hasZT0State() ||
610 Attrs.hasStreamingInterfaceOrBody() ||
611 Attrs.hasStreamingCompatibleInterface())
612 return true;
613
614 return false;
615}
616
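/// Spill the unallocated GPR (and, outside Win64, FPR) argument registers of
/// a variadic function to a save area on the stack so va_arg can find them,
/// and record the save area indices and sizes in AArch64FunctionInfo.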
617void AArch64CallLowering::saveVarArgRegisters(
618 MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
619 CCState &CCInfo) const {
620 auto GPRArgRegs = AArch64::getGPRArgRegs();
621 auto FPRArgRegs = AArch64::getFPRArgRegs();
622
623 MachineFunction &MF = MIRBuilder.getMF();
624 MachineRegisterInfo &MRI = MF.getRegInfo();
625 MachineFrameInfo &MFI = MF.getFrameInfo();
626 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
627 auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
628 bool IsWin64CC = Subtarget.isCallingConvWin64(CC: CCInfo.getCallingConv(),
629 IsVarArg: MF.getFunction().isVarArg());
630 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
631 const LLT s64 = LLT::scalar(SizeInBits: 64);
632
633 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(Regs: GPRArgRegs);
634 unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;
635
636 unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
637 int GPRIdx = 0;
638 if (GPRSaveSize != 0) {
639 if (IsWin64CC) {
640 GPRIdx = MFI.CreateFixedObject(Size: GPRSaveSize,
641 SPOffset: -static_cast<int>(GPRSaveSize), IsImmutable: false);
642 if (GPRSaveSize & 15)
643 // The extra size here, if triggered, will always be 8.
644 MFI.CreateFixedObject(Size: 16 - (GPRSaveSize & 15),
645 SPOffset: -static_cast<int>(alignTo(Value: GPRSaveSize, Align: 16)),
646 IsImmutable: false);
647 } else
648 GPRIdx = MFI.CreateStackObject(Size: GPRSaveSize, Alignment: Align(8), isSpillSlot: false);
649
650 auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: GPRIdx);
651 auto Offset =
652 MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 8);
653
654 for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
655 Register Val = MRI.createGenericVirtualRegister(Ty: s64);
656 Handler.assignValueToReg(
657 ValVReg: Val, PhysReg: GPRArgRegs[i],
658 VA: CCValAssign::getReg(ValNo: i + MF.getFunction().getNumOperands(), ValVT: MVT::i64,
659 Reg: GPRArgRegs[i], LocVT: MVT::i64, HTP: CCValAssign::Full));
660 auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
661 MF, FI: GPRIdx, Offset: (i - FirstVariadicGPR) * 8)
662 : MachinePointerInfo::getStack(MF, Offset: i * 8);
663 MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO));
664
665 FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0),
666 Op0: FIN.getReg(Idx: 0), Op1: Offset);
667 }
668 }
669 FuncInfo->setVarArgsGPRIndex(GPRIdx);
670 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
671
672 if (Subtarget.hasFPARMv8() && !IsWin64CC) {
673 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(Regs: FPRArgRegs);
674
675 unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
676 int FPRIdx = 0;
677 if (FPRSaveSize != 0) {
678 FPRIdx = MFI.CreateStackObject(Size: FPRSaveSize, Alignment: Align(16), isSpillSlot: false);
679
680 auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: FPRIdx);
681 auto Offset =
682 MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 16);
683
684 for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
685 Register Val = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 128));
686 Handler.assignValueToReg(
687 ValVReg: Val, PhysReg: FPRArgRegs[i],
688 VA: CCValAssign::getReg(
689 ValNo: i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
690 ValVT: MVT::f128, Reg: FPRArgRegs[i], LocVT: MVT::f128, HTP: CCValAssign::Full));
691
692 auto MPO = MachinePointerInfo::getStack(MF, Offset: i * 16);
693 MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO));
694
695 FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0),
696 Op0: FIN.getReg(Idx: 0), Op1: Offset);
697 }
698 }
699 FuncInfo->setVarArgsFPRIndex(FPRIdx);
700 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
701 }
702}
703
704bool AArch64CallLowering::lowerFormalArguments(
705 MachineIRBuilder &MIRBuilder, const Function &F,
706 ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
707 MachineFunction &MF = MIRBuilder.getMF();
708 MachineBasicBlock &MBB = MIRBuilder.getMBB();
709 MachineRegisterInfo &MRI = MF.getRegInfo();
710 auto &DL = F.getDataLayout();
711 auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
712
713 // Arm64EC has extra requirements for varargs calls which are only implemented
714 // in SelectionDAG; bail out for now.
715 if (F.isVarArg() && Subtarget.isWindowsArm64EC())
716 return false;
717
718 // Arm64EC thunks have a special calling convention which is only implemented
719 // in SelectionDAG; bail out for now.
720 if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native ||
721 F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
722 return false;
723
724 bool IsWin64 =
725 Subtarget.isCallingConvWin64(CC: F.getCallingConv(), IsVarArg: F.isVarArg()) &&
726 !Subtarget.isWindowsArm64EC();
727
728 SmallVector<ArgInfo, 8> SplitArgs;
729 SmallVector<std::pair<Register, Register>> BoolArgs;
730
731 // Insert the hidden sret parameter if the return value won't fit in the
732 // return registers.
733 if (!FLI.CanLowerReturn)
734 insertSRetIncomingArgument(F, SplitArgs, DemoteReg&: FLI.DemoteRegister, MRI, DL);
735
736 unsigned i = 0;
737 for (auto &Arg : F.args()) {
738 if (DL.getTypeStoreSize(Ty: Arg.getType()).isZero())
739 continue;
740
741 ArgInfo OrigArg{VRegs[i], Arg, i};
742 setArgFlags(Arg&: OrigArg, OpIdx: i + AttributeList::FirstArgIndex, DL, FuncInfo: F);
743
744 // i1 arguments are zero-extended to i8 by the caller. Emit a
745 // hint to reflect this.
746 if (OrigArg.Ty->isIntegerTy(Bitwidth: 1)) {
747 assert(OrigArg.Regs.size() == 1 &&
748 MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
749 "Unexpected registers used for i1 arg");
750
751 auto &Flags = OrigArg.Flags[0];
752 if (!Flags.isZExt() && !Flags.isSExt()) {
753 // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
754 Register OrigReg = OrigArg.Regs[0];
755 Register WideReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 8));
756 OrigArg.Regs[0] = WideReg;
757 BoolArgs.push_back(Elt: {OrigReg, WideReg});
758 }
759 }
760
761 if (Arg.hasAttribute(Kind: Attribute::SwiftAsync))
762 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
763
764 splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs, DL, CallConv: F.getCallingConv());
765 ++i;
766 }
767
768 if (!MBB.empty())
769 MIRBuilder.setInstr(*MBB.begin());
770
771 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
772 CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC: F.getCallingConv(), IsVarArg: IsWin64 && F.isVarArg());
773
774 AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
775 FormalArgHandler Handler(MIRBuilder, MRI);
776 SmallVector<CCValAssign, 16> ArgLocs;
777 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
778 if (!determineAssignments(Assigner, Args&: SplitArgs, CCInfo) ||
779 !handleAssignments(Handler, Args&: SplitArgs, CCState&: CCInfo, ArgLocs, MIRBuilder))
780 return false;
781
782 if (!BoolArgs.empty()) {
783 for (auto &KV : BoolArgs) {
784 Register OrigReg = KV.first;
785 Register WideReg = KV.second;
786 LLT WideTy = MRI.getType(Reg: WideReg);
787 assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
788 "Unexpected bit size of a bool arg");
789 MIRBuilder.buildTrunc(
790 Res: OrigReg, Op: MIRBuilder.buildAssertZExt(Res: WideTy, Op: WideReg, Size: 1).getReg(Idx: 0));
791 }
792 }
793
794 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
795 uint64_t StackSize = Assigner.StackSize;
796 if (F.isVarArg()) {
797 if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) {
798 // The AAPCS variadic function ABI is identical to the non-variadic
799 // one. As a result there may be more arguments in registers and we should
800 // save them for future reference.
801 // Win64 variadic functions also pass arguments in registers, but all
802 // float arguments are passed in integer registers.
803 saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
804 } else if (Subtarget.isWindowsArm64EC()) {
805 return false;
806 }
807
808 // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
809 StackSize = alignTo(Value: Assigner.StackSize, Align: Subtarget.isTargetILP32() ? 4 : 8);
810
811 auto &MFI = MIRBuilder.getMF().getFrameInfo();
812 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(Size: 4, SPOffset: StackSize, IsImmutable: true));
813 }
814
815 if (doesCalleeRestoreStack(CallConv: F.getCallingConv(),
816 TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) {
817 // We have a non-standard ABI, so why not make full use of the stack that
818 // we're going to pop? It must be aligned to 16 B in any case.
819 StackSize = alignTo(Value: StackSize, Align: 16);
820
821 // If we're expected to restore the stack (e.g. fastcc), then we'll be
822 // adding a multiple of 16.
823 FuncInfo->setArgumentStackToRestore(StackSize);
824
825 // Our own callers will guarantee that the space is free by giving an
826 // aligned value to CALLSEQ_START.
827 }
828
829 // When we tail call, we need to check if the callee's arguments
830 // will fit on the caller's stack. So, whenever we lower formal arguments,
831 // we should keep track of this information, since we might lower a tail call
832 // in this function later.
833 FuncInfo->setBytesInStackArgArea(StackSize);
834
835 if (Subtarget.hasCustomCallingConv())
836 Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
837
838 handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
839
840 // Move back to the end of the basic block.
841 MIRBuilder.setMBB(MBB);
842
843 return true;
844}
845
846/// Return true if the calling convention is one that we can guarantee TCO for.
847static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
848 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
849 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
850}
851
852/// Return true if we might ever do TCO for calls with this calling convention.
853static bool mayTailCallThisCC(CallingConv::ID CC) {
854 switch (CC) {
855 case CallingConv::C:
856 case CallingConv::PreserveMost:
857 case CallingConv::PreserveAll:
858 case CallingConv::PreserveNone:
859 case CallingConv::Swift:
860 case CallingConv::SwiftTail:
861 case CallingConv::Tail:
862 case CallingConv::Fast:
863 return true;
864 default:
865 return false;
866 }
867}
868
869/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
870/// CC.
871static std::pair<CCAssignFn *, CCAssignFn *>
872getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
873 return {TLI.CCAssignFnForCall(CC, IsVarArg: false), TLI.CCAssignFnForCall(CC, IsVarArg: true)};
874}
875
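/// Check that the caller and callee calling conventions are compatible for
/// tail calling: incoming values are assigned the same way and both preserve
/// the same set of registers.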
876bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
877 CallLoweringInfo &Info, MachineFunction &MF,
878 SmallVectorImpl<ArgInfo> &InArgs) const {
879 const Function &CallerF = MF.getFunction();
880 CallingConv::ID CalleeCC = Info.CallConv;
881 CallingConv::ID CallerCC = CallerF.getCallingConv();
882
883 // If the calling conventions match, then everything must be the same.
884 if (CalleeCC == CallerCC)
885 return true;
886
887 // Check if the caller and callee will handle arguments in the same way.
888 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
889 CCAssignFn *CalleeAssignFnFixed;
890 CCAssignFn *CalleeAssignFnVarArg;
891 std::tie(args&: CalleeAssignFnFixed, args&: CalleeAssignFnVarArg) =
892 getAssignFnsForCC(CC: CalleeCC, TLI);
893
894 CCAssignFn *CallerAssignFnFixed;
895 CCAssignFn *CallerAssignFnVarArg;
896 std::tie(args&: CallerAssignFnFixed, args&: CallerAssignFnVarArg) =
897 getAssignFnsForCC(CC: CallerCC, TLI);
898
899 AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
900 CalleeAssignFnVarArg);
901 AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
902 CallerAssignFnVarArg);
903
904 if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
905 return false;
906
907 // Make sure that the caller and callee preserve all of the same registers.
908 auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
909 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
910 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
911 if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
912 TRI->UpdateCustomCallPreservedMask(MF, Mask: &CallerPreserved);
913 TRI->UpdateCustomCallPreservedMask(MF, Mask: &CalleePreserved);
914 }
915
916 return TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved);
917}
918
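/// Check that the outgoing arguments of a candidate tail call fit in the
/// caller's incoming argument area and that values passed in callee-saved
/// registers match the caller's incoming values.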
919bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
920 CallLoweringInfo &Info, MachineFunction &MF,
921 SmallVectorImpl<ArgInfo> &OrigOutArgs) const {
922 // If there are no outgoing arguments, then we are done.
923 if (OrigOutArgs.empty())
924 return true;
925
926 const Function &CallerF = MF.getFunction();
927 LLVMContext &Ctx = CallerF.getContext();
928 CallingConv::ID CalleeCC = Info.CallConv;
929 CallingConv::ID CallerCC = CallerF.getCallingConv();
930 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
931 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
932
933 CCAssignFn *AssignFnFixed;
934 CCAssignFn *AssignFnVarArg;
935 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI);
936
937 // We have outgoing arguments. Make sure that we can tail call with them.
938 SmallVector<CCValAssign, 16> OutLocs;
939 CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
940
941 AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
942 Subtarget, /*IsReturn*/ false);
943 // determineAssignments() may modify argument flags, so make a copy.
944 SmallVector<ArgInfo, 8> OutArgs;
945 append_range(C&: OutArgs, R&: OrigOutArgs);
946 if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo)) {
947 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
948 return false;
949 }
950
951 // Make sure that they can fit on the caller's stack.
952 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
953 if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
954 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
955 return false;
956 }
957
958 // Verify that the parameters in callee-saved registers match.
959 // TODO: Port this over to CallLowering as general code once swiftself is
960 // supported.
961 auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
962 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
963 MachineRegisterInfo &MRI = MF.getRegInfo();
964
965 if (Info.IsVarArg) {
966 // Be conservative and disallow variadic memory operands to match SDAG's
967 // behaviour.
968 // FIXME: If the caller's calling convention is C, then we can
969 // potentially use its argument area. However, for cases like fastcc,
970 // we can't do anything.
971 for (unsigned i = 0; i < OutLocs.size(); ++i) {
972 auto &ArgLoc = OutLocs[i];
973 if (ArgLoc.isRegLoc())
974 continue;
975
976 LLVM_DEBUG(
977 dbgs()
978 << "... Cannot tail call vararg function with stack arguments\n");
979 return false;
980 }
981 }
982
983 return parametersInCSRMatch(MRI, CallerPreservedMask, ArgLocs: OutLocs, OutVals: OutArgs);
984}
985
986bool AArch64CallLowering::isEligibleForTailCallOptimization(
987 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
988 SmallVectorImpl<ArgInfo> &InArgs,
989 SmallVectorImpl<ArgInfo> &OutArgs) const {
990
991 // Must pass all target-independent checks in order to tail call optimize.
992 if (!Info.IsTailCall)
993 return false;
994
995 CallingConv::ID CalleeCC = Info.CallConv;
996 MachineFunction &MF = MIRBuilder.getMF();
997 const Function &CallerF = MF.getFunction();
998
999 LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
1000
1001 if (Info.SwiftErrorVReg) {
1002 // TODO: We should handle this.
1003 // Note that this is also handled by the check for no outgoing arguments.
1004 // Proactively disabling this though, because the swifterror handling in
1005 // lowerCall inserts a COPY *after* the location of the call.
1006 LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
1007 return false;
1008 }
1009
1010 if (!mayTailCallThisCC(CC: CalleeCC)) {
1011 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
1012 return false;
1013 }
1014
1015 // Byval parameters hand the function a pointer directly into the stack area
1016 // we want to reuse during a tail call. Working around this *is* possible (see
1017 // X86).
1018 //
1019 // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
1020 // it?
1021 //
1022 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
1023 // In this case, it is necessary to save/restore X0 in the callee. Tail
1024 // call opt interferes with this. So we disable tail call opt when the
1025 // caller has an argument with "inreg" attribute.
1026 //
1027 // FIXME: Check whether the callee also has an "inreg" argument.
1028 //
  // When the caller has a swifterror argument, we don't want to tail call,
  // because we would have to move the value into the swifterror register
  // before the tail call.
1032 if (any_of(Range: CallerF.args(), P: [](const Argument &A) {
1033 return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
1034 })) {
1035 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
1036 "inreg, or swifterror arguments\n");
1037 return false;
1038 }
1039
1040 // Externally-defined functions with weak linkage should not be
1041 // tail-called on AArch64 when the OS does not support dynamic
1042 // pre-emption of symbols, as the AAELF spec requires normal calls
1043 // to undefined weak functions to be replaced with a NOP or jump to the
1044 // next instruction. The behaviour of branch instructions in this
1045 // situation (as used for tail calls) is implementation-defined, so we
1046 // cannot rely on the linker replacing the tail call with a return.
1047 if (Info.Callee.isGlobal()) {
1048 const GlobalValue *GV = Info.Callee.getGlobal();
1049 const Triple &TT = MF.getTarget().getTargetTriple();
1050 if (GV->hasExternalWeakLinkage() &&
1051 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
1052 TT.isOSBinFormatMachO())) {
1053 LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
1054 "with weak linkage for this OS.\n");
1055 return false;
1056 }
1057 }
1058
1059 // If we have -tailcallopt, then we're done.
1060 if (canGuaranteeTCO(CC: CalleeCC, GuaranteeTailCalls: MF.getTarget().Options.GuaranteedTailCallOpt))
1061 return CalleeCC == CallerF.getCallingConv();
1062
1063 // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
1064 // Try to find cases where we can do that.
1065
1066 // I want anyone implementing a new calling convention to think long and hard
1067 // about this assert.
1068 assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
1069 "Unexpected variadic calling convention");
1070
1071 // Verify that the incoming and outgoing arguments from the callee are
1072 // safe to tail call.
1073 if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1074 LLVM_DEBUG(
1075 dbgs()
1076 << "... Caller and callee have incompatible calling conventions.\n");
1077 return false;
1078 }
1079
1080 if (!areCalleeOutgoingArgsTailCallable(Info, MF, OrigOutArgs&: OutArgs))
1081 return false;
1082
1083 LLVM_DEBUG(
1084 dbgs() << "... Call is eligible for tail call optimization.\n");
1085 return true;
1086}
1087
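/// Select the call or tail-call opcode, accounting for indirect calls,
/// pointer authentication, and the BTI/PAuthLR restrictions on which
/// registers may hold the callee address.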
1088static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
1089 bool IsTailCall,
1090 std::optional<CallLowering::PtrAuthInfo> &PAI,
1091 MachineRegisterInfo &MRI) {
1092 const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>();
1093
1094 if (!IsTailCall) {
1095 if (!PAI)
1096 return IsIndirect ? getBLRCallOpcode(MF: CallerF) : (unsigned)AArch64::BL;
1097
1098 assert(IsIndirect && "Direct call should not be authenticated");
1099 assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) &&
1100 "Invalid auth call key");
1101 return AArch64::BLRA;
1102 }
1103
1104 if (!IsIndirect)
1105 return AArch64::TCRETURNdi;
1106
1107 // When BTI or PAuthLR are enabled, there are restrictions on using x16 and
1108 // x17 to hold the function pointer.
1109 if (FuncInfo->branchTargetEnforcement()) {
1110 if (FuncInfo->branchProtectionPAuthLR()) {
1111 assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1112 return AArch64::TCRETURNrix17;
1113 }
1114 if (PAI)
1115 return AArch64::AUTH_TCRETURN_BTI;
1116 return AArch64::TCRETURNrix16x17;
1117 }
1118
1119 if (FuncInfo->branchProtectionPAuthLR()) {
1120 assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1121 return AArch64::TCRETURNrinotx16;
1122 }
1123
1124 if (PAI)
1125 return AArch64::AUTH_TCRETURN;
1126 return AArch64::TCRETURNri;
1127}
1128
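/// Compute the call-preserved register mask, using the X0-preserving variant
/// when the first argument carries the "returned" attribute and the target
/// provides such a mask.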
1129static const uint32_t *
1130getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
1131 AArch64CallLowering::CallLoweringInfo &Info,
1132 const AArch64RegisterInfo &TRI, MachineFunction &MF) {
1133 const uint32_t *Mask;
1134 if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
1135 // For 'this' returns, use the X0-preserving mask if applicable
1136 Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
1137 if (!Mask) {
1138 OutArgs[0].Flags[0].setReturned(false);
1139 Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1140 }
1141 } else {
1142 Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1143 }
1144 return Mask;
1145}
1146
1147bool AArch64CallLowering::lowerTailCall(
1148 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1149 SmallVectorImpl<ArgInfo> &OutArgs) const {
1150 MachineFunction &MF = MIRBuilder.getMF();
1151 const Function &F = MF.getFunction();
1152 MachineRegisterInfo &MRI = MF.getRegInfo();
1153 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1154 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1155
1156 // True when we're tail calling, but without -tailcallopt.
1157 bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
1158 Info.CallConv != CallingConv::Tail &&
1159 Info.CallConv != CallingConv::SwiftTail;
1160
1161 // Find out which ABI gets to decide where things go.
1162 CallingConv::ID CalleeCC = Info.CallConv;
1163 CCAssignFn *AssignFnFixed;
1164 CCAssignFn *AssignFnVarArg;
1165 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI);
1166
1167 MachineInstrBuilder CallSeqStart;
1168 if (!IsSibCall)
1169 CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN);
1170
1171 unsigned Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: true, PAI&: Info.PAI, MRI);
1172 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc);
1173 MIB.add(MO: Info.Callee);
1174
1175 // Tell the call which registers are clobbered.
1176 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1177 auto TRI = Subtarget.getRegisterInfo();
1178
1179 // Byte offset for the tail call. When we are sibcalling, this will always
1180 // be 0.
1181 MIB.addImm(Val: 0);
1182
1183 // Authenticated tail calls always take key/discriminator arguments.
1184 if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) {
1185 assert((Info.PAI->Key == AArch64PACKey::IA ||
1186 Info.PAI->Key == AArch64PACKey::IB) &&
1187 "Invalid auth call key");
1188 MIB.addImm(Val: Info.PAI->Key);
1189
1190 Register AddrDisc = 0;
1191 uint16_t IntDisc = 0;
1192 std::tie(args&: IntDisc, args&: AddrDisc) =
1193 extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI);
1194
1195 MIB.addImm(Val: IntDisc);
1196 MIB.addUse(RegNo: AddrDisc);
1197 if (AddrDisc != AArch64::NoRegister) {
1198 MIB->getOperand(i: 4).setReg(constrainOperandRegClass(
1199 MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1200 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(),
1201 RegMO&: MIB->getOperand(i: 4), OpIdx: 4));
1202 }
1203 }
1204
1205 // Tell the call which registers are clobbered.
1206 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1207 if (Subtarget.hasCustomCallingConv())
1208 TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask);
1209 MIB.addRegMask(Mask);
1210
1211 if (Info.CFIType)
1212 MIB->setCFIType(MF, Type: Info.CFIType->getZExtValue());
1213
1214 if (TRI->isAnyArgRegReserved(MF))
1215 TRI->emitReservedArgRegCallError(MF);
1216
1217 // FPDiff is the byte offset of the call's argument area from the callee's.
1218 // Stores to callee stack arguments will be placed in FixedStackSlots offset
1219 // by this amount for a tail call. In a sibling call it must be 0 because the
1220 // caller will deallocate the entire stack and the callee still expects its
1221 // arguments to begin at SP+0.
1222 int FPDiff = 0;
1223
1224 // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1225 // by -tailcallopt. For sibcalls, the memory operands for the call are
1226 // already available in the caller's incoming argument space.
1227 unsigned NumBytes = 0;
1228 if (!IsSibCall) {
1229 // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1230 // before handling assignments, because FPDiff must be known for memory
1231 // arguments.
1232 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1233 SmallVector<CCValAssign, 16> OutLocs;
1234 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1235
1236 AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
1237 Subtarget, /*IsReturn*/ false);
1238 if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo))
1239 return false;
1240
1241 // The callee will pop the argument stack as a tail call. Thus, we must
1242 // keep it 16-byte aligned.
1243 NumBytes = alignTo(Value: OutInfo.getStackSize(), Align: 16);
1244
1245 // FPDiff will be negative if this tail call requires more space than we
1246 // would automatically have in our incoming argument space. Positive if we
1247 // actually shrink the stack.
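    // For example, with 32 bytes of reusable incoming argument space and a
    // tail call needing 48 bytes of outgoing stack arguments, FPDiff is -16.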
1248 FPDiff = NumReusableBytes - NumBytes;
1249
1250 // Update the required reserved area if this is the tail call requiring the
1251 // most argument stack space.
1252 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
1253 FuncInfo->setTailCallReservedStack(-FPDiff);
1254
1255 // The stack pointer must be 16-byte aligned at all times it's used for a
1256 // memory operation, which in practice means at *all* times and in
1257 // particular across call boundaries. Therefore our own arguments started at
1258 // a 16-byte aligned SP and the delta applied for the tail call should
1259 // satisfy the same constraint.
1260 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1261 }
1262
1263 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1264
1265 AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1266 Subtarget, /*IsReturn*/ false);
1267
1268 // Do the actual argument marshalling.
1269 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1270 /*IsTailCall*/ true, FPDiff);
1271 if (!determineAndHandleAssignments(Handler, Assigner, Args&: OutArgs, MIRBuilder,
1272 CallConv: CalleeCC, IsVarArg: Info.IsVarArg))
1273 return false;
1274
1275 Mask = getMaskForArgs(OutArgs, Info, TRI: *TRI, MF);
1276
1277 if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call
    // for the forwarded registers that we *aren't* passing as parameters.
    // This will preserve the copies we built earlier.
1281 for (const auto &F : Forwards) {
1282 Register ForwardedReg = F.PReg;
1283 // If the register is already passed, or aliases a register which is
1284 // already being passed, then skip it.
1285 if (any_of(Range: MIB->uses(), P: [&ForwardedReg, &TRI](const MachineOperand &Use) {
1286 if (!Use.isReg())
1287 return false;
1288 return TRI->regsOverlap(RegA: Use.getReg(), RegB: ForwardedReg);
1289 }))
1290 continue;
1291
1292 // We aren't passing it already, so we should add it to the call.
1293 MIRBuilder.buildCopy(Res: ForwardedReg, Op: Register(F.VReg));
1294 MIB.addReg(RegNo: ForwardedReg, Flags: RegState::Implicit);
1295 }
1296 }
1297
1298 // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1299 // sequence start and end here.
1300 if (!IsSibCall) {
1301 MIB->getOperand(i: 1).setImm(FPDiff);
1302 CallSeqStart.addImm(Val: 0).addImm(Val: 0);
1303 // End the call sequence *before* emitting the call. Normally, we would
1304 // tidy the frame up after the call. However, here, we've laid out the
1305 // parameters so that when SP is reset, they will be in the correct
1306 // location.
1307 MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKUP).addImm(Val: 0).addImm(Val: 0);
1308 }
1309
1310 // Now we can add the actual call instruction to the correct basic block.
1311 MIRBuilder.insertInstr(MIB);
1312
1313 // If Callee is a reg, since it is used by a target specific instruction,
1314 // it must have a register class matching the constraint of that instruction.
1315 if (MIB->getOperand(i: 0).isReg())
1316 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1317 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB,
1318 II: MIB->getDesc(), RegMO&: MIB->getOperand(i: 0), OpIdx: 0);
1319
1320 MF.getFrameInfo().setHasTailCall();
1321 Info.LoweredTailCall = true;
1322 return true;
1323}
1324
1325bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1326 CallLoweringInfo &Info) const {
1327 MachineFunction &MF = MIRBuilder.getMF();
1328 const Function &F = MF.getFunction();
1329 MachineRegisterInfo &MRI = MF.getRegInfo();
1330 auto &DL = F.getDataLayout();
1331 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1332 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1333
1334 // Arm64EC has extra requirements for varargs calls; bail out for now.
1335 //
1336 // Arm64EC has special mangling rules for calls; bail out on all calls for
1337 // now.
1338 if (Subtarget.isWindowsArm64EC())
1339 return false;
1340
1341 // Arm64EC thunks have a special calling convention which is only implemented
1342 // in SelectionDAG; bail out for now.
1343 if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
1344 Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
1345 return false;
1346
1347 SmallVector<ArgInfo, 8> OutArgs;
1348 for (auto &OrigArg : Info.OrigArgs) {
1349 splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs&: OutArgs, DL, CallConv: Info.CallConv);
    // AAPCS requires the caller to zero-extend i1 arguments to 8 bits.
1351 auto &Flags = OrigArg.Flags[0];
1352 if (OrigArg.Ty->isIntegerTy(Bitwidth: 1) && !Flags.isSExt() && !Flags.isZExt()) {
1353 ArgInfo &OutArg = OutArgs.back();
1354 assert(OutArg.Regs.size() == 1 &&
1355 MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1356 "Unexpected registers used for i1 arg");
1357
1358 // We cannot use a ZExt ArgInfo flag here, because it will
1359 // zero-extend the argument to i32 instead of just i8.
1360 OutArg.Regs[0] =
1361 MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 8), Op: OutArg.Regs[0]).getReg(Idx: 0);
1362 LLVMContext &Ctx = MF.getFunction().getContext();
1363 OutArg.Ty = Type::getInt8Ty(C&: Ctx);
1364 }
1365 }
1366
1367 SmallVector<ArgInfo, 8> InArgs;
1368 if (!Info.OrigRet.Ty->isVoidTy())
1369 splitToValueTypes(OrigArgInfo: Info.OrigRet, SplitArgs&: InArgs, DL, CallConv: Info.CallConv);
1370
1371 // If we can lower as a tail call, do that instead.
1372 bool CanTailCallOpt =
1373 isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1374
1375 // We must emit a tail call if we have musttail.
1376 if (Info.IsMustTailCall && !CanTailCallOpt) {
1377 // There are types of incoming/outgoing arguments we can't handle yet, so
1378 // it doesn't make sense to actually die here like in ISelLowering. Instead,
1379 // fall back to SelectionDAG and let it try to handle this.
1380 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1381 return false;
1382 }
1383
1384 Info.IsTailCall = CanTailCallOpt;
1385 if (CanTailCallOpt)
1386 return lowerTailCall(MIRBuilder, Info, OutArgs);
1387
1388 // Find out which ABI gets to decide where things go.
1389 CCAssignFn *AssignFnFixed;
1390 CCAssignFn *AssignFnVarArg;
1391 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) =
1392 getAssignFnsForCC(CC: Info.CallConv, TLI);
1393
1394 MachineInstrBuilder CallSeqStart;
1395 CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN);
1396
1397 // Create a temporarily-floating call instruction so we can add the implicit
1398 // uses of arg registers.
1399
1400 unsigned Opc = 0;
1401 // Calls with operand bundle "clang.arc.attachedcall" are special. They should
1402 // be expanded to the call, directly followed by a special marker sequence and
1403 // a call to an ObjC library function.
1404 if (Info.CB && objcarc::hasAttachedCallOpBundle(CB: Info.CB))
1405 Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
  // A call to a returns-twice function like setjmp must be followed by a BTI
  // instruction.
1408 else if (Info.CB && Info.CB->hasFnAttr(Kind: Attribute::ReturnsTwice) &&
1409 !Subtarget.noBTIAtReturnTwice() &&
1410 MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1411 Opc = AArch64::BLR_BTI;
1412 else {
1413 // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
1414 // is set.
1415 if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
1416 auto MIB = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_GLOBAL_VALUE);
1417 DstOp(getLLTForType(Ty&: *F.getType(), DL)).addDefToMIB(MRI, MIB);
1418 MIB.addExternalSymbol(FnName: Info.Callee.getSymbolName(), TargetFlags: AArch64II::MO_GOT);
1419 Info.Callee = MachineOperand::CreateReg(Reg: MIB.getReg(Idx: 0), isDef: false);
1420 }
1421 Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: false, PAI&: Info.PAI, MRI);
1422 }
1423
1424 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc);
1425 unsigned CalleeOpNo = 0;
1426
1427 if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
1428 // Add a target global address for the retainRV/claimRV runtime function
1429 // just before the call target.
1430 Function *ARCFn = *objcarc::getAttachedARCFunction(CB: Info.CB);
1431 MIB.addGlobalAddress(GV: ARCFn);
1432 ++CalleeOpNo;
1433
1434 // We may or may not need to emit both the marker and the retain/claim call.
1435 // Tell the pseudo expansion using an additional boolean op.
1436 MIB.addImm(Val: objcarc::attachedCallOpBundleNeedsMarker(CB: Info.CB));
1437 ++CalleeOpNo;
1438 } else if (Info.CFIType) {
1439 MIB->setCFIType(MF, Type: Info.CFIType->getZExtValue());
1440 }
1441 MIB->setDeactivationSymbol(MF, DS: Info.DeactivationSymbol);
1442
1443 MIB.add(MO: Info.Callee);
1444
1445 // Tell the call which registers are clobbered.
1446 const uint32_t *Mask;
1447 const auto *TRI = Subtarget.getRegisterInfo();
1448
1449 AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1450 Subtarget, /*IsReturn*/ false);
1451 // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall=*/false);
1453 if (!determineAndHandleAssignments(Handler, Assigner, Args&: OutArgs, MIRBuilder,
1454 CallConv: Info.CallConv, IsVarArg: Info.IsVarArg))
1455 return false;
1456
1457 Mask = getMaskForArgs(OutArgs, Info, TRI: *TRI, MF);
1458
1459 if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
1460 assert((Info.PAI->Key == AArch64PACKey::IA ||
1461 Info.PAI->Key == AArch64PACKey::IB) &&
1462 "Invalid auth call key");
1463 MIB.addImm(Val: Info.PAI->Key);
1464
1465 Register AddrDisc = 0;
1466 uint16_t IntDisc = 0;
1467 std::tie(args&: IntDisc, args&: AddrDisc) =
1468 extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI);
1469
1470 MIB.addImm(Val: IntDisc);
1471 MIB.addUse(RegNo: AddrDisc);
1472 if (AddrDisc != AArch64::NoRegister) {
1473 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1474 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB,
1475 II: MIB->getDesc(), RegMO&: MIB->getOperand(i: CalleeOpNo + 3),
1476 OpIdx: CalleeOpNo + 3);
1477 }
1478 }
1479
1480 // Tell the call which registers are clobbered.
1481 if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1482 TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask);
1483 MIB.addRegMask(Mask);
1484
1485 if (TRI->isAnyArgRegReserved(MF))
1486 TRI->emitReservedArgRegCallError(MF);
1487
1488 // Now we can add the actual call instruction to the correct basic block.
1489 MIRBuilder.insertInstr(MIB);
1490
1491 uint64_t CalleePopBytes =
1492 doesCalleeRestoreStack(CallConv: Info.CallConv,
1493 TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)
1494 ? alignTo(Value: Assigner.StackSize, Align: 16)
1495 : 0;
1496
1497 CallSeqStart.addImm(Val: Assigner.StackSize).addImm(Val: 0);
1498 MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKUP)
1499 .addImm(Val: Assigner.StackSize)
1500 .addImm(Val: CalleePopBytes);
1501
1502 // If Callee is a reg, since it is used by a target specific
1503 // instruction, it must have a register class matching the
1504 // constraint of that instruction.
1505 if (MIB->getOperand(i: CalleeOpNo).isReg())
1506 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *Subtarget.getInstrInfo(),
1507 RBI: *Subtarget.getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(),
1508 RegMO&: MIB->getOperand(i: CalleeOpNo), OpIdx: CalleeOpNo);
1509
1510 // Finally we can copy the returned value back into its virtual-register. In
1511 // symmetry with the arguments, the physical register must be an
1512 // implicit-define of the call instruction.
1513 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1514 CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(CC: Info.CallConv);
1515 CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1516 bool UsingReturnedArg =
1517 !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
1518
1519 AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1520 /*IsReturn*/ false);
1521 ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
1522 if (!determineAndHandleAssignments(
1523 Handler&: UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, Args&: InArgs,
1524 MIRBuilder, CallConv: Info.CallConv, IsVarArg: Info.IsVarArg,
1525 ThisReturnRegs: UsingReturnedArg ? ArrayRef(OutArgs[0].Regs)
1526 : ArrayRef<Register>()))
1527 return false;
1528 }
1529
1530 if (Info.SwiftErrorVReg) {
1531 MIB.addDef(RegNo: AArch64::X21, Flags: RegState::Implicit);
1532 MIRBuilder.buildCopy(Res: Info.SwiftErrorVReg, Op: Register(AArch64::X21));
1533 }
1534
1535 if (!Info.CanLowerReturn) {
1536 insertSRetLoads(MIRBuilder, RetTy: Info.OrigRet.Ty, VRegs: Info.OrigRet.Regs,
1537 DemoteReg: Info.DemoteRegister, FI: Info.DemoteStackIndex);
1538 }
1539 return true;
1540}
1541
1542bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
1543 return Ty.getSizeInBits() == 64;
1544}
1545