1//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the lowering of LLVM calls to machine code calls for
11/// GlobalISel.
12///
13//===----------------------------------------------------------------------===//
14
15#include "AArch64CallLowering.h"
16#include "AArch64GlobalISelUtils.h"
17#include "AArch64ISelLowering.h"
18#include "AArch64MachineFunctionInfo.h"
19#include "AArch64RegisterInfo.h"
20#include "AArch64SMEAttributes.h"
21#include "AArch64Subtarget.h"
22#include "AArch64TargetMachine.h"
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/Analysis/ObjCARCUtil.h"
26#include "llvm/CodeGen/Analysis.h"
27#include "llvm/CodeGen/CallingConvLower.h"
28#include "llvm/CodeGen/FunctionLoweringInfo.h"
29#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
30#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
31#include "llvm/CodeGen/GlobalISel/Utils.h"
32#include "llvm/CodeGen/LowLevelTypeUtils.h"
33#include "llvm/CodeGen/MachineBasicBlock.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/CodeGen/TargetRegisterInfo.h"
42#include "llvm/CodeGen/TargetSubtargetInfo.h"
43#include "llvm/CodeGen/ValueTypes.h"
44#include "llvm/CodeGenTypes/MachineValueType.h"
45#include "llvm/IR/Argument.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/Function.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/Value.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53
54#define DEBUG_TYPE "aarch64-call-lowering"
55
56using namespace llvm;
57using namespace AArch64GISelUtils;
58
59extern cl::opt<bool> EnableSVEGISel;
60
61AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
62 : CallLowering(&TLI) {}
63
64static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
65 MVT &LocVT) {
66 // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
67 // hack because the DAG calls the assignment function with pre-legalized
68 // register typed values, not the raw type.
69 //
  // This hack is not applied to return values, since they are not passed on
  // the stack.
72 if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
73 ValVT = LocVT = MVT::i8;
74 else if (OrigVT == MVT::i16)
75 ValVT = LocVT = MVT::i16;
76}
77
78// Account for i1/i8/i16 stack passed value hack
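// When the value type is i8 or i16, the stack store/load uses that small type
// (matching what SelectionDAG emits for such arguments); all other values use
// the location type reported by the CCValAssign.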
79static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
80 const MVT ValVT = VA.getValVT();
81 return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
82 : LLT(VA.getLocVT());
83}
84
85namespace {
86
87struct AArch64IncomingValueAssigner
88 : public CallLowering::IncomingValueAssigner {
89 AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
90 CCAssignFn *AssignFnVarArg_)
91 : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}
92
93 bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
94 CCValAssign::LocInfo LocInfo,
95 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
96 CCState &State) override {
97 applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
98 return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
99 LocInfo, Info, Flags, State);
100 }
101};
102
103struct AArch64OutgoingValueAssigner
104 : public CallLowering::OutgoingValueAssigner {
105 const AArch64Subtarget &Subtarget;
106
107 /// Track if this is used for a return instead of function argument
108 /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
109 /// stack passed returns for them and cannot apply the type adjustment.
110 bool IsReturn;
111
112 AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
113 CCAssignFn *AssignFnVarArg_,
114 const AArch64Subtarget &Subtarget_,
115 bool IsReturn)
116 : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
117 Subtarget(Subtarget_), IsReturn(IsReturn) {}
118
119 bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
120 CCValAssign::LocInfo LocInfo,
121 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
122 CCState &State) override {
123 const Function &F = State.getMachineFunction().getFunction();
    bool IsCalleeWin =
        Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg());
126 bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
127
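    // On Win64, a variadic callee assigns even its fixed arguments with the
    // varargs rules (e.g. FP values are passed in integer registers), so the
    // i8/i16 hack is skipped and AssignFnVarArg is used for those as well.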
128 bool Res;
129 if (!Flags.isVarArg() && !UseVarArgsCCForFixed) {
130 if (!IsReturn)
131 applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
132 Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, Info.Ty, State);
133 } else
134 Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, Info.Ty, State);
135
136 StackSize = State.getStackSize();
137 return Res;
138 }
139};
140
141struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
142 IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
143 : IncomingValueHandler(MIRBuilder, MRI) {}
144
145 Register getStackAddress(uint64_t Size, int64_t Offset,
146 MachinePointerInfo &MPO,
147 ISD::ArgFlagsTy Flags) override {
148 auto &MFI = MIRBuilder.getMF().getFrameInfo();
149
150 // Byval is assumed to be writable memory, but other stack passed arguments
151 // are not.
152 const bool IsImmutable = !Flags.isByVal();
153
154 int FI = MFI.CreateFixedObject(Size, SPOffset: Offset, IsImmutable);
155 MPO = MachinePointerInfo::getFixedStack(MF&: MIRBuilder.getMF(), FI);
156 auto AddrReg = MIRBuilder.buildFrameIndex(Res: LLT::pointer(AddressSpace: 0, SizeInBits: 64), Idx: FI);
157 return AddrReg.getReg(Idx: 0);
158 }
159
160 LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
161 ISD::ArgFlagsTy Flags) const override {
162 // For pointers, we just need to fixup the integer types reported in the
163 // CCValAssign.
164 if (Flags.isPointer())
165 return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
166 return getStackValueStoreTypeHack(VA);
167 }
168
169 void assignValueToReg(Register ValVReg, Register PhysReg,
170 const CCValAssign &VA,
171 ISD::ArgFlagsTy Flags = {}) override {
172 markRegUsed(Reg: PhysReg);
173 IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
174 }
175
176 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
177 const MachinePointerInfo &MPO,
178 const CCValAssign &VA) override {
179 MachineFunction &MF = MIRBuilder.getMF();
180
181 LLT ValTy(VA.getValVT());
182 LLT LocTy(VA.getLocVT());
183
184 // Fixup the types for the DAG compatibility hack.
185 if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
186 std::swap(a&: ValTy, b&: LocTy);
187 else {
      // The calling code knows whether this is a pointer; we only touch LocTy
      // here for the i8/i16 hack.
190 assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
191 LocTy = MemTy;
192 }
193
194 auto MMO = MF.getMachineMemOperand(
195 PtrInfo: MPO, f: MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy: LocTy,
196 base_alignment: inferAlignFromPtrInfo(MF, MPO));
197
198 switch (VA.getLocInfo()) {
199 case CCValAssign::LocInfo::ZExt:
200 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_ZEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO);
201 return;
202 case CCValAssign::LocInfo::SExt:
203 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO);
204 return;
205 default:
206 MIRBuilder.buildLoad(Res: ValVReg, Addr, MMO&: *MMO);
207 return;
208 }
209 }
210
211 /// How the physical register gets marked varies between formal
212 /// parameters (it's a basic-block live-in), and a call instruction
213 /// (it's an implicit-def of the BL).
214 virtual void markRegUsed(Register Reg) = 0;
215};
216
217struct FormalArgHandler : public IncomingArgHandler {
218 FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
219 : IncomingArgHandler(MIRBuilder, MRI) {}
220
221 void markRegUsed(Register Reg) override {
222 MIRBuilder.getMRI()->addLiveIn(Reg: Reg.asMCReg());
223 MIRBuilder.getMBB().addLiveIn(PhysReg: Reg.asMCReg());
224 }
225};
226
227struct CallReturnHandler : public IncomingArgHandler {
228 CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
229 MachineInstrBuilder MIB)
230 : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
231
232 void markRegUsed(Register Reg) override {
233 MIB.addDef(RegNo: Reg, Flags: RegState::Implicit);
234 }
235
236 MachineInstrBuilder MIB;
237};
238
239/// A special return arg handler for "returned" attribute arg calls.
240struct ReturnedArgCallReturnHandler : public CallReturnHandler {
241 ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
242 MachineRegisterInfo &MRI,
243 MachineInstrBuilder MIB)
244 : CallReturnHandler(MIRBuilder, MRI, MIB) {}
245
246 void markRegUsed(Register Reg) override {}
247};
248
249struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
250 OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
251 MachineInstrBuilder MIB, bool IsTailCall = false,
252 int FPDiff = 0)
253 : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
254 FPDiff(FPDiff),
255 Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}
256
257 Register getStackAddress(uint64_t Size, int64_t Offset,
258 MachinePointerInfo &MPO,
259 ISD::ArgFlagsTy Flags) override {
260 MachineFunction &MF = MIRBuilder.getMF();
261 LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
262 LLT s64 = LLT::scalar(SizeInBits: 64);
263
264 if (IsTailCall) {
265 assert(!Flags.isByVal() && "byval unhandled with tail calls");
266
267 Offset += FPDiff;
268 int FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: Offset, IsImmutable: true);
269 auto FIReg = MIRBuilder.buildFrameIndex(Res: p0, Idx: FI);
270 MPO = MachinePointerInfo::getFixedStack(MF, FI);
271 return FIReg.getReg(Idx: 0);
272 }
273
274 if (!SPReg)
275 SPReg = MIRBuilder.buildCopy(Res: p0, Op: Register(AArch64::SP)).getReg(Idx: 0);
276
277 auto OffsetReg = MIRBuilder.buildConstant(Res: s64, Val: Offset);
278
279 auto AddrReg = MIRBuilder.buildPtrAdd(Res: p0, Op0: SPReg, Op1: OffsetReg);
280
281 MPO = MachinePointerInfo::getStack(MF, Offset);
282 return AddrReg.getReg(Idx: 0);
283 }
284
285 /// We need to fixup the reported store size for certain value types because
286 /// we invert the interpretation of ValVT and LocVT in certain cases. This is
287 /// for compatibility with the DAG call lowering implementation, which we're
288 /// currently building on top of.
289 LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
290 ISD::ArgFlagsTy Flags) const override {
291 if (Flags.isPointer())
292 return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
293 return getStackValueStoreTypeHack(VA);
294 }
295
296 void assignValueToReg(Register ValVReg, Register PhysReg,
297 const CCValAssign &VA, ISD::ArgFlagsTy Flags) override {
298 MIB.addUse(RegNo: PhysReg, Flags: RegState::Implicit);
299 Register ExtReg = extendRegister(ValReg: ValVReg, VA);
300 MIRBuilder.buildCopy(Res: PhysReg, Op: ExtReg);
301 }
302
303 /// Check whether a stack argument requires lowering in a tail call.
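  /// A stack-passed argument needs no store if it is merely forwarded from the
  /// caller's matching incoming stack slot: a load from an immutable fixed
  /// frame index with the same offset and size as the slot being stored to.
  /// In that case the bytes are already in place.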
304 static bool shouldLowerTailCallStackArg(const MachineFunction &MF,
305 const CCValAssign &VA,
306 Register ValVReg,
307 Register StoreAddr) {
308 const MachineRegisterInfo &MRI = MF.getRegInfo();
    // Find the defining instruction for the value.
    auto *DefMI = MRI.getVRegDef(ValVReg);
311 assert(DefMI && "No defining instruction");
312 for (;;) {
313 // Look through nodes that don't alter the bits of the incoming value.
314 unsigned Op = DefMI->getOpcode();
315 if (Op == TargetOpcode::G_ZEXT || Op == TargetOpcode::G_ANYEXT ||
316 Op == TargetOpcode::G_BITCAST || isAssertMI(MI: *DefMI)) {
317 DefMI = MRI.getVRegDef(Reg: DefMI->getOperand(i: 1).getReg());
318 continue;
319 }
320 break;
321 }
322
323 auto *Load = dyn_cast<GLoad>(Val: DefMI);
324 if (!Load)
325 return true;
326 Register LoadReg = Load->getPointerReg();
327 auto *LoadAddrDef = MRI.getVRegDef(Reg: LoadReg);
328 if (LoadAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
329 return true;
330 const MachineFrameInfo &MFI = MF.getFrameInfo();
331 int LoadFI = LoadAddrDef->getOperand(i: 1).getIndex();
332
333 auto *StoreAddrDef = MRI.getVRegDef(Reg: StoreAddr);
334 if (StoreAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX)
335 return true;
336 int StoreFI = StoreAddrDef->getOperand(i: 1).getIndex();
337
338 if (!MFI.isImmutableObjectIndex(ObjectIdx: LoadFI))
339 return true;
340 if (MFI.getObjectOffset(ObjectIdx: LoadFI) != MFI.getObjectOffset(ObjectIdx: StoreFI))
341 return true;
342 if (Load->getMemSize() != MFI.getObjectSize(ObjectIdx: StoreFI))
343 return true;
344
345 return false;
346 }
347
348 void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
349 const MachinePointerInfo &MPO,
350 const CCValAssign &VA) override {
351 MachineFunction &MF = MIRBuilder.getMF();
352 if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, ValVReg, StoreAddr: Addr))
353 return;
354 auto MMO = MF.getMachineMemOperand(PtrInfo: MPO, f: MachineMemOperand::MOStore, MemTy,
355 base_alignment: inferAlignFromPtrInfo(MF, MPO));
356 MIRBuilder.buildStore(Val: ValVReg, Addr, MMO&: *MMO);
357 }
358
359 void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
360 Register Addr, LLT MemTy,
361 const MachinePointerInfo &MPO,
362 const CCValAssign &VA) override {
363 unsigned MaxSize = MemTy.getSizeInBytes() * 8;
364 // For varargs, we always want to extend them to 8 bytes, in which case
365 // we disable setting a max.
366 if (Arg.Flags[0].isVarArg())
367 MaxSize = 0;
368
369 Register ValVReg = Arg.Regs[RegIndex];
370 if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
371 MVT LocVT = VA.getLocVT();
372 MVT ValVT = VA.getValVT();
373
374 if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
375 std::swap(a&: ValVT, b&: LocVT);
376 MemTy = LLT(VA.getValVT());
377 }
378
379 ValVReg = extendRegister(ValReg: ValVReg, VA, MaxSizeBits: MaxSize);
380 } else {
381 // The store does not cover the full allocated stack slot.
382 MemTy = LLT(VA.getValVT());
383 }
384
385 assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
386 }
387
388 MachineInstrBuilder MIB;
389
390 bool IsTailCall;
391
392 /// For tail calls, the byte offset of the call's argument area from the
393 /// callee's. Unused elsewhere.
394 int FPDiff;
395
396 // Cache the SP register vreg if we need it more than once in this call site.
397 Register SPReg;
398
399 const AArch64Subtarget &Subtarget;
400};
401} // namespace
402
403static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
404 return (CallConv == CallingConv::Fast && TailCallOpt) ||
405 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
406}
407
408bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
409 const Value *Val,
410 ArrayRef<Register> VRegs,
411 FunctionLoweringInfo &FLI,
412 Register SwiftErrorVReg) const {
413 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: AArch64::RET_ReallyLR);
414 assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
415 "Return value without a vreg");
416
417 bool Success = true;
418 if (!FLI.CanLowerReturn) {
419 insertSRetStores(MIRBuilder, RetTy: Val->getType(), VRegs, DemoteReg: FLI.DemoteRegister);
420 } else if (!VRegs.empty()) {
421 MachineFunction &MF = MIRBuilder.getMF();
422 const Function &F = MF.getFunction();
423 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
424
425 MachineRegisterInfo &MRI = MF.getRegInfo();
426 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
427 CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC: F.getCallingConv());
428 auto &DL = F.getDataLayout();
429 LLVMContext &Ctx = Val->getType()->getContext();
430
431 SmallVector<EVT, 4> SplitEVTs;
432 ComputeValueVTs(TLI, DL, Ty: Val->getType(), ValueVTs&: SplitEVTs);
433 assert(VRegs.size() == SplitEVTs.size() &&
434 "For each split Type there should be exactly one VReg.");
435
436 SmallVector<ArgInfo, 8> SplitArgs;
437 CallingConv::ID CC = F.getCallingConv();
438
439 for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
440 Register CurVReg = VRegs[i];
441 ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Context&: Ctx), 0};
442 setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F);
443
444 // i1 is a special case because SDAG i1 true is naturally zero extended
445 // when widened using ANYEXT. We need to do it explicitly here.
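      // For example, returning 'i1 true' without a signext/zeroext attribute
      // is emitted as a G_ZEXT to s8, so the low byte of the return register
      // holds exactly 1, matching what SelectionDAG produces.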
446 auto &Flags = CurArgInfo.Flags[0];
447 if (MRI.getType(Reg: CurVReg).getSizeInBits() == TypeSize::getFixed(ExactSize: 1) &&
448 !Flags.isSExt() && !Flags.isZExt()) {
449 CurVReg = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 8), Op: CurVReg).getReg(Idx: 0);
450 } else if (TLI.getNumRegistersForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]) ==
451 1) {
452 // Some types will need extending as specified by the CC.
453 MVT NewVT = TLI.getRegisterTypeForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]);
454 if (EVT(NewVT) != SplitEVTs[i]) {
455 unsigned ExtendOp = TargetOpcode::G_ANYEXT;
456 if (F.getAttributes().hasRetAttr(Kind: Attribute::SExt))
457 ExtendOp = TargetOpcode::G_SEXT;
458 else if (F.getAttributes().hasRetAttr(Kind: Attribute::ZExt))
459 ExtendOp = TargetOpcode::G_ZEXT;
460
461 LLT NewLLT(NewVT);
462 LLT OldLLT = getLLTForType(Ty&: *CurArgInfo.Ty, DL);
463 CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Context&: Ctx);
464 // Instead of an extend, we might have a vector type which needs
465 // padding with more elements, e.g. <2 x half> -> <4 x half>.
466 if (NewVT.isVector()) {
467 if (OldLLT.isVector()) {
468 if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
469 CurVReg =
470 MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg)
471 .getReg(Idx: 0);
472 } else {
473 // Just do a vector extend.
474 CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg})
475 .getReg(Idx: 0);
476 }
477 } else if (NewLLT.getNumElements() >= 2 &&
478 NewLLT.getNumElements() <= 8) {
479 // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't
480 // have <1 x S> vector types in GISel we use a build_vector
481 // instead of a vector merge/concat.
482 CurVReg =
483 MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg)
484 .getReg(Idx: 0);
485 } else {
486 LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
487 return false;
488 }
489 } else {
490 // If the split EVT was a <1 x T> vector, and NewVT is T, then we
491 // don't have to do anything since we don't distinguish between the
492 // two.
493 if (NewLLT != MRI.getType(Reg: CurVReg)) {
494 // A scalar extend.
495 CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg})
496 .getReg(Idx: 0);
497 }
498 }
499 }
500 }
501 if (CurVReg != CurArgInfo.Regs[0]) {
502 CurArgInfo.Regs[0] = CurVReg;
503 // Reset the arg flags after modifying CurVReg.
504 setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F);
505 }
506 splitToValueTypes(OrigArgInfo: CurArgInfo, SplitArgs, DL, CallConv: CC);
507 }
508
509 AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
510 /*IsReturn*/ true);
511 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
512 Success = determineAndHandleAssignments(Handler, Assigner, Args&: SplitArgs,
513 MIRBuilder, CallConv: CC, IsVarArg: F.isVarArg());
514 }
515
516 if (SwiftErrorVReg) {
517 MIB.addUse(RegNo: AArch64::X21, Flags: RegState::Implicit);
518 MIRBuilder.buildCopy(Res: AArch64::X21, Op: SwiftErrorVReg);
519 }
520
521 MIRBuilder.insertInstr(MIB);
522 return Success;
523}
524
525bool AArch64CallLowering::canLowerReturn(MachineFunction &MF,
526 CallingConv::ID CallConv,
527 SmallVectorImpl<BaseArgInfo> &Outs,
528 bool IsVarArg) const {
529 SmallVector<CCValAssign, 16> ArgLocs;
530 const auto &TLI = *getTLI<AArch64TargetLowering>();
531 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
532 MF.getFunction().getContext());
533
534 return checkReturn(CCInfo, Outs, Fn: TLI.CCAssignFnForReturn(CC: CallConv));
535}
536
/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions
/// that can later be used to save and restore them.
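/// The copies make every register that could carry a variadic argument
/// available as a virtual register, so a later musttail call in this function
/// can forward the caller's full incoming register state.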
540static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
541 CCAssignFn *AssignFn) {
542 MachineBasicBlock &MBB = MIRBuilder.getMBB();
543 MachineFunction &MF = MIRBuilder.getMF();
544 MachineFrameInfo &MFI = MF.getFrameInfo();
545
546 if (!MFI.hasMustTailInVarArgFunc())
547 return;
548
549 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
550 const Function &F = MF.getFunction();
551 assert(F.isVarArg() && "Expected F to be vararg?");
552
553 // Compute the set of forwarded registers. The rest are scratch.
554 SmallVector<CCValAssign, 16> ArgLocs;
555 CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
556 F.getContext());
557 SmallVector<MVT, 2> RegParmTypes;
558 RegParmTypes.push_back(Elt: MVT::i64);
559 RegParmTypes.push_back(Elt: MVT::f128);
560
561 // Later on, we can use this vector to restore the registers if necessary.
562 SmallVectorImpl<ForwardedRegister> &Forwards =
563 FuncInfo->getForwardedMustTailRegParms();
564 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: AssignFn);
565
566 // Conservatively forward X8, since it might be used for an aggregate
567 // return.
568 if (!CCInfo.isAllocated(Reg: AArch64::X8)) {
569 Register X8VReg = MF.addLiveIn(PReg: AArch64::X8, RC: &AArch64::GPR64RegClass);
570 Forwards.push_back(Elt: ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
571 }
572
573 // Add the forwards to the MachineBasicBlock and MachineFunction.
574 for (const auto &F : Forwards) {
575 MBB.addLiveIn(PhysReg: F.PReg);
576 MIRBuilder.buildCopy(Res: Register(F.VReg), Op: Register(F.PReg));
577 }
578}
579
580bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
581 auto &F = MF.getFunction();
582 const auto &TM = static_cast<const AArch64TargetMachine &>(MF.getTarget());
583
584 const bool GlobalISelFlag =
585 getCGPassBuilderOption().EnableGlobalISelOption.value_or(u: false);
586
587 auto OptLevel = MF.getTarget().getOptLevel();
588 auto EnableGlobalISelAtO = TM.getEnableGlobalISelAtO();
589
  // GlobalISel is enabled either when the optimization level is at most
  // EnableGlobalISelAtO or when it was explicitly requested on the command
  // line. If neither holds and we still reach this check, GlobalISel is only
  // being run by the SelectionDAG pipeline to handle optnone functions, so
  // fall back for anything that is not optnone.
595 if (static_cast<unsigned>(OptLevel) > EnableGlobalISelAtO && !GlobalISelFlag)
596 return !F.hasOptNone();
597
598 if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
599 llvm::any_of(Range: F.args(), P: [](const Argument &A) {
600 return A.getType()->isScalableTy();
601 })))
602 return true;
603 const auto &ST = MF.getSubtarget<AArch64Subtarget>();
604 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(
        dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
606 return true;
607 }
608
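  // Functions with ZA/ZT0 state or a streaming(-compatible) interface need
  // SME lazy-save and streaming-mode-change logic that is currently only
  // implemented in SelectionDAG, so fall back for those as well.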
609 SMEAttrs Attrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
610 if (Attrs.hasZAState() || Attrs.hasZT0State() ||
611 Attrs.hasStreamingInterfaceOrBody() ||
612 Attrs.hasStreamingCompatibleInterface())
613 return true;
614
615 return false;
616}
617
618void AArch64CallLowering::saveVarArgRegisters(
619 MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
620 CCState &CCInfo) const {
621 auto GPRArgRegs = AArch64::getGPRArgRegs();
622 auto FPRArgRegs = AArch64::getFPRArgRegs();
623
624 MachineFunction &MF = MIRBuilder.getMF();
625 MachineRegisterInfo &MRI = MF.getRegInfo();
626 MachineFrameInfo &MFI = MF.getFrameInfo();
627 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
628 auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
629 bool IsWin64CC = Subtarget.isCallingConvWin64(CC: CCInfo.getCallingConv(),
630 IsVarArg: MF.getFunction().isVarArg());
631 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
632 const LLT s64 = LLT::scalar(SizeInBits: 64);
633
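  // Any x0-x7 argument registers that were not consumed by named parameters
  // may hold variadic arguments, so spill them to a register save area that
  // va_start/va_arg can later walk.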
634 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(Regs: GPRArgRegs);
635 unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;
636
637 unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
638 int GPRIdx = 0;
639 if (GPRSaveSize != 0) {
640 if (IsWin64CC) {
641 GPRIdx = MFI.CreateFixedObject(Size: GPRSaveSize,
642 SPOffset: -static_cast<int>(GPRSaveSize), IsImmutable: false);
643 if (GPRSaveSize & 15)
644 // The extra size here, if triggered, will always be 8.
645 MFI.CreateFixedObject(Size: 16 - (GPRSaveSize & 15),
646 SPOffset: -static_cast<int>(alignTo(Value: GPRSaveSize, Align: 16)),
647 IsImmutable: false);
648 } else
649 GPRIdx = MFI.CreateStackObject(Size: GPRSaveSize, Alignment: Align(8), isSpillSlot: false);
650
651 auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: GPRIdx);
652 auto Offset =
653 MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 8);
654
655 for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
656 Register Val = MRI.createGenericVirtualRegister(Ty: s64);
657 Handler.assignValueToReg(
658 ValVReg: Val, PhysReg: GPRArgRegs[i],
659 VA: CCValAssign::getReg(ValNo: i + MF.getFunction().getNumOperands(), ValVT: MVT::i64,
660 Reg: GPRArgRegs[i], LocVT: MVT::i64, HTP: CCValAssign::Full));
661 auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
662 MF, FI: GPRIdx, Offset: (i - FirstVariadicGPR) * 8)
663 : MachinePointerInfo::getStack(MF, Offset: i * 8);
664 MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO));
665
666 FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0),
667 Op0: FIN.getReg(Idx: 0), Op1: Offset);
668 }
669 }
670 FuncInfo->setVarArgsGPRIndex(GPRIdx);
671 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
672
673 if (Subtarget.hasFPARMv8() && !IsWin64CC) {
674 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(Regs: FPRArgRegs);
675
676 unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
677 int FPRIdx = 0;
678 if (FPRSaveSize != 0) {
679 FPRIdx = MFI.CreateStackObject(Size: FPRSaveSize, Alignment: Align(16), isSpillSlot: false);
680
681 auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: FPRIdx);
682 auto Offset =
683 MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 16);
684
685 for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
686 Register Val = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 128));
687 Handler.assignValueToReg(
688 ValVReg: Val, PhysReg: FPRArgRegs[i],
689 VA: CCValAssign::getReg(
690 ValNo: i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
691 ValVT: MVT::f128, Reg: FPRArgRegs[i], LocVT: MVT::f128, HTP: CCValAssign::Full));
692
693 auto MPO = MachinePointerInfo::getStack(MF, Offset: i * 16);
694 MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO));
695
696 FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0),
697 Op0: FIN.getReg(Idx: 0), Op1: Offset);
698 }
699 }
700 FuncInfo->setVarArgsFPRIndex(FPRIdx);
701 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
702 }
703}
704
705bool AArch64CallLowering::lowerFormalArguments(
706 MachineIRBuilder &MIRBuilder, const Function &F,
707 ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
708 MachineFunction &MF = MIRBuilder.getMF();
709 MachineBasicBlock &MBB = MIRBuilder.getMBB();
710 MachineRegisterInfo &MRI = MF.getRegInfo();
711 auto &DL = F.getDataLayout();
712 auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
713
  // Arm64EC has extra requirements for varargs calls that are currently only
  // handled in SelectionDAG; bail out for now.
716 if (F.isVarArg() && Subtarget.isWindowsArm64EC())
717 return false;
718
719 // Arm64EC thunks have a special calling convention which is only implemented
720 // in SelectionDAG; bail out for now.
721 if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native ||
722 F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
723 return false;
724
725 bool IsWin64 =
726 Subtarget.isCallingConvWin64(CC: F.getCallingConv(), IsVarArg: F.isVarArg()) &&
727 !Subtarget.isWindowsArm64EC();
728
729 SmallVector<ArgInfo, 8> SplitArgs;
730 SmallVector<std::pair<Register, Register>> BoolArgs;
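  // For each i1 argument lowered via the s8 hack below, BoolArgs pairs the
  // original s1 virtual register with the widened s8 register; once arguments
  // are assigned we rebuild the s1 value with G_ASSERT_ZEXT + G_TRUNC.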
731
732 // Insert the hidden sret parameter if the return value won't fit in the
733 // return registers.
734 if (!FLI.CanLowerReturn)
735 insertSRetIncomingArgument(F, SplitArgs, DemoteReg&: FLI.DemoteRegister, MRI, DL);
736
737 unsigned i = 0;
738 for (auto &Arg : F.args()) {
739 if (DL.getTypeStoreSize(Ty: Arg.getType()).isZero())
740 continue;
741
742 ArgInfo OrigArg{VRegs[i], Arg, i};
743 setArgFlags(Arg&: OrigArg, OpIdx: i + AttributeList::FirstArgIndex, DL, FuncInfo: F);
744
745 // i1 arguments are zero-extended to i8 by the caller. Emit a
746 // hint to reflect this.
747 if (OrigArg.Ty->isIntegerTy(Bitwidth: 1)) {
748 assert(OrigArg.Regs.size() == 1 &&
749 MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
750 "Unexpected registers used for i1 arg");
751
752 auto &Flags = OrigArg.Flags[0];
753 if (!Flags.isZExt() && !Flags.isSExt()) {
754 // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
755 Register OrigReg = OrigArg.Regs[0];
756 Register WideReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 8));
757 OrigArg.Regs[0] = WideReg;
758 BoolArgs.push_back(Elt: {OrigReg, WideReg});
759 }
760 }
761
762 if (Arg.hasAttribute(Kind: Attribute::SwiftAsync))
763 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
764
765 splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs, DL, CallConv: F.getCallingConv());
766 ++i;
767 }
768
769 if (!MBB.empty())
770 MIRBuilder.setInstr(*MBB.begin());
771
772 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg());
774
775 AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
776 FormalArgHandler Handler(MIRBuilder, MRI);
777 SmallVector<CCValAssign, 16> ArgLocs;
778 CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
779 if (!determineAssignments(Assigner, Args&: SplitArgs, CCInfo) ||
780 !handleAssignments(Handler, Args&: SplitArgs, CCState&: CCInfo, ArgLocs, MIRBuilder))
781 return false;
782
783 if (!BoolArgs.empty()) {
784 for (auto &KV : BoolArgs) {
785 Register OrigReg = KV.first;
786 Register WideReg = KV.second;
787 LLT WideTy = MRI.getType(Reg: WideReg);
788 assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
789 "Unexpected bit size of a bool arg");
790 MIRBuilder.buildTrunc(
791 Res: OrigReg, Op: MIRBuilder.buildAssertZExt(Res: WideTy, Op: WideReg, Size: 1).getReg(Idx: 0));
792 }
793 }
794
795 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
796 uint64_t StackSize = Assigner.StackSize;
797 if (F.isVarArg()) {
    if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) ||
        IsWin64) {
      // The AAPCS variadic function ABI is identical to the non-variadic one.
      // As a result there may be more arguments in registers and we should
      // save them for future reference.
      // Win64 variadic functions also pass arguments in registers, but all
      // float arguments are passed in integer registers.
804 saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
805 } else if (Subtarget.isWindowsArm64EC()) {
806 return false;
807 }
808
809 // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
810 StackSize = alignTo(Value: Assigner.StackSize, Align: Subtarget.isTargetILP32() ? 4 : 8);
811
812 auto &MFI = MIRBuilder.getMF().getFrameInfo();
813 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(Size: 4, SPOffset: StackSize, IsImmutable: true));
814 }
815
816 if (doesCalleeRestoreStack(CallConv: F.getCallingConv(),
817 TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 bytes in any case.
820 StackSize = alignTo(Value: StackSize, Align: 16);
821
822 // If we're expected to restore the stack (e.g. fastcc), then we'll be
823 // adding a multiple of 16.
824 FuncInfo->setArgumentStackToRestore(StackSize);
825
826 // Our own callers will guarantee that the space is free by giving an
827 // aligned value to CALLSEQ_START.
828 }
829
830 // When we tail call, we need to check if the callee's arguments
831 // will fit on the caller's stack. So, whenever we lower formal arguments,
832 // we should keep track of this information, since we might lower a tail call
833 // in this function later.
834 FuncInfo->setBytesInStackArgArea(StackSize);
835
836 if (Subtarget.hasCustomCallingConv())
837 Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
838
839 handleMustTailForwardedRegisters(MIRBuilder, AssignFn);
840
841 // Move back to the end of the basic block.
842 MIRBuilder.setMBB(MBB);
843
844 return true;
845}
846
847/// Return true if the calling convention is one that we can guarantee TCO for.
848static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
849 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
850 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
851}
852
853/// Return true if we might ever do TCO for calls with this calling convention.
854static bool mayTailCallThisCC(CallingConv::ID CC) {
855 switch (CC) {
856 case CallingConv::C:
857 case CallingConv::PreserveMost:
858 case CallingConv::PreserveAll:
859 case CallingConv::PreserveNone:
860 case CallingConv::Swift:
861 case CallingConv::SwiftTail:
862 case CallingConv::Tail:
863 case CallingConv::Fast:
864 return true;
865 default:
866 return false;
867 }
868}
869
870/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
871/// CC.
872static std::pair<CCAssignFn *, CCAssignFn *>
873getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
874 return {TLI.CCAssignFnForCall(CC, IsVarArg: false), TLI.CCAssignFnForCall(CC, IsVarArg: true)};
875}
876
877bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
878 CallLoweringInfo &Info, MachineFunction &MF,
879 SmallVectorImpl<ArgInfo> &InArgs) const {
880 const Function &CallerF = MF.getFunction();
881 CallingConv::ID CalleeCC = Info.CallConv;
882 CallingConv::ID CallerCC = CallerF.getCallingConv();
883
884 // If the calling conventions match, then everything must be the same.
885 if (CalleeCC == CallerCC)
886 return true;
887
888 // Check if the caller and callee will handle arguments in the same way.
889 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
890 CCAssignFn *CalleeAssignFnFixed;
891 CCAssignFn *CalleeAssignFnVarArg;
892 std::tie(args&: CalleeAssignFnFixed, args&: CalleeAssignFnVarArg) =
893 getAssignFnsForCC(CC: CalleeCC, TLI);
894
895 CCAssignFn *CallerAssignFnFixed;
896 CCAssignFn *CallerAssignFnVarArg;
897 std::tie(args&: CallerAssignFnFixed, args&: CallerAssignFnVarArg) =
898 getAssignFnsForCC(CC: CallerCC, TLI);
899
900 AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
901 CalleeAssignFnVarArg);
902 AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
903 CallerAssignFnVarArg);
904
905 if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
906 return false;
907
908 // Make sure that the caller and callee preserve all of the same registers.
909 auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
910 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
911 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
912 if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
913 TRI->UpdateCustomCallPreservedMask(MF, Mask: &CallerPreserved);
914 TRI->UpdateCustomCallPreservedMask(MF, Mask: &CalleePreserved);
915 }
916
917 return TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved);
918}
919
920bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
921 CallLoweringInfo &Info, MachineFunction &MF,
922 SmallVectorImpl<ArgInfo> &OrigOutArgs) const {
923 // If there are no outgoing arguments, then we are done.
924 if (OrigOutArgs.empty())
925 return true;
926
927 const Function &CallerF = MF.getFunction();
928 LLVMContext &Ctx = CallerF.getContext();
929 CallingConv::ID CalleeCC = Info.CallConv;
930 CallingConv::ID CallerCC = CallerF.getCallingConv();
931 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
932 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
933
934 CCAssignFn *AssignFnFixed;
935 CCAssignFn *AssignFnVarArg;
936 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI);
937
938 // We have outgoing arguments. Make sure that we can tail call with them.
939 SmallVector<CCValAssign, 16> OutLocs;
940 CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);
941
942 AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
943 Subtarget, /*IsReturn*/ false);
944 // determineAssignments() may modify argument flags, so make a copy.
945 SmallVector<ArgInfo, 8> OutArgs;
946 append_range(C&: OutArgs, R&: OrigOutArgs);
947 if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo)) {
948 LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
949 return false;
950 }
951
952 // Make sure that they can fit on the caller's stack.
953 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
954 if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
955 LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
956 return false;
957 }
958
959 // Verify that the parameters in callee-saved registers match.
960 // TODO: Port this over to CallLowering as general code once swiftself is
961 // supported.
962 auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
963 const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
964 MachineRegisterInfo &MRI = MF.getRegInfo();
965
966 if (Info.IsVarArg) {
967 // Be conservative and disallow variadic memory operands to match SDAG's
968 // behaviour.
969 // FIXME: If the caller's calling convention is C, then we can
970 // potentially use its argument area. However, for cases like fastcc,
971 // we can't do anything.
972 for (unsigned i = 0; i < OutLocs.size(); ++i) {
973 auto &ArgLoc = OutLocs[i];
974 if (ArgLoc.isRegLoc())
975 continue;
976
977 LLVM_DEBUG(
978 dbgs()
979 << "... Cannot tail call vararg function with stack arguments\n");
980 return false;
981 }
982 }
983
984 return parametersInCSRMatch(MRI, CallerPreservedMask, ArgLocs: OutLocs, OutVals: OutArgs);
985}
986
987bool AArch64CallLowering::isEligibleForTailCallOptimization(
988 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
989 SmallVectorImpl<ArgInfo> &InArgs,
990 SmallVectorImpl<ArgInfo> &OutArgs) const {
991
992 // Must pass all target-independent checks in order to tail call optimize.
993 if (!Info.IsTailCall)
994 return false;
995
996 CallingConv::ID CalleeCC = Info.CallConv;
997 MachineFunction &MF = MIRBuilder.getMF();
998 const Function &CallerF = MF.getFunction();
999
1000 LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
1001
1002 if (Info.SwiftErrorVReg) {
1003 // TODO: We should handle this.
1004 // Note that this is also handled by the check for no outgoing arguments.
1005 // Proactively disabling this though, because the swifterror handling in
1006 // lowerCall inserts a COPY *after* the location of the call.
1007 LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
1008 return false;
1009 }
1010
1011 if (!mayTailCallThisCC(CC: CalleeCC)) {
1012 LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
1013 return false;
1014 }
1015
1016 // Byval parameters hand the function a pointer directly into the stack area
1017 // we want to reuse during a tail call. Working around this *is* possible (see
1018 // X86).
1019 //
1020 // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
1021 // it?
1022 //
1023 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
1024 // In this case, it is necessary to save/restore X0 in the callee. Tail
1025 // call opt interferes with this. So we disable tail call opt when the
1026 // caller has an argument with "inreg" attribute.
1027 //
1028 // FIXME: Check whether the callee also has an "inreg" argument.
1029 //
1030 // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
1032 // tail call.
1033 if (any_of(Range: CallerF.args(), P: [](const Argument &A) {
1034 return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
1035 })) {
1036 LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
1037 "inreg, or swifterror arguments\n");
1038 return false;
1039 }
1040
1041 // Externally-defined functions with weak linkage should not be
1042 // tail-called on AArch64 when the OS does not support dynamic
1043 // pre-emption of symbols, as the AAELF spec requires normal calls
1044 // to undefined weak functions to be replaced with a NOP or jump to the
1045 // next instruction. The behaviour of branch instructions in this
1046 // situation (as used for tail calls) is implementation-defined, so we
1047 // cannot rely on the linker replacing the tail call with a return.
1048 if (Info.Callee.isGlobal()) {
1049 const GlobalValue *GV = Info.Callee.getGlobal();
1050 const Triple &TT = MF.getTarget().getTargetTriple();
1051 if (GV->hasExternalWeakLinkage() &&
1052 (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
1053 TT.isOSBinFormatMachO())) {
1054 LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
1055 "with weak linkage for this OS.\n");
1056 return false;
1057 }
1058 }
1059
1060 // If we have -tailcallopt, then we're done.
1061 if (canGuaranteeTCO(CC: CalleeCC, GuaranteeTailCalls: MF.getTarget().Options.GuaranteedTailCallOpt))
1062 return CalleeCC == CallerF.getCallingConv();
1063
1064 // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
1065 // Try to find cases where we can do that.
1066
1067 // I want anyone implementing a new calling convention to think long and hard
1068 // about this assert.
1069 assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
1070 "Unexpected variadic calling convention");
1071
1072 // Verify that the incoming and outgoing arguments from the callee are
1073 // safe to tail call.
1074 if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
1075 LLVM_DEBUG(
1076 dbgs()
1077 << "... Caller and callee have incompatible calling conventions.\n");
1078 return false;
1079 }
1080
1081 if (!areCalleeOutgoingArgsTailCallable(Info, MF, OrigOutArgs&: OutArgs))
1082 return false;
1083
1084 LLVM_DEBUG(
1085 dbgs() << "... Call is eligible for tail call optimization.\n");
1086 return true;
1087}
1088
1089static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
1090 bool IsTailCall,
1091 std::optional<CallLowering::PtrAuthInfo> &PAI,
1092 MachineRegisterInfo &MRI) {
1093 const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>();
1094
1095 if (!IsTailCall) {
1096 if (!PAI)
1097 return IsIndirect ? getBLRCallOpcode(MF: CallerF) : (unsigned)AArch64::BL;
1098
1099 assert(IsIndirect && "Direct call should not be authenticated");
1100 assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) &&
1101 "Invalid auth call key");
1102 return AArch64::BLRA;
1103 }
1104
1105 if (!IsIndirect)
1106 return AArch64::TCRETURNdi;
1107
1108 // When BTI or PAuthLR are enabled, there are restrictions on using x16 and
1109 // x17 to hold the function pointer.
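  // With BTI, an indirect tail call branches with BR, and only BR x16/x17
  // lands safely on a "BTI c" guarded target, hence the dedicated opcodes.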
1110 if (FuncInfo->branchTargetEnforcement()) {
1111 if (FuncInfo->branchProtectionPAuthLR()) {
1112 assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1113 return AArch64::TCRETURNrix17;
1114 }
1115 if (PAI)
1116 return AArch64::AUTH_TCRETURN_BTI;
1117 return AArch64::TCRETURNrix16x17;
1118 }
1119
1120 if (FuncInfo->branchProtectionPAuthLR()) {
1121 assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1122 return AArch64::TCRETURNrinotx16;
1123 }
1124
1125 if (PAI)
1126 return AArch64::AUTH_TCRETURN;
1127 return AArch64::TCRETURNri;
1128}
1129
1130static const uint32_t *
1131getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
1132 AArch64CallLowering::CallLoweringInfo &Info,
1133 const AArch64RegisterInfo &TRI, MachineFunction &MF) {
1134 const uint32_t *Mask;
1135 if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
1136 // For 'this' returns, use the X0-preserving mask if applicable
1137 Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
1138 if (!Mask) {
1139 OutArgs[0].Flags[0].setReturned(false);
1140 Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1141 }
1142 } else {
1143 Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
1144 }
1145 return Mask;
1146}
1147
1148bool AArch64CallLowering::lowerTailCall(
1149 MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
1150 SmallVectorImpl<ArgInfo> &OutArgs) const {
1151 MachineFunction &MF = MIRBuilder.getMF();
1152 const Function &F = MF.getFunction();
1153 MachineRegisterInfo &MRI = MF.getRegInfo();
1154 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1155 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1156
1157 // True when we're tail calling, but without -tailcallopt.
1158 bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
1159 Info.CallConv != CallingConv::Tail &&
1160 Info.CallConv != CallingConv::SwiftTail;
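  // A sibcall reuses the caller's incoming argument area unchanged, whereas
  // the -tailcallopt conventions may resize the stack, so several decisions
  // below (CALLSEQ markers, FPDiff) depend on this distinction.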
1161
1162 // Find out which ABI gets to decide where things go.
1163 CallingConv::ID CalleeCC = Info.CallConv;
1164 CCAssignFn *AssignFnFixed;
1165 CCAssignFn *AssignFnVarArg;
1166 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI);
1167
1168 MachineInstrBuilder CallSeqStart;
1169 if (!IsSibCall)
1170 CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN);
1171
1172 unsigned Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: true, PAI&: Info.PAI, MRI);
1173 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc);
1174 MIB.add(MO: Info.Callee);
1175
1176 // Tell the call which registers are clobbered.
1177 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1178 auto TRI = Subtarget.getRegisterInfo();
1179
1180 // Byte offset for the tail call. When we are sibcalling, this will always
1181 // be 0.
1182 MIB.addImm(Val: 0);
1183
1184 // Authenticated tail calls always take key/discriminator arguments.
1185 if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) {
1186 assert((Info.PAI->Key == AArch64PACKey::IA ||
1187 Info.PAI->Key == AArch64PACKey::IB) &&
1188 "Invalid auth call key");
1189 MIB.addImm(Val: Info.PAI->Key);
1190
1191 Register AddrDisc = 0;
1192 uint16_t IntDisc = 0;
1193 std::tie(args&: IntDisc, args&: AddrDisc) =
1194 extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI);
1195
1196 MIB.addImm(Val: IntDisc);
1197 MIB.addUse(RegNo: AddrDisc);
1198 if (AddrDisc != AArch64::NoRegister) {
1199 MIB->getOperand(i: 4).setReg(constrainOperandRegClass(
1200 MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1201 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(),
1202 RegMO&: MIB->getOperand(i: 4), OpIdx: 4));
1203 }
1204 }
1205
1206 // Tell the call which registers are clobbered.
1207 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
1208 if (Subtarget.hasCustomCallingConv())
1209 TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask);
1210 MIB.addRegMask(Mask);
1211
1212 if (Info.CFIType)
1213 MIB->setCFIType(MF, Type: Info.CFIType->getZExtValue());
1214
1215 if (TRI->isAnyArgRegReserved(MF))
1216 TRI->emitReservedArgRegCallError(MF);
1217
1218 // FPDiff is the byte offset of the call's argument area from the callee's.
1219 // Stores to callee stack arguments will be placed in FixedStackSlots offset
1220 // by this amount for a tail call. In a sibling call it must be 0 because the
1221 // caller will deallocate the entire stack and the callee still expects its
1222 // arguments to begin at SP+0.
1223 int FPDiff = 0;
1224
1225 // This will be 0 for sibcalls, potentially nonzero for tail calls produced
1226 // by -tailcallopt. For sibcalls, the memory operands for the call are
1227 // already available in the caller's incoming argument space.
1228 unsigned NumBytes = 0;
1229 if (!IsSibCall) {
1230 // We aren't sibcalling, so we need to compute FPDiff. We need to do this
1231 // before handling assignments, because FPDiff must be known for memory
1232 // arguments.
1233 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
1234 SmallVector<CCValAssign, 16> OutLocs;
1235 CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
1236
1237 AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
1238 Subtarget, /*IsReturn*/ false);
1239 if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo))
1240 return false;
1241
1242 // The callee will pop the argument stack as a tail call. Thus, we must
1243 // keep it 16-byte aligned.
1244 NumBytes = alignTo(Value: OutInfo.getStackSize(), Align: 16);
1245
1246 // FPDiff will be negative if this tail call requires more space than we
1247 // would automatically have in our incoming argument space. Positive if we
1248 // actually shrink the stack.
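    // For example, 32 bytes of reusable incoming argument space and a 48-byte
    // outgoing argument area give FPDiff = -16, and the reserved tail-call
    // area below is raised to at least 16 bytes.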
1249 FPDiff = NumReusableBytes - NumBytes;
1250
1251 // Update the required reserved area if this is the tail call requiring the
1252 // most argument stack space.
1253 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
1254 FuncInfo->setTailCallReservedStack(-FPDiff);
1255
1256 // The stack pointer must be 16-byte aligned at all times it's used for a
1257 // memory operation, which in practice means at *all* times and in
1258 // particular across call boundaries. Therefore our own arguments started at
1259 // a 16-byte aligned SP and the delta applied for the tail call should
1260 // satisfy the same constraint.
1261 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1262 }
1263
1264 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
1265
1266 AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1267 Subtarget, /*IsReturn*/ false);
1268
1269 // Do the actual argument marshalling.
1270 OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
1271 /*IsTailCall*/ true, FPDiff);
1272 if (!determineAndHandleAssignments(Handler, Assigner, Args&: OutArgs, MIRBuilder,
1273 CallConv: CalleeCC, IsVarArg: Info.IsVarArg))
1274 return false;
1275
1276 Mask = getMaskForArgs(OutArgs, Info, TRI: *TRI, MF);
1277
1278 if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call
    // for the forwarded registers that we *aren't* passing as parameters. This
    // will preserve the copies we built earlier.
1282 for (const auto &F : Forwards) {
1283 Register ForwardedReg = F.PReg;
1284 // If the register is already passed, or aliases a register which is
1285 // already being passed, then skip it.
1286 if (any_of(Range: MIB->uses(), P: [&ForwardedReg, &TRI](const MachineOperand &Use) {
1287 if (!Use.isReg())
1288 return false;
1289 return TRI->regsOverlap(RegA: Use.getReg(), RegB: ForwardedReg);
1290 }))
1291 continue;
1292
1293 // We aren't passing it already, so we should add it to the call.
1294 MIRBuilder.buildCopy(Res: ForwardedReg, Op: Register(F.VReg));
1295 MIB.addReg(RegNo: ForwardedReg, Flags: RegState::Implicit);
1296 }
1297 }
1298
1299 // If we have -tailcallopt, we need to adjust the stack. We'll do the call
1300 // sequence start and end here.
1301 if (!IsSibCall) {
1302 MIB->getOperand(i: 1).setImm(FPDiff);
1303 CallSeqStart.addImm(Val: 0).addImm(Val: 0);
1304 // End the call sequence *before* emitting the call. Normally, we would
1305 // tidy the frame up after the call. However, here, we've laid out the
1306 // parameters so that when SP is reset, they will be in the correct
1307 // location.
1308 MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKUP).addImm(Val: 0).addImm(Val: 0);
1309 }
1310
1311 // Now we can add the actual call instruction to the correct basic block.
1312 MIRBuilder.insertInstr(MIB);
1313
1314 // If Callee is a reg, since it is used by a target specific instruction,
1315 // it must have a register class matching the constraint of that instruction.
1316 if (MIB->getOperand(i: 0).isReg())
1317 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1318 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB,
1319 II: MIB->getDesc(), RegMO&: MIB->getOperand(i: 0), OpIdx: 0);
1320
1321 MF.getFrameInfo().setHasTailCall();
1322 Info.LoweredTailCall = true;
1323 return true;
1324}
1325
1326bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
1327 CallLoweringInfo &Info) const {
1328 MachineFunction &MF = MIRBuilder.getMF();
1329 const Function &F = MF.getFunction();
1330 MachineRegisterInfo &MRI = MF.getRegInfo();
1331 auto &DL = F.getDataLayout();
1332 const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
1333 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1334
1335 // Arm64EC has extra requirements for varargs calls; bail out for now.
1336 //
1337 // Arm64EC has special mangling rules for calls; bail out on all calls for
1338 // now.
1339 if (Subtarget.isWindowsArm64EC())
1340 return false;
1341
1342 // Arm64EC thunks have a special calling convention which is only implemented
1343 // in SelectionDAG; bail out for now.
1344 if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
1345 Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
1346 return false;
1347
1348 SmallVector<ArgInfo, 8> OutArgs;
1349 for (auto &OrigArg : Info.OrigArgs) {
1350 splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs&: OutArgs, DL, CallConv: Info.CallConv);
1351 // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
1352 auto &Flags = OrigArg.Flags[0];
1353 if (OrigArg.Ty->isIntegerTy(Bitwidth: 1) && !Flags.isSExt() && !Flags.isZExt()) {
1354 ArgInfo &OutArg = OutArgs.back();
1355 assert(OutArg.Regs.size() == 1 &&
1356 MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
1357 "Unexpected registers used for i1 arg");
1358
1359 // We cannot use a ZExt ArgInfo flag here, because it will
1360 // zero-extend the argument to i32 instead of just i8.
1361 OutArg.Regs[0] =
1362 MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 8), Op: OutArg.Regs[0]).getReg(Idx: 0);
1363 LLVMContext &Ctx = MF.getFunction().getContext();
1364 OutArg.Ty = Type::getInt8Ty(C&: Ctx);
1365 }
1366 }
1367
1368 SmallVector<ArgInfo, 8> InArgs;
1369 if (!Info.OrigRet.Ty->isVoidTy())
1370 splitToValueTypes(OrigArgInfo: Info.OrigRet, SplitArgs&: InArgs, DL, CallConv: Info.CallConv);
1371
1372 // If we can lower as a tail call, do that instead.
1373 bool CanTailCallOpt =
1374 isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);
1375
1376 // We must emit a tail call if we have musttail.
1377 if (Info.IsMustTailCall && !CanTailCallOpt) {
1378 // There are types of incoming/outgoing arguments we can't handle yet, so
1379 // it doesn't make sense to actually die here like in ISelLowering. Instead,
1380 // fall back to SelectionDAG and let it try to handle this.
1381 LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
1382 return false;
1383 }
1384
1385 Info.IsTailCall = CanTailCallOpt;
1386 if (CanTailCallOpt)
1387 return lowerTailCall(MIRBuilder, Info, OutArgs);
1388
1389 // Find out which ABI gets to decide where things go.
1390 CCAssignFn *AssignFnFixed;
1391 CCAssignFn *AssignFnVarArg;
1392 std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) =
1393 getAssignFnsForCC(CC: Info.CallConv, TLI);
1394
1395 MachineInstrBuilder CallSeqStart;
1396 CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN);
1397
1398 // Create a temporarily-floating call instruction so we can add the implicit
1399 // uses of arg registers.
1400
1401 unsigned Opc = 0;
1402 // Calls with operand bundle "clang.arc.attachedcall" are special. They should
1403 // be expanded to the call, directly followed by a special marker sequence and
1404 // a call to an ObjC library function.
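  // For example, a call carrying
  //   [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
  // is emitted as BLR_RVMARKER with that runtime function as an extra operand,
  // and the pseudo is expanded late so nothing is scheduled between the call,
  // the marker, and the ObjC runtime call.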
1405 if (Info.CB && objcarc::hasAttachedCallOpBundle(CB: Info.CB))
1406 Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
1407 // A call to a returns twice function like setjmp must be followed by a bti
1408 // instruction.
1409 else if (Info.CB && Info.CB->hasFnAttr(Kind: Attribute::ReturnsTwice) &&
1410 !Subtarget.noBTIAtReturnTwice() &&
1411 MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
1412 Opc = AArch64::BLR_BTI;
1413 else {
1414 // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
1415 // is set.
1416 if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
1417 auto MIB = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_GLOBAL_VALUE);
1418 DstOp(getLLTForType(Ty&: *F.getType(), DL)).addDefToMIB(MRI, MIB);
1419 MIB.addExternalSymbol(FnName: Info.Callee.getSymbolName(), TargetFlags: AArch64II::MO_GOT);
1420 Info.Callee = MachineOperand::CreateReg(Reg: MIB.getReg(Idx: 0), isDef: false);
1421 }
1422 Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: false, PAI&: Info.PAI, MRI);
1423 }
1424
1425 auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc);
1426 unsigned CalleeOpNo = 0;
1427
1428 if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
1429 // Add a target global address for the retainRV/claimRV runtime function
1430 // just before the call target.
1431 Function *ARCFn = *objcarc::getAttachedARCFunction(CB: Info.CB);
1432 MIB.addGlobalAddress(GV: ARCFn);
1433 ++CalleeOpNo;
1434
1435 // We may or may not need to emit both the marker and the retain/claim call.
1436 // Tell the pseudo expansion using an additional boolean op.
1437 MIB.addImm(Val: objcarc::attachedCallOpBundleNeedsMarker(CB: Info.CB));
1438 ++CalleeOpNo;
1439 } else if (Info.CFIType) {
1440 MIB->setCFIType(MF, Type: Info.CFIType->getZExtValue());
1441 }
1442 MIB->setDeactivationSymbol(MF, DS: Info.DeactivationSymbol);
1443
1444 MIB.add(MO: Info.Callee);
1445
1446 // Tell the call which registers are clobbered.
1447 const uint32_t *Mask;
1448 const auto *TRI = Subtarget.getRegisterInfo();
1449
1450 AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
1451 Subtarget, /*IsReturn*/ false);
1452 // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
1454 if (!determineAndHandleAssignments(Handler, Assigner, Args&: OutArgs, MIRBuilder,
1455 CallConv: Info.CallConv, IsVarArg: Info.IsVarArg))
1456 return false;
1457
1458 Mask = getMaskForArgs(OutArgs, Info, TRI: *TRI, MF);
1459
1460 if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
1461 assert((Info.PAI->Key == AArch64PACKey::IA ||
1462 Info.PAI->Key == AArch64PACKey::IB) &&
1463 "Invalid auth call key");
1464 MIB.addImm(Val: Info.PAI->Key);
1465
1466 Register AddrDisc = 0;
1467 uint16_t IntDisc = 0;
1468 std::tie(args&: IntDisc, args&: AddrDisc) =
1469 extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI);
1470
1471 MIB.addImm(Val: IntDisc);
1472 MIB.addUse(RegNo: AddrDisc);
1473 if (AddrDisc != AArch64::NoRegister) {
1474 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(),
1475 RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB,
1476 II: MIB->getDesc(), RegMO&: MIB->getOperand(i: CalleeOpNo + 3),
1477 OpIdx: CalleeOpNo + 3);
1478 }
1479 }
1480
1481 // Tell the call which registers are clobbered.
1482 if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
1483 TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask);
1484 MIB.addRegMask(Mask);
1485
1486 if (TRI->isAnyArgRegReserved(MF))
1487 TRI->emitReservedArgRegCallError(MF);
1488
1489 // Now we can add the actual call instruction to the correct basic block.
1490 MIRBuilder.insertInstr(MIB);
1491
1492 uint64_t CalleePopBytes =
1493 doesCalleeRestoreStack(CallConv: Info.CallConv,
1494 TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)
1495 ? alignTo(Value: Assigner.StackSize, Align: 16)
1496 : 0;
1497
1498 CallSeqStart.addImm(Val: Assigner.StackSize).addImm(Val: 0);
1499 MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKUP)
1500 .addImm(Val: Assigner.StackSize)
1501 .addImm(Val: CalleePopBytes);
1502
1503 // If Callee is a reg, since it is used by a target specific
1504 // instruction, it must have a register class matching the
1505 // constraint of that instruction.
1506 if (MIB->getOperand(i: CalleeOpNo).isReg())
1507 constrainOperandRegClass(MF, TRI: *TRI, MRI, TII: *Subtarget.getInstrInfo(),
1508 RBI: *Subtarget.getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(),
1509 RegMO&: MIB->getOperand(i: CalleeOpNo), OpIdx: CalleeOpNo);
1510
1511 // Finally we can copy the returned value back into its virtual-register. In
1512 // symmetry with the arguments, the physical register must be an
1513 // implicit-define of the call instruction.
1514 if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
1515 CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(CC: Info.CallConv);
1516 CallReturnHandler Handler(MIRBuilder, MRI, MIB);
1517 bool UsingReturnedArg =
1518 !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
1519
1520 AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
1521 /*IsReturn*/ false);
1522 ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
1523 if (!determineAndHandleAssignments(
1524 Handler&: UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, Args&: InArgs,
1525 MIRBuilder, CallConv: Info.CallConv, IsVarArg: Info.IsVarArg,
1526 ThisReturnRegs: UsingReturnedArg ? ArrayRef(OutArgs[0].Regs)
1527 : ArrayRef<Register>()))
1528 return false;
1529 }
1530
1531 if (Info.SwiftErrorVReg) {
1532 MIB.addDef(RegNo: AArch64::X21, Flags: RegState::Implicit);
1533 MIRBuilder.buildCopy(Res: Info.SwiftErrorVReg, Op: Register(AArch64::X21));
1534 }
1535
1536 if (!Info.CanLowerReturn) {
1537 insertSRetLoads(MIRBuilder, RetTy: Info.OrigRet.Ty, VRegs: Info.OrigRet.Regs,
1538 DemoteReg: Info.DemoteRegister, FI: Info.DemoteStackIndex);
1539 }
1540 return true;
1541}
1542
1543bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
1544 return Ty.getSizeInBits() == 64;
1545}
1546