1 | //===--- AArch64CallLowering.cpp - Call lowering --------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements the lowering of LLVM calls to machine code calls for |
11 | /// GlobalISel. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AArch64CallLowering.h" |
16 | #include "AArch64GlobalISelUtils.h" |
17 | #include "AArch64ISelLowering.h" |
18 | #include "AArch64MachineFunctionInfo.h" |
19 | #include "AArch64RegisterInfo.h" |
20 | #include "AArch64Subtarget.h" |
21 | #include "llvm/ADT/ArrayRef.h" |
22 | #include "llvm/ADT/SmallVector.h" |
23 | #include "llvm/Analysis/ObjCARCUtil.h" |
24 | #include "llvm/CodeGen/Analysis.h" |
25 | #include "llvm/CodeGen/CallingConvLower.h" |
26 | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
27 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
28 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
29 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
30 | #include "llvm/CodeGen/MachineBasicBlock.h" |
31 | #include "llvm/CodeGen/MachineFrameInfo.h" |
32 | #include "llvm/CodeGen/MachineFunction.h" |
33 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
34 | #include "llvm/CodeGen/MachineMemOperand.h" |
35 | #include "llvm/CodeGen/MachineOperand.h" |
36 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
37 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
38 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
39 | #include "llvm/CodeGen/ValueTypes.h" |
40 | #include "llvm/CodeGenTypes/MachineValueType.h" |
41 | #include "llvm/IR/Argument.h" |
42 | #include "llvm/IR/Attributes.h" |
43 | #include "llvm/IR/Function.h" |
44 | #include "llvm/IR/Type.h" |
45 | #include "llvm/IR/Value.h" |
46 | #include <algorithm> |
47 | #include <cassert> |
48 | #include <cstdint> |
49 | #include <iterator> |
50 | |
51 | #define DEBUG_TYPE "aarch64-call-lowering" |
52 | |
53 | using namespace llvm; |
54 | using namespace AArch64GISelUtils; |
55 | |
56 | extern cl::opt<bool> EnableSVEGISel; |
57 | |
58 | AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) |
59 | : CallLowering(&TLI) {} |
60 | |
61 | static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT, |
62 | MVT &LocVT) { |
63 | // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy |
64 | // hack because the DAG calls the assignment function with pre-legalized |
65 | // register typed values, not the raw type. |
66 | // |
67 | // This hack is not applied to return values, which are not passed on the |
68 | // stack. |
69 | if (OrigVT == MVT::i1 || OrigVT == MVT::i8) |
70 | ValVT = LocVT = MVT::i8; |
71 | else if (OrigVT == MVT::i16) |
72 | ValVT = LocVT = MVT::i16; |
73 | } |
74 | |
75 | // Account for i1/i8/i16 stack passed value hack |
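| // The DAG lowering stores i8/i16 stack-passed values with their original |
| // width (a truncating store) rather than the wider LocVT, so report the |
| // ValVT-sized type here to match that behavior. |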
76 | static LLT getStackValueStoreTypeHack(const CCValAssign &VA) { |
77 | const MVT ValVT = VA.getValVT(); |
78 | return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT) |
79 | : LLT(VA.getLocVT()); |
80 | } |
81 | |
82 | namespace { |
83 | |
84 | struct AArch64IncomingValueAssigner |
85 | : public CallLowering::IncomingValueAssigner { |
86 | AArch64IncomingValueAssigner(CCAssignFn *AssignFn_, |
87 | CCAssignFn *AssignFnVarArg_) |
88 | : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {} |
89 | |
90 | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
91 | CCValAssign::LocInfo LocInfo, |
92 | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
93 | CCState &State) override { |
94 | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
95 | return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT, |
96 | LocInfo, Info, Flags, State); |
97 | } |
98 | }; |
99 | |
100 | struct AArch64OutgoingValueAssigner |
101 | : public CallLowering::OutgoingValueAssigner { |
102 | const AArch64Subtarget &Subtarget; |
103 | |
104 | /// Track if this is used for a return instead of function argument |
105 | /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use |
106 | /// stack passed returns for them and cannot apply the type adjustment. |
107 | bool IsReturn; |
108 | |
109 | AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_, |
110 | CCAssignFn *AssignFnVarArg_, |
111 | const AArch64Subtarget &Subtarget_, |
112 | bool IsReturn) |
113 | : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_), |
114 | Subtarget(Subtarget_), IsReturn(IsReturn) {} |
115 | |
116 | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
117 | CCValAssign::LocInfo LocInfo, |
118 | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
119 | CCState &State) override { |
120 | const Function &F = State.getMachineFunction().getFunction(); |
121 | bool IsCalleeWin = |
122 | Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg()); |
123 | bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg(); |
124 | |
125 | bool Res; |
126 | if (Info.IsFixed && !UseVarArgsCCForFixed) { |
127 | if (!IsReturn) |
128 | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
129 | Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); |
130 | } else |
131 | Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State); |
132 | |
133 | StackSize = State.getStackSize(); |
134 | return Res; |
135 | } |
136 | }; |
137 | |
138 | struct IncomingArgHandler : public CallLowering::IncomingValueHandler { |
139 | IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
140 | : IncomingValueHandler(MIRBuilder, MRI) {} |
141 | |
142 | Register getStackAddress(uint64_t Size, int64_t Offset, |
143 | MachinePointerInfo &MPO, |
144 | ISD::ArgFlagsTy Flags) override { |
145 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
146 | |
147 | // Byval is assumed to be writable memory, but other stack passed arguments |
148 | // are not. |
149 | const bool IsImmutable = !Flags.isByVal(); |
150 | |
151 | int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable); |
152 | MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); |
153 | auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI); |
154 | return AddrReg.getReg(0); |
155 | } |
156 | |
157 | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
158 | ISD::ArgFlagsTy Flags) const override { |
159 | // For pointers, we just need to fixup the integer types reported in the |
160 | // CCValAssign. |
161 | if (Flags.isPointer()) |
162 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
163 | return getStackValueStoreTypeHack(VA); |
164 | } |
165 | |
166 | void assignValueToReg(Register ValVReg, Register PhysReg, |
167 | const CCValAssign &VA) override { |
168 | markPhysRegUsed(PhysReg); |
169 | IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); |
170 | } |
171 | |
172 | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
173 | const MachinePointerInfo &MPO, |
174 | const CCValAssign &VA) override { |
175 | MachineFunction &MF = MIRBuilder.getMF(); |
176 | |
177 | LLT ValTy(VA.getValVT()); |
178 | LLT LocTy(VA.getLocVT()); |
179 | |
180 | // Fixup the types for the DAG compatibility hack. |
181 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) |
182 | std::swap(ValTy, LocTy); |
183 | else { |
184 | // The calling code knows if this is a pointer or not, we're only touching |
185 | // the LocTy for the i8/i16 hack. |
186 | assert(LocTy.getSizeInBits() == MemTy.getSizeInBits()); |
187 | LocTy = MemTy; |
188 | } |
189 | |
190 | auto MMO = MF.getMachineMemOperand( |
191 | MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy, |
192 | inferAlignFromPtrInfo(MF, MPO)); |
193 | |
194 | switch (VA.getLocInfo()) { |
195 | case CCValAssign::LocInfo::ZExt: |
196 | MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO); |
197 | return; |
198 | case CCValAssign::LocInfo::SExt: |
199 | MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO); |
200 | return; |
201 | default: |
202 | MIRBuilder.buildLoad(ValVReg, Addr, *MMO); |
203 | return; |
204 | } |
205 | } |
206 | |
207 | /// How the physical register gets marked varies between formal |
208 | /// parameters (it's a basic-block live-in), and a call instruction |
209 | /// (it's an implicit-def of the BL). |
210 | virtual void markPhysRegUsed(MCRegister PhysReg) = 0; |
211 | }; |
212 | |
213 | struct FormalArgHandler : public IncomingArgHandler { |
214 | FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
215 | : IncomingArgHandler(MIRBuilder, MRI) {} |
216 | |
217 | void markPhysRegUsed(MCRegister PhysReg) override { |
218 | MIRBuilder.getMRI()->addLiveIn(PhysReg); |
219 | MIRBuilder.getMBB().addLiveIn(PhysReg); |
220 | } |
221 | }; |
222 | |
223 | struct CallReturnHandler : public IncomingArgHandler { |
224 | CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
225 | MachineInstrBuilder MIB) |
226 | : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {} |
227 | |
228 | void markPhysRegUsed(MCRegister PhysReg) override { |
229 | MIB.addDef(PhysReg, RegState::Implicit); |
230 | } |
231 | |
232 | MachineInstrBuilder MIB; |
233 | }; |
234 | |
235 | /// A special return arg handler for "returned" attribute arg calls. |
236 | struct ReturnedArgCallReturnHandler : public CallReturnHandler { |
237 | ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder, |
238 | MachineRegisterInfo &MRI, |
239 | MachineInstrBuilder MIB) |
240 | : CallReturnHandler(MIRBuilder, MRI, MIB) {} |
241 | |
242 | void markPhysRegUsed(MCRegister PhysReg) override {} |
243 | }; |
244 | |
245 | struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { |
246 | OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
247 | MachineInstrBuilder MIB, bool IsTailCall = false, |
248 | int FPDiff = 0) |
249 | : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall), |
250 | FPDiff(FPDiff), |
251 | Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {} |
252 | |
253 | Register getStackAddress(uint64_t Size, int64_t Offset, |
254 | MachinePointerInfo &MPO, |
255 | ISD::ArgFlagsTy Flags) override { |
256 | MachineFunction &MF = MIRBuilder.getMF(); |
257 | LLT p0 = LLT::pointer(0, 64); |
258 | LLT s64 = LLT::scalar(64); |
259 | |
260 | if (IsTailCall) { |
261 | assert(!Flags.isByVal() && "byval unhandled with tail calls"); |
262 | |
263 | Offset += FPDiff; |
264 | int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); |
265 | auto FIReg = MIRBuilder.buildFrameIndex(p0, FI); |
266 | MPO = MachinePointerInfo::getFixedStack(MF, FI); |
267 | return FIReg.getReg(0); |
268 | } |
269 | |
270 | if (!SPReg) |
271 | SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0); |
272 | |
273 | auto OffsetReg = MIRBuilder.buildConstant(s64, Offset); |
274 | |
275 | auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg); |
276 | |
277 | MPO = MachinePointerInfo::getStack(MF, Offset); |
278 | return AddrReg.getReg(0); |
279 | } |
280 | |
281 | /// We need to fixup the reported store size for certain value types because |
282 | /// we invert the interpretation of ValVT and LocVT in certain cases. This is |
283 | /// for compatibility with the DAG call lowering implementation, which we're |
284 | /// currently building on top of. |
285 | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
286 | ISD::ArgFlagsTy Flags) const override { |
287 | if (Flags.isPointer()) |
288 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
289 | return getStackValueStoreTypeHack(VA); |
290 | } |
291 | |
292 | void assignValueToReg(Register ValVReg, Register PhysReg, |
293 | const CCValAssign &VA) override { |
294 | MIB.addUse(PhysReg, RegState::Implicit); |
295 | Register ExtReg = extendRegister(ValVReg, VA); |
296 | MIRBuilder.buildCopy(PhysReg, ExtReg); |
297 | } |
298 | |
299 | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
300 | const MachinePointerInfo &MPO, |
301 | const CCValAssign &VA) override { |
302 | MachineFunction &MF = MIRBuilder.getMF(); |
303 | auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy, |
304 | inferAlignFromPtrInfo(MF, MPO)); |
305 | MIRBuilder.buildStore(ValVReg, Addr, *MMO); |
306 | } |
307 | |
308 | void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex, |
309 | Register Addr, LLT MemTy, |
310 | const MachinePointerInfo &MPO, |
311 | const CCValAssign &VA) override { |
312 | unsigned MaxSize = MemTy.getSizeInBytes() * 8; |
313 | // For varargs, we always want to extend them to 8 bytes, in which case |
314 | // we disable setting a max. |
315 | if (!Arg.IsFixed) |
316 | MaxSize = 0; |
317 | |
318 | Register ValVReg = Arg.Regs[RegIndex]; |
319 | if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) { |
320 | MVT LocVT = VA.getLocVT(); |
321 | MVT ValVT = VA.getValVT(); |
322 | |
323 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) { |
324 | std::swap(ValVT, LocVT); |
325 | MemTy = LLT(VA.getValVT()); |
326 | } |
327 | |
328 | ValVReg = extendRegister(ValVReg, VA, MaxSize); |
329 | } else { |
330 | // The store does not cover the full allocated stack slot. |
331 | MemTy = LLT(VA.getValVT()); |
332 | } |
333 | |
334 | assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA); |
335 | } |
336 | |
337 | MachineInstrBuilder MIB; |
338 | |
339 | bool IsTailCall; |
340 | |
341 | /// For tail calls, the byte offset of the call's argument area from the |
342 | /// callee's. Unused elsewhere. |
343 | int FPDiff; |
344 | |
345 | // Cache the SP register vreg if we need it more than once in this call site. |
346 | Register SPReg; |
347 | |
348 | const AArch64Subtarget &Subtarget; |
349 | }; |
350 | } // namespace |
351 | |
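| /// Return true if the callee pops its own stack arguments: fastcc with |
| /// -tailcallopt enabled, tailcc, and swifttailcc. |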
352 | static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { |
353 | return (CallConv == CallingConv::Fast && TailCallOpt) || |
354 | CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; |
355 | } |
356 | |
357 | bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, |
358 | const Value *Val, |
359 | ArrayRef<Register> VRegs, |
360 | FunctionLoweringInfo &FLI, |
361 | Register SwiftErrorVReg) const { |
362 | auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR); |
363 | assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && |
364 | "Return value without a vreg"); |
365 | |
366 | bool Success = true; |
367 | if (!FLI.CanLowerReturn) { |
368 | insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister); |
369 | } else if (!VRegs.empty()) { |
370 | MachineFunction &MF = MIRBuilder.getMF(); |
371 | const Function &F = MF.getFunction(); |
372 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
373 | |
374 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
375 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
376 | CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); |
377 | auto &DL = F.getDataLayout(); |
378 | LLVMContext &Ctx = Val->getType()->getContext(); |
379 | |
380 | SmallVector<EVT, 4> SplitEVTs; |
381 | ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs); |
382 | assert(VRegs.size() == SplitEVTs.size() && |
383 | "For each split Type there should be exactly one VReg."); |
384 | |
385 | SmallVector<ArgInfo, 8> SplitArgs; |
386 | CallingConv::ID CC = F.getCallingConv(); |
387 | |
388 | for (unsigned i = 0; i < SplitEVTs.size(); ++i) { |
389 | Register CurVReg = VRegs[i]; |
390 | ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0}; |
391 | setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F); |
392 | |
393 | // i1 is a special case because SDAG i1 true is naturally zero extended |
394 | // when widened using ANYEXT. We need to do it explicitly here. |
395 | auto &Flags = CurArgInfo.Flags[0]; |
396 | if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() && |
397 | !Flags.isZExt()) { |
398 | CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0); |
399 | } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) == |
400 | 1) { |
401 | // Some types will need extending as specified by the CC. |
402 | MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]); |
403 | if (EVT(NewVT) != SplitEVTs[i]) { |
404 | unsigned ExtendOp = TargetOpcode::G_ANYEXT; |
405 | if (F.getAttributes().hasRetAttr(Attribute::SExt)) |
406 | ExtendOp = TargetOpcode::G_SEXT; |
407 | else if (F.getAttributes().hasRetAttr(Attribute::ZExt)) |
408 | ExtendOp = TargetOpcode::G_ZEXT; |
409 | |
410 | LLT NewLLT(NewVT); |
411 | LLT OldLLT = getLLTForType(*CurArgInfo.Ty, DL); |
412 | CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx); |
413 | // Instead of an extend, we might have a vector type which needs |
414 | // padding with more elements, e.g. <2 x half> -> <4 x half>. |
415 | if (NewVT.isVector()) { |
416 | if (OldLLT.isVector()) { |
417 | if (NewLLT.getNumElements() > OldLLT.getNumElements()) { |
418 | CurVReg = |
419 | MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg) |
420 | .getReg(0); |
421 | } else { |
422 | // Just do a vector extend. |
423 | CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}) |
424 | .getReg(0); |
425 | } |
426 | } else if (NewLLT.getNumElements() >= 2 && |
427 | NewLLT.getNumElements() <= 8) { |
428 | // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't |
429 | // have <1 x S> vector types in GISel we use a build_vector |
430 | // instead of a vector merge/concat. |
431 | CurVReg = |
432 | MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg) |
433 | .getReg(0); |
434 | } else { |
435 | LLVM_DEBUG(dbgs() << "Could not handle ret ty\n"); |
436 | return false; |
437 | } |
438 | } else { |
439 | // If the split EVT was a <1 x T> vector, and NewVT is T, then we |
440 | // don't have to do anything since we don't distinguish between the |
441 | // two. |
442 | if (NewLLT != MRI.getType(CurVReg)) { |
443 | // A scalar extend. |
444 | CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}) |
445 | .getReg(0); |
446 | } |
447 | } |
448 | } |
449 | } |
450 | if (CurVReg != CurArgInfo.Regs[0]) { |
451 | CurArgInfo.Regs[0] = CurVReg; |
452 | // Reset the arg flags after modifying CurVReg. |
453 | setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F); |
454 | } |
455 | splitToValueTypes(CurArgInfo, SplitArgs, DL, CC); |
456 | } |
457 | |
458 | AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget, |
459 | /*IsReturn*/ true); |
460 | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB); |
461 | Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs, |
462 | MIRBuilder, CC, F.isVarArg()); |
463 | } |
464 | |
465 | if (SwiftErrorVReg) { |
466 | MIB.addUse(AArch64::X21, RegState::Implicit); |
467 | MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg); |
468 | } |
469 | |
470 | MIRBuilder.insertInstr(MIB); |
471 | return Success; |
472 | } |
473 | |
474 | bool AArch64CallLowering::canLowerReturn(MachineFunction &MF, |
475 | CallingConv::ID CallConv, |
476 | SmallVectorImpl<BaseArgInfo> &Outs, |
477 | bool IsVarArg) const { |
478 | SmallVector<CCValAssign, 16> ArgLocs; |
479 | const auto &TLI = *getTLI<AArch64TargetLowering>(); |
480 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, |
481 | MF.getFunction().getContext()); |
482 | |
483 | return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv)); |
484 | } |
485 | |
486 | /// Helper function to compute forwarded registers for musttail calls. Computes |
487 | /// the forwarded registers, sets MBB liveness, and emits COPY instructions that |
488 | /// can be used to save + restore registers later. |
489 | static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, |
490 | CCAssignFn *AssignFn) { |
491 | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
492 | MachineFunction &MF = MIRBuilder.getMF(); |
493 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
494 | |
495 | if (!MFI.hasMustTailInVarArgFunc()) |
496 | return; |
497 | |
498 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
499 | const Function &F = MF.getFunction(); |
500 | assert(F.isVarArg() && "Expected F to be vararg?"); |
501 | |
502 | // Compute the set of forwarded registers. The rest are scratch. |
503 | SmallVector<CCValAssign, 16> ArgLocs; |
504 | CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs, |
505 | F.getContext()); |
506 | SmallVector<MVT, 2> RegParmTypes; |
507 | RegParmTypes.push_back(MVT::i64); |
508 | RegParmTypes.push_back(MVT::f128); |
509 | |
510 | // Later on, we can use this vector to restore the registers if necessary. |
511 | SmallVectorImpl<ForwardedRegister> &Forwards = |
512 | FuncInfo->getForwardedMustTailRegParms(); |
513 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn); |
514 | |
515 | // Conservatively forward X8, since it might be used for an aggregate |
516 | // return. |
517 | if (!CCInfo.isAllocated(AArch64::X8)) { |
518 | Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass); |
519 | Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); |
520 | } |
521 | |
522 | // Add the forwards to the MachineBasicBlock and MachineFunction. |
523 | for (const auto &F : Forwards) { |
524 | MBB.addLiveIn(F.PReg); |
525 | MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg)); |
526 | } |
527 | } |
528 | |
529 | bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { |
530 | auto &F = MF.getFunction(); |
531 | if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() || |
532 | llvm::any_of(F.args(), [](const Argument &A) { |
533 | return A.getType()->isScalableTy(); |
534 | }))) |
535 | return true; |
536 | const auto &ST = MF.getSubtarget<AArch64Subtarget>(); |
537 | if (!ST.hasNEON() || !ST.hasFPARMv8()) { |
538 | LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n"); |
539 | return true; |
540 | } |
541 | |
542 | SMEAttrs Attrs(F); |
543 | if (Attrs.hasZAState() || Attrs.hasZT0State() || |
544 | Attrs.hasStreamingInterfaceOrBody() || |
545 | Attrs.hasStreamingCompatibleInterface()) |
546 | return true; |
547 | |
548 | return false; |
549 | } |
550 | |
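| /// Spill the unallocated GPR (and, outside Win64, FPR) argument registers to |
| /// their va_list save areas and record the frame indices and sizes in |
| /// AArch64FunctionInfo for later va_start lowering. |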
551 | void AArch64CallLowering::saveVarArgRegisters( |
552 | MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler, |
553 | CCState &CCInfo) const { |
554 | auto GPRArgRegs = AArch64::getGPRArgRegs(); |
555 | auto FPRArgRegs = AArch64::getFPRArgRegs(); |
556 | |
557 | MachineFunction &MF = MIRBuilder.getMF(); |
558 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
559 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
560 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
561 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
562 | bool IsWin64CC = Subtarget.isCallingConvWin64(CCInfo.getCallingConv(), |
563 | MF.getFunction().isVarArg()); |
564 | const LLT p0 = LLT::pointer(0, 64); |
565 | const LLT s64 = LLT::scalar(64); |
566 | |
567 | unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs); |
568 | unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1; |
569 | |
570 | unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR); |
571 | int GPRIdx = 0; |
572 | if (GPRSaveSize != 0) { |
573 | if (IsWin64CC) { |
574 | GPRIdx = MFI.CreateFixedObject(GPRSaveSize, |
575 | -static_cast<int>(GPRSaveSize), false); |
576 | if (GPRSaveSize & 15) |
577 | // The extra size here, if triggered, will always be 8. |
578 | MFI.CreateFixedObject(16 - (GPRSaveSize & 15), |
579 | -static_cast<int>(alignTo(GPRSaveSize, 16)), |
580 | false); |
581 | } else |
582 | GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false); |
583 | |
584 | auto FIN = MIRBuilder.buildFrameIndex(p0, GPRIdx); |
585 | auto Offset = |
586 | MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 8); |
587 | |
588 | for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) { |
589 | Register Val = MRI.createGenericVirtualRegister(s64); |
590 | Handler.assignValueToReg( |
591 | Val, GPRArgRegs[i], |
592 | CCValAssign::getReg(i + MF.getFunction().getNumOperands(), MVT::i64, |
593 | GPRArgRegs[i], MVT::i64, CCValAssign::Full)); |
594 | auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack( |
595 | MF, GPRIdx, (i - FirstVariadicGPR) * 8) |
596 | : MachinePointerInfo::getStack(MF, i * 8); |
597 | MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO)); |
598 | |
599 | FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0), |
600 | FIN.getReg(0), Offset); |
601 | } |
602 | } |
603 | FuncInfo->setVarArgsGPRIndex(GPRIdx); |
604 | FuncInfo->setVarArgsGPRSize(GPRSaveSize); |
605 | |
606 | if (Subtarget.hasFPARMv8() && !IsWin64CC) { |
607 | unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs); |
608 | |
609 | unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR); |
610 | int FPRIdx = 0; |
611 | if (FPRSaveSize != 0) { |
612 | FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false); |
613 | |
614 | auto FIN = MIRBuilder.buildFrameIndex(p0, FPRIdx); |
615 | auto Offset = |
616 | MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 16); |
617 | |
618 | for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) { |
619 | Register Val = MRI.createGenericVirtualRegister(LLT::scalar(128)); |
620 | Handler.assignValueToReg( |
621 | Val, FPRArgRegs[i], |
622 | CCValAssign::getReg( |
623 | i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs, |
624 | MVT::f128, FPRArgRegs[i], MVT::f128, CCValAssign::Full)); |
625 | |
626 | auto MPO = MachinePointerInfo::getStack(MF, i * 16); |
627 | MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO)); |
628 | |
629 | FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0), |
630 | FIN.getReg(0), Offset); |
631 | } |
632 | } |
633 | FuncInfo->setVarArgsFPRIndex(FPRIdx); |
634 | FuncInfo->setVarArgsFPRSize(FPRSaveSize); |
635 | } |
636 | } |
637 | |
638 | bool AArch64CallLowering::lowerFormalArguments( |
639 | MachineIRBuilder &MIRBuilder, const Function &F, |
640 | ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { |
641 | MachineFunction &MF = MIRBuilder.getMF(); |
642 | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
643 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
644 | auto &DL = F.getDataLayout(); |
645 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
646 | |
647 | // Arm64EC has extra requirements for varargs calls which are only implemented |
648 | // in SelectionDAG; bail out for now. |
649 | if (F.isVarArg() && Subtarget.isWindowsArm64EC()) |
650 | return false; |
651 | |
652 | // Arm64EC thunks have a special calling convention which is only implemented |
653 | // in SelectionDAG; bail out for now. |
654 | if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native || |
655 | F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64) |
656 | return false; |
657 | |
658 | bool IsWin64 = |
659 | Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()) && |
660 | !Subtarget.isWindowsArm64EC(); |
661 | |
662 | SmallVector<ArgInfo, 8> SplitArgs; |
663 | SmallVector<std::pair<Register, Register>> BoolArgs; |
664 | |
665 | // Insert the hidden sret parameter if the return value won't fit in the |
666 | // return registers. |
667 | if (!FLI.CanLowerReturn) |
668 | insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL); |
669 | |
670 | unsigned i = 0; |
671 | for (auto &Arg : F.args()) { |
672 | if (DL.getTypeStoreSize(Arg.getType()).isZero()) |
673 | continue; |
674 | |
675 | ArgInfo OrigArg{VRegs[i], Arg, i}; |
676 | setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F); |
677 | |
678 | // i1 arguments are zero-extended to i8 by the caller. Emit a |
679 | // hint to reflect this. |
680 | if (OrigArg.Ty->isIntegerTy(1)) { |
681 | assert(OrigArg.Regs.size() == 1 && |
682 | MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 && |
683 | "Unexpected registers used for i1 arg"); |
684 | |
685 | auto &Flags = OrigArg.Flags[0]; |
686 | if (!Flags.isZExt() && !Flags.isSExt()) { |
687 | // Lower i1 argument as i8, and insert AssertZExt + Trunc later. |
688 | Register OrigReg = OrigArg.Regs[0]; |
689 | Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8)); |
690 | OrigArg.Regs[0] = WideReg; |
691 | BoolArgs.push_back({OrigReg, WideReg}); |
692 | } |
693 | } |
694 | |
695 | if (Arg.hasAttribute(Attribute::SwiftAsync)) |
696 | MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); |
697 | |
698 | splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv()); |
699 | ++i; |
700 | } |
701 | |
702 | if (!MBB.empty()) |
703 | MIRBuilder.setInstr(*MBB.begin()); |
704 | |
705 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
706 | CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg()); |
707 | |
708 | AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn); |
709 | FormalArgHandler Handler(MIRBuilder, MRI); |
710 | SmallVector<CCValAssign, 16> ArgLocs; |
711 | CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); |
712 | if (!determineAssignments(Assigner, SplitArgs, CCInfo) || |
713 | !handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, MIRBuilder)) |
714 | return false; |
715 | |
716 | if (!BoolArgs.empty()) { |
717 | for (auto &KV : BoolArgs) { |
718 | Register OrigReg = KV.first; |
719 | Register WideReg = KV.second; |
720 | LLT WideTy = MRI.getType(WideReg); |
721 | assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 && |
722 | "Unexpected bit size of a bool arg"); |
723 | MIRBuilder.buildTrunc( |
724 | OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0)); |
725 | } |
726 | } |
727 | |
728 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
729 | uint64_t StackSize = Assigner.StackSize; |
730 | if (F.isVarArg()) { |
731 | if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) { |
732 | // The AAPCS variadic function ABI is identical to the non-variadic |
733 | // one. As a result there may be more arguments in registers and we should |
734 | // save them for future reference. |
735 | // Win64 variadic functions also pass arguments in registers, but all |
736 | // float arguments are passed in integer registers. |
737 | saveVarArgRegisters(MIRBuilder, Handler, CCInfo); |
738 | } else if (Subtarget.isWindowsArm64EC()) { |
739 | return false; |
740 | } |
741 | |
742 | // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. |
743 | StackSize = alignTo(Assigner.StackSize, Subtarget.isTargetILP32() ? 4 : 8); |
744 | |
745 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
746 | FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackSize, true)); |
747 | } |
748 | |
749 | if (doesCalleeRestoreStack(F.getCallingConv(), |
750 | MF.getTarget().Options.GuaranteedTailCallOpt)) { |
751 | // We have a non-standard ABI, so why not make full use of the stack that |
752 | // we're going to pop? It must be aligned to 16 B in any case. |
753 | StackSize = alignTo(StackSize, 16); |
754 | |
755 | // If we're expected to restore the stack (e.g. fastcc), then we'll be |
756 | // adding a multiple of 16. |
757 | FuncInfo->setArgumentStackToRestore(StackSize); |
758 | |
759 | // Our own callers will guarantee that the space is free by giving an |
760 | // aligned value to CALLSEQ_START. |
761 | } |
762 | |
763 | // When we tail call, we need to check if the callee's arguments |
764 | // will fit on the caller's stack. So, whenever we lower formal arguments, |
765 | // we should keep track of this information, since we might lower a tail call |
766 | // in this function later. |
767 | FuncInfo->setBytesInStackArgArea(StackSize); |
768 | |
769 | if (Subtarget.hasCustomCallingConv()) |
770 | Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); |
771 | |
772 | handleMustTailForwardedRegisters(MIRBuilder, AssignFn); |
773 | |
774 | // Move back to the end of the basic block. |
775 | MIRBuilder.setMBB(MBB); |
776 | |
777 | return true; |
778 | } |
779 | |
780 | /// Return true if the calling convention is one that we can guarantee TCO for. |
781 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { |
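| /// This is stricter than mayTailCallThisCC below: it only holds for fastcc |
| /// under -tailcallopt, tailcc, and swifttailcc, where the ABI itself |
| /// guarantees the tail call. |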
782 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || |
783 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
784 | } |
785 | |
786 | /// Return true if we might ever do TCO for calls with this calling convention. |
787 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
788 | switch (CC) { |
789 | case CallingConv::C: |
790 | case CallingConv::PreserveMost: |
791 | case CallingConv::PreserveAll: |
792 | case CallingConv::PreserveNone: |
793 | case CallingConv::Swift: |
794 | case CallingConv::SwiftTail: |
795 | case CallingConv::Tail: |
796 | case CallingConv::Fast: |
797 | return true; |
798 | default: |
799 | return false; |
800 | } |
801 | } |
802 | |
803 | /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for |
804 | /// CC. |
805 | static std::pair<CCAssignFn *, CCAssignFn *> |
806 | getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { |
807 | return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)}; |
808 | } |
809 | |
810 | bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( |
811 | CallLoweringInfo &Info, MachineFunction &MF, |
812 | SmallVectorImpl<ArgInfo> &InArgs) const { |
813 | const Function &CallerF = MF.getFunction(); |
814 | CallingConv::ID CalleeCC = Info.CallConv; |
815 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
816 | |
817 | // If the calling conventions match, then everything must be the same. |
818 | if (CalleeCC == CallerCC) |
819 | return true; |
820 | |
821 | // Check if the caller and callee will handle arguments in the same way. |
822 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
823 | CCAssignFn *CalleeAssignFnFixed; |
824 | CCAssignFn *CalleeAssignFnVarArg; |
825 | std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) = |
826 | getAssignFnsForCC(CalleeCC, TLI); |
827 | |
828 | CCAssignFn *CallerAssignFnFixed; |
829 | CCAssignFn *CallerAssignFnVarArg; |
830 | std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) = |
831 | getAssignFnsForCC(CallerCC, TLI); |
832 | |
833 | AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed, |
834 | CalleeAssignFnVarArg); |
835 | AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed, |
836 | CallerAssignFnVarArg); |
837 | |
838 | if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner)) |
839 | return false; |
840 | |
841 | // Make sure that the caller and callee preserve all of the same registers. |
842 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
843 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
844 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
845 | if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) { |
846 | TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved); |
847 | TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved); |
848 | } |
849 | |
850 | return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved); |
851 | } |
852 | |
853 | bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( |
854 | CallLoweringInfo &Info, MachineFunction &MF, |
855 | SmallVectorImpl<ArgInfo> &OrigOutArgs) const { |
856 | // If there are no outgoing arguments, then we are done. |
857 | if (OrigOutArgs.empty()) |
858 | return true; |
859 | |
860 | const Function &CallerF = MF.getFunction(); |
861 | LLVMContext &Ctx = CallerF.getContext(); |
862 | CallingConv::ID CalleeCC = Info.CallConv; |
863 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
864 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
865 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
866 | |
867 | CCAssignFn *AssignFnFixed; |
868 | CCAssignFn *AssignFnVarArg; |
869 | std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); |
870 | |
871 | // We have outgoing arguments. Make sure that we can tail call with them. |
872 | SmallVector<CCValAssign, 16> OutLocs; |
873 | CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx); |
874 | |
875 | AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg, |
876 | Subtarget, /*IsReturn*/ false); |
877 | // determineAssignments() may modify argument flags, so make a copy. |
878 | SmallVector<ArgInfo, 8> OutArgs; |
879 | append_range(OutArgs, OrigOutArgs); |
880 | if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) { |
881 | LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n"); |
882 | return false; |
883 | } |
884 | |
885 | // Make sure that they can fit on the caller's stack. |
886 | const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
887 | if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) { |
888 | LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n"); |
889 | return false; |
890 | } |
891 | |
892 | // Verify that the parameters in callee-saved registers match. |
893 | // TODO: Port this over to CallLowering as general code once swiftself is |
894 | // supported. |
895 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
896 | const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); |
897 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
898 | |
899 | if (Info.IsVarArg) { |
900 | // Be conservative and disallow variadic memory operands to match SDAG's |
901 | // behaviour. |
902 | // FIXME: If the caller's calling convention is C, then we can |
903 | // potentially use its argument area. However, for cases like fastcc, |
904 | // we can't do anything. |
905 | for (unsigned i = 0; i < OutLocs.size(); ++i) { |
906 | auto &ArgLoc = OutLocs[i]; |
907 | if (ArgLoc.isRegLoc()) |
908 | continue; |
909 | |
910 | LLVM_DEBUG( |
911 | dbgs() |
912 | << "... Cannot tail call vararg function with stack arguments\n"); |
913 | return false; |
914 | } |
915 | } |
916 | |
917 | return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs); |
918 | } |
919 | |
920 | bool AArch64CallLowering::isEligibleForTailCallOptimization( |
921 | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
922 | SmallVectorImpl<ArgInfo> &InArgs, |
923 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
924 | |
925 | // Must pass all target-independent checks in order to tail call optimize. |
926 | if (!Info.IsTailCall) |
927 | return false; |
928 | |
929 | CallingConv::ID CalleeCC = Info.CallConv; |
930 | MachineFunction &MF = MIRBuilder.getMF(); |
931 | const Function &CallerF = MF.getFunction(); |
932 | |
933 | LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n"); |
934 | |
935 | if (Info.SwiftErrorVReg) { |
936 | // TODO: We should handle this. |
937 | // Note that this is also handled by the check for no outgoing arguments. |
938 | // Proactively disabling this though, because the swifterror handling in |
939 | // lowerCall inserts a COPY *after* the location of the call. |
940 | LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n"); |
941 | return false; |
942 | } |
943 | |
944 | if (!mayTailCallThisCC(CalleeCC)) { |
945 | LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n"); |
946 | return false; |
947 | } |
948 | |
949 | // Byval parameters hand the function a pointer directly into the stack area |
950 | // we want to reuse during a tail call. Working around this *is* possible (see |
951 | // X86). |
952 | // |
953 | // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try |
954 | // it? |
955 | // |
956 | // On Windows, "inreg" attributes signify non-aggregate indirect returns. |
957 | // In this case, it is necessary to save/restore X0 in the callee. Tail |
958 | // call opt interferes with this. So we disable tail call opt when the |
959 | // caller has an argument with "inreg" attribute. |
960 | // |
961 | // FIXME: Check whether the callee also has an "inreg" argument. |
962 | // |
963 | // When the caller has a swifterror argument, we don't want to tail call |
964 | // because we would have to move into the swifterror register before the |
965 | // tail call. |
966 | if (any_of(CallerF.args(), [](const Argument &A) { |
967 | return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr(); |
968 | })) { |
969 | LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, " |
970 | "inreg, or swifterror arguments\n"); |
971 | return false; |
972 | } |
973 | |
974 | // Externally-defined functions with weak linkage should not be |
975 | // tail-called on AArch64 when the OS does not support dynamic |
976 | // pre-emption of symbols, as the AAELF spec requires normal calls |
977 | // to undefined weak functions to be replaced with a NOP or jump to the |
978 | // next instruction. The behaviour of branch instructions in this |
979 | // situation (as used for tail calls) is implementation-defined, so we |
980 | // cannot rely on the linker replacing the tail call with a return. |
981 | if (Info.Callee.isGlobal()) { |
982 | const GlobalValue *GV = Info.Callee.getGlobal(); |
983 | const Triple &TT = MF.getTarget().getTargetTriple(); |
984 | if (GV->hasExternalWeakLinkage() && |
985 | (!TT.isOSWindows() || TT.isOSBinFormatELF() || |
986 | TT.isOSBinFormatMachO())) { |
987 | LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function " |
988 | "with weak linkage for this OS.\n"); |
989 | return false; |
990 | } |
991 | } |
992 | |
993 | // If we have -tailcallopt, then we're done. |
994 | if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt)) |
995 | return CalleeCC == CallerF.getCallingConv(); |
996 | |
997 | // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). |
998 | // Try to find cases where we can do that. |
999 | |
1000 | // I want anyone implementing a new calling convention to think long and hard |
1001 | // about this assert. |
1002 | assert((!Info.IsVarArg || CalleeCC == CallingConv::C) && |
1003 | "Unexpected variadic calling convention"); |
1004 | |
1005 | // Verify that the incoming and outgoing arguments from the callee are |
1006 | // safe to tail call. |
1007 | if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) { |
1008 | LLVM_DEBUG( |
1009 | dbgs() |
1010 | << "... Caller and callee have incompatible calling conventions.\n" ); |
1011 | return false; |
1012 | } |
1013 | |
1014 | if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs)) |
1015 | return false; |
1016 | |
1017 | LLVM_DEBUG( |
1018 | dbgs() << "... Call is eligible for tail call optimization.\n"); |
1019 | return true; |
1020 | } |
1021 | |
1022 | static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, |
1023 | bool IsTailCall, |
1024 | std::optional<CallLowering::PtrAuthInfo> &PAI, |
1025 | MachineRegisterInfo &MRI) { |
1026 | const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>(); |
1027 | |
1028 | if (!IsTailCall) { |
1029 | if (!PAI) |
1030 | return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL; |
1031 | |
1032 | assert(IsIndirect && "Direct call should not be authenticated"); |
1033 | assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) && |
1034 | "Invalid auth call key"); |
1035 | return AArch64::BLRA; |
1036 | } |
1037 | |
1038 | if (!IsIndirect) |
1039 | return AArch64::TCRETURNdi; |
1040 | |
1041 | // When BTI or PAuthLR are enabled, there are restrictions on using x16 and |
1042 | // x17 to hold the function pointer. |
1043 | if (FuncInfo->branchTargetEnforcement()) { |
1044 | if (FuncInfo->branchProtectionPAuthLR()) { |
1045 | assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR"); |
1046 | return AArch64::TCRETURNrix17; |
1047 | } |
1048 | if (PAI) |
1049 | return AArch64::AUTH_TCRETURN_BTI; |
1050 | return AArch64::TCRETURNrix16x17; |
1051 | } |
1052 | |
1053 | if (FuncInfo->branchProtectionPAuthLR()) { |
1054 | assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR"); |
1055 | return AArch64::TCRETURNrinotx16; |
1056 | } |
1057 | |
1058 | if (PAI) |
1059 | return AArch64::AUTH_TCRETURN; |
1060 | return AArch64::TCRETURNri; |
1061 | } |
1062 | |
1063 | static const uint32_t * |
1064 | getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs, |
1065 | AArch64CallLowering::CallLoweringInfo &Info, |
1066 | const AArch64RegisterInfo &TRI, MachineFunction &MF) { |
1067 | const uint32_t *Mask; |
1068 | if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) { |
1069 | // For 'this' returns, use the X0-preserving mask if applicable |
1070 | Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv); |
1071 | if (!Mask) { |
1072 | OutArgs[0].Flags[0].setReturned(false); |
1073 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
1074 | } |
1075 | } else { |
1076 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
1077 | } |
1078 | return Mask; |
1079 | } |
1080 | |
1081 | bool AArch64CallLowering::lowerTailCall( |
1082 | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
1083 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
1084 | MachineFunction &MF = MIRBuilder.getMF(); |
1085 | const Function &F = MF.getFunction(); |
1086 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1087 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
1088 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
1089 | |
1090 | // True when we're tail calling, but without -tailcallopt. |
1091 | bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt && |
1092 | Info.CallConv != CallingConv::Tail && |
1093 | Info.CallConv != CallingConv::SwiftTail; |
1094 | |
1095 | // Find out which ABI gets to decide where things go. |
1096 | CallingConv::ID CalleeCC = Info.CallConv; |
1097 | CCAssignFn *AssignFnFixed; |
1098 | CCAssignFn *AssignFnVarArg; |
1099 | std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); |
1100 | |
1101 | MachineInstrBuilder CallSeqStart; |
1102 | if (!IsSibCall) |
1103 | CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN); |
1104 | |
1105 | unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, Info.PAI, MRI); |
1106 | auto MIB = MIRBuilder.buildInstrNoInsert(Opc); |
1107 | MIB.add(Info.Callee); |
1108 | |
1109 | // Tell the call which registers are clobbered. |
1110 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
1111 | auto TRI = Subtarget.getRegisterInfo(); |
1112 | |
1113 | // Byte offset for the tail call. When we are sibcalling, this will always |
1114 | // be 0. |
1115 | MIB.addImm(0); |
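| // For a true (non-sibling) tail call this placeholder is overwritten with |
| // the real FPDiff via setImm once it is known below. |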
1116 | |
1117 | // Authenticated tail calls always take key/discriminator arguments. |
1118 | if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) { |
1119 | assert((Info.PAI->Key == AArch64PACKey::IA || |
1120 | Info.PAI->Key == AArch64PACKey::IB) && |
1121 | "Invalid auth call key"); |
1122 | MIB.addImm(Info.PAI->Key); |
1123 | |
1124 | Register AddrDisc = 0; |
1125 | uint16_t IntDisc = 0; |
1126 | std::tie(IntDisc, AddrDisc) = |
1127 | extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI); |
1128 | |
1129 | MIB.addImm(IntDisc); |
1130 | MIB.addUse(AddrDisc); |
1131 | if (AddrDisc != AArch64::NoRegister) { |
1132 | MIB->getOperand(4).setReg(constrainOperandRegClass( |
1133 | MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), |
1134 | *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(), |
1135 | MIB->getOperand(4), 4)); |
1136 | } |
1137 | } |
1138 | |
1139 | // Tell the call which registers are clobbered. |
1140 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC); |
1141 | if (Subtarget.hasCustomCallingConv()) |
1142 | TRI->UpdateCustomCallPreservedMask(MF, &Mask); |
1143 | MIB.addRegMask(Mask); |
1144 | |
1145 | if (Info.CFIType) |
1146 | MIB->setCFIType(MF, Info.CFIType->getZExtValue()); |
1147 | |
1148 | if (TRI->isAnyArgRegReserved(MF)) |
1149 | TRI->emitReservedArgRegCallError(MF); |
1150 | |
1151 | // FPDiff is the byte offset of the call's argument area from the callee's. |
1152 | // Stores to callee stack arguments will be placed in FixedStackSlots offset |
1153 | // by this amount for a tail call. In a sibling call it must be 0 because the |
1154 | // caller will deallocate the entire stack and the callee still expects its |
1155 | // arguments to begin at SP+0. |
1156 | int FPDiff = 0; |
1157 | |
1158 | // This will be 0 for sibcalls, potentially nonzero for tail calls produced |
1159 | // by -tailcallopt. For sibcalls, the memory operands for the call are |
1160 | // already available in the caller's incoming argument space. |
1161 | unsigned NumBytes = 0; |
1162 | if (!IsSibCall) { |
1163 | // We aren't sibcalling, so we need to compute FPDiff. We need to do this |
1164 | // before handling assignments, because FPDiff must be known for memory |
1165 | // arguments. |
1166 | unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); |
1167 | SmallVector<CCValAssign, 16> OutLocs; |
1168 | CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext()); |
1169 | |
1170 | AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg, |
1171 | Subtarget, /*IsReturn*/ false); |
1172 | if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) |
1173 | return false; |
1174 | |
1175 | // The callee will pop the argument stack as a tail call. Thus, we must |
1176 | // keep it 16-byte aligned. |
1177 | NumBytes = alignTo(OutInfo.getStackSize(), 16); |
1178 | |
1179 | // FPDiff will be negative if this tail call requires more space than we |
1180 | // would automatically have in our incoming argument space. Positive if we |
1181 | // actually shrink the stack. |
1182 | FPDiff = NumReusableBytes - NumBytes; |
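| // For example, a caller that reserved 32 bytes of incoming argument space |
| // making a tail call that needs 48 bytes of outgoing arguments gives |
| // FPDiff = -16, so 16 extra bytes must be reserved below. |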
1183 | |
1184 | // Update the required reserved area if this is the tail call requiring the |
1185 | // most argument stack space. |
1186 | if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff) |
1187 | FuncInfo->setTailCallReservedStack(-FPDiff); |
1188 | |
1189 | // The stack pointer must be 16-byte aligned at all times it's used for a |
1190 | // memory operation, which in practice means at *all* times and in |
1191 | // particular across call boundaries. Therefore our own arguments started at |
1192 | // a 16-byte aligned SP and the delta applied for the tail call should |
1193 | // satisfy the same constraint. |
1194 | assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); |
1195 | } |
1196 | |
1197 | const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); |
1198 | |
1199 | AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg, |
1200 | Subtarget, /*IsReturn*/ false); |
1201 | |
1202 | // Do the actual argument marshalling. |
1203 | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, |
1204 | /*IsTailCall*/ true, FPDiff); |
1205 | if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder, |
1206 | CalleeCC, Info.IsVarArg)) |
1207 | return false; |
1208 | |
1209 | Mask = getMaskForArgs(OutArgs, Info, *TRI, MF); |
1210 | |
1211 | if (Info.IsVarArg && Info.IsMustTailCall) { |
1212 | // Now we know what's being passed to the function. Add uses to the call for |
1213 | // the forwarded registers that we *aren't* passing as parameters. This will |
1214 | // preserve the copies we build earlier. |
1215 | for (const auto &F : Forwards) { |
1216 | Register ForwardedReg = F.PReg; |
1217 | // If the register is already passed, or aliases a register which is |
1218 | // already being passed, then skip it. |
1219 | if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) { |
1220 | if (!Use.isReg()) |
1221 | return false; |
1222 | return TRI->regsOverlap(Use.getReg(), ForwardedReg); |
1223 | })) |
1224 | continue; |
1225 | |
1226 | // We aren't passing it already, so we should add it to the call. |
1227 | MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg)); |
1228 | MIB.addReg(ForwardedReg, RegState::Implicit); |
1229 | } |
1230 | } |
1231 | |
1232 | // If we have -tailcallopt, we need to adjust the stack. We'll do the call |
1233 | // sequence start and end here. |
1234 | if (!IsSibCall) { |
1235 | MIB->getOperand(1).setImm(FPDiff); |
1236 | CallSeqStart.addImm(0).addImm(0); |
1237 | // End the call sequence *before* emitting the call. Normally, we would |
1238 | // tidy the frame up after the call. However, here, we've laid out the |
1239 | // parameters so that when SP is reset, they will be in the correct |
1240 | // location. |
1241 | MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0); |
1242 | } |
1243 | |
1244 | // Now we can add the actual call instruction to the correct basic block. |
1245 | MIRBuilder.insertInstr(MIB); |
1246 | |
1247 | // If Callee is a reg, since it is used by a target specific instruction, |
1248 | // it must have a register class matching the constraint of that instruction. |
1249 | if (MIB->getOperand(0).isReg()) |
1250 | constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(), |
1251 | *MF.getSubtarget().getRegBankInfo(), *MIB, |
1252 | MIB->getDesc(), MIB->getOperand(0), 0); |
1253 | |
1254 | MF.getFrameInfo().setHasTailCall(); |
1255 | Info.LoweredTailCall = true; |
1256 | return true; |
1257 | } |

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  // Arm64EC has extra requirements for varargs calls; bail out for now.
  //
  // Arm64EC has special mangling rules for calls; bail out on all calls for
  // now.
  if (Subtarget.isWindowsArm64EC())
    return false;

  // Arm64EC thunks have a special calling convention which is only implemented
  // in SelectionDAG; bail out for now.
  if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
      Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
    return false;

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires that the caller zero-extend i1 arguments to 8 bits.
    auto &Flags = OrigArg.Flags[0];
    if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
      ArgInfo &OutArg = OutArgs.back();
      assert(OutArg.Regs.size() == 1 &&
             MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");

      // We cannot use a ZExt ArgInfo flag here, because it will
      // zero-extend the argument to i32 instead of just i8.
      OutArg.Regs[0] =
          MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
      LLVMContext &Ctx = MF.getFunction().getContext();
      OutArg.Ty = Type::getInt8Ty(Ctx);
    }
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  Info.IsTailCall = CanTailCallOpt;
  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
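  // CallSeqStart's immediates are filled in below once argument assignment has
  // determined how much stack the outgoing arguments need; together with
  // ADJCALLSTACKUP it brackets the call sequence.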

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.

  unsigned Opc = 0;
  // Calls with operand bundle "clang.arc.attachedcall" are special. They should
  // be expanded to the call, directly followed by a special marker sequence and
  // a call to an ObjC library function.
  if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
    Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
  // A call to a returns_twice function, like setjmp, must be followed by a BTI
  // instruction.
  else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) &&
           !Subtarget.noBTIAtReturnTwice() &&
           MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    Opc = AArch64::BLR_BTI;
  else {
    // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
    // is set.
    if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
      DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
      MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
      Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
    }
    Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI);
  }

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
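  // Operand index of the call target within MIB; bumped below when an
  // RVMARKER call prepends the ObjC runtime function as an extra operand.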
  unsigned CalleeOpNo = 0;

  if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
    // Add a target global address for the retainRV/claimRV runtime function
    // just before the call target.
    Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
    MIB.addGlobalAddress(ARCFn);
    ++CalleeOpNo;
  } else if (Info.CFIType) {
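    // Record the KCFI type hash on the call so that a later pass can emit the
    // indirect-call check.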
    MIB->setCFIType(MF, Info.CFIType->getZExtValue());
  }

  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall=*/false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

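  // Authenticated (pointer-auth) calls take the signing key and discriminator
  // as extra operands following the call target.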
  if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
    assert((Info.PAI->Key == AArch64PACKey::IA ||
            Info.PAI->Key == AArch64PACKey::IB) &&
           "Invalid auth call key");
    MIB.addImm(Info.PAI->Key);

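    // Split the discriminator into its constant-integer and address-register
    // components; a ptrauth "blend" discriminator contributes both.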
    Register AddrDisc = 0;
    uint16_t IntDisc = 0;
    std::tie(IntDisc, AddrDisc) =
        extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);

    MIB.addImm(IntDisc);
    MIB.addUse(AddrDisc);
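    // The address discriminator sits at operand CalleeOpNo + 3 (after the
    // callee, key, and integer discriminator); constrain it to the register
    // class the BLRA-family instruction expects.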
    if (AddrDisc != AArch64::NoRegister) {
      constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                               *MF.getSubtarget().getRegBankInfo(), *MIB,
                               MIB->getDesc(), MIB->getOperand(CalleeOpNo + 3),
                               CalleeOpNo + 3);
    }
  }

  // Tell the call which registers are clobbered.
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

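  // If the callee restores the stack (see doesCalleeRestoreStack), it pops the
  // whole argument area, rounded up to the 16-byte stack alignment; that count
  // becomes the second operand of ADJCALLSTACKUP.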
  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackSize, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackSize).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackSize)
      .addImm(CalleePopBytes);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (MIB->getOperand(CalleeOpNo).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             MIB->getOperand(CalleeOpNo), CalleeOpNo);

  // Finally we can copy the returned value back into its virtual register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
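    // If the first outgoing argument carries the 'returned' attribute, the
    // call leaves the return value in that argument's register, so the
    // dedicated handler can take it from there.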
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? ArrayRef(OutArgs[0].Regs) : std::nullopt))
      return false;
  }

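  // swifterror is modelled in X21 on AArch64: mark it as implicitly defined by
  // the call and copy the updated value back into its virtual register.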
  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

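  // If the return value could not be passed in registers, it was demoted to an
  // sret pointer; reload the results from the demoted stack slot.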
  if (!Info.CanLowerReturn) {
    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                    Info.DemoteRegister, Info.DemoteStackIndex);
  }
  return true;
}

bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}