1 | //===--- AArch64CallLowering.cpp - Call lowering --------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements the lowering of LLVM calls to machine code calls for |
11 | /// GlobalISel. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AArch64CallLowering.h" |
16 | #include "AArch64GlobalISelUtils.h" |
17 | #include "AArch64ISelLowering.h" |
18 | #include "AArch64MachineFunctionInfo.h" |
19 | #include "AArch64RegisterInfo.h" |
20 | #include "AArch64Subtarget.h" |
21 | #include "Utils/AArch64SMEAttributes.h" |
22 | #include "llvm/ADT/ArrayRef.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/Analysis/ObjCARCUtil.h" |
25 | #include "llvm/CodeGen/Analysis.h" |
26 | #include "llvm/CodeGen/CallingConvLower.h" |
27 | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
28 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
29 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
30 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
31 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
32 | #include "llvm/CodeGen/MachineBasicBlock.h" |
33 | #include "llvm/CodeGen/MachineFrameInfo.h" |
34 | #include "llvm/CodeGen/MachineFunction.h" |
35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | #include "llvm/CodeGen/MachineMemOperand.h" |
37 | #include "llvm/CodeGen/MachineOperand.h" |
38 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
39 | #include "llvm/CodeGen/TargetOpcodes.h" |
40 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
41 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
42 | #include "llvm/CodeGen/ValueTypes.h" |
43 | #include "llvm/CodeGenTypes/MachineValueType.h" |
44 | #include "llvm/IR/Argument.h" |
45 | #include "llvm/IR/Attributes.h" |
46 | #include "llvm/IR/Function.h" |
47 | #include "llvm/IR/Type.h" |
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
49 | #include <algorithm> |
50 | #include <cassert> |
51 | #include <cstdint> |
52 | |
53 | #define DEBUG_TYPE "aarch64-call-lowering" |
54 | |
55 | using namespace llvm; |
56 | using namespace AArch64GISelUtils; |
57 | |
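// Command-line flag defined elsewhere in the AArch64 backend; when it is
// disabled, functions using scalable vector (SVE) types fall back to
// SelectionDAG (see fallBackToDAGISel below).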
58 | extern cl::opt<bool> EnableSVEGISel; |
59 | |
60 | AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) |
61 | : CallLowering(&TLI) {} |
62 | |
63 | static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT, |
64 | MVT &LocVT) { |
65 | // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy |
66 | // hack because the DAG calls the assignment function with pre-legalized |
67 | // register typed values, not the raw type. |
68 | // |
69 | // This hack is not applied to return values which are not passed on the |
70 | // stack. |
71 | if (OrigVT == MVT::i1 || OrigVT == MVT::i8) |
72 | ValVT = LocVT = MVT::i8; |
73 | else if (OrigVT == MVT::i16) |
74 | ValVT = LocVT = MVT::i16; |
75 | } |
76 | |
77 | // Account for i1/i8/i16 stack passed value hack |
78 | static LLT getStackValueStoreTypeHack(const CCValAssign &VA) { |
79 | const MVT ValVT = VA.getValVT(); |
80 | return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT) |
81 | : LLT(VA.getLocVT()); |
82 | } |
83 | |
84 | namespace { |
85 | |
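/// Assigner for incoming values (formal arguments and call results). Applies
/// the i1/i8/i16 stack-passing hack before deferring to the generic assigner.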
86 | struct AArch64IncomingValueAssigner |
87 | : public CallLowering::IncomingValueAssigner { |
88 | AArch64IncomingValueAssigner(CCAssignFn *AssignFn_, |
89 | CCAssignFn *AssignFnVarArg_) |
90 | : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {} |
91 | |
92 | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
93 | CCValAssign::LocInfo LocInfo, |
94 | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
95 | CCState &State) override { |
96 | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
97 | return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT, |
98 | LocInfo, Info, Flags, State); |
99 | } |
100 | }; |
101 | |
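/// Assigner for outgoing values (call arguments and returned values). Chooses
/// between the fixed and vararg assignment functions and records the final
/// stack size used by the call.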
102 | struct AArch64OutgoingValueAssigner |
103 | : public CallLowering::OutgoingValueAssigner { |
104 | const AArch64Subtarget &Subtarget; |
105 | |
106 | /// Track if this is used for a return instead of function argument |
107 | /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use |
108 | /// stack passed returns for them and cannot apply the type adjustment. |
109 | bool IsReturn; |
110 | |
111 | AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_, |
112 | CCAssignFn *AssignFnVarArg_, |
113 | const AArch64Subtarget &Subtarget_, |
114 | bool IsReturn) |
115 | : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_), |
116 | Subtarget(Subtarget_), IsReturn(IsReturn) {} |
117 | |
118 | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
119 | CCValAssign::LocInfo LocInfo, |
120 | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
121 | CCState &State) override { |
122 | const Function &F = State.getMachineFunction().getFunction(); |
123 | bool IsCalleeWin = |
        Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg());
125 | bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg(); |
126 | |
127 | bool Res; |
128 | if (Info.IsFixed && !UseVarArgsCCForFixed) { |
129 | if (!IsReturn) |
130 | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
131 | Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); |
132 | } else |
133 | Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State); |
134 | |
135 | StackSize = State.getStackSize(); |
136 | return Res; |
137 | } |
138 | }; |
139 | |
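/// Common handler for incoming values: loads stack-passed values from fixed
/// frame objects and copies register-passed values out of physical registers.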
140 | struct IncomingArgHandler : public CallLowering::IncomingValueHandler { |
141 | IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
142 | : IncomingValueHandler(MIRBuilder, MRI) {} |
143 | |
144 | Register getStackAddress(uint64_t Size, int64_t Offset, |
145 | MachinePointerInfo &MPO, |
146 | ISD::ArgFlagsTy Flags) override { |
147 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
148 | |
149 | // Byval is assumed to be writable memory, but other stack passed arguments |
150 | // are not. |
151 | const bool IsImmutable = !Flags.isByVal(); |
152 | |
    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
    return AddrReg.getReg(0);
157 | } |
158 | |
159 | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
160 | ISD::ArgFlagsTy Flags) const override { |
161 | // For pointers, we just need to fixup the integer types reported in the |
162 | // CCValAssign. |
163 | if (Flags.isPointer()) |
164 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
165 | return getStackValueStoreTypeHack(VA); |
166 | } |
167 | |
168 | void assignValueToReg(Register ValVReg, Register PhysReg, |
169 | const CCValAssign &VA) override { |
    markRegUsed(PhysReg);
171 | IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); |
172 | } |
173 | |
174 | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
175 | const MachinePointerInfo &MPO, |
176 | const CCValAssign &VA) override { |
177 | MachineFunction &MF = MIRBuilder.getMF(); |
178 | |
179 | LLT ValTy(VA.getValVT()); |
180 | LLT LocTy(VA.getLocVT()); |
181 | |
182 | // Fixup the types for the DAG compatibility hack. |
183 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) |
      std::swap(ValTy, LocTy);
185 | else { |
186 | // The calling code knows if this is a pointer or not, we're only touching |
187 | // the LocTy for the i8/i16 hack. |
188 | assert(LocTy.getSizeInBits() == MemTy.getSizeInBits()); |
189 | LocTy = MemTy; |
190 | } |
191 | |
    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
        inferAlignFromPtrInfo(MF, MPO));
195 | |
196 | switch (VA.getLocInfo()) { |
197 | case CCValAssign::LocInfo::ZExt: |
      MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
      return;
    case CCValAssign::LocInfo::SExt:
      MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
      return;
    default:
      MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
205 | return; |
206 | } |
207 | } |
208 | |
209 | /// How the physical register gets marked varies between formal |
210 | /// parameters (it's a basic-block live-in), and a call instruction |
211 | /// (it's an implicit-def of the BL). |
212 | virtual void markRegUsed(Register Reg) = 0; |
213 | }; |
214 | |
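/// Handler for a function's own formal arguments; the physical argument
/// registers become live-ins of the entry block.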
215 | struct FormalArgHandler : public IncomingArgHandler { |
216 | FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
217 | : IncomingArgHandler(MIRBuilder, MRI) {} |
218 | |
219 | void markRegUsed(Register Reg) override { |
    MIRBuilder.getMRI()->addLiveIn(Reg.asMCReg());
    MIRBuilder.getMBB().addLiveIn(Reg.asMCReg());
222 | } |
223 | }; |
224 | |
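/// Handler for values returned by a call; the physical result registers are
/// added as implicit defs of the call instruction.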
225 | struct CallReturnHandler : public IncomingArgHandler { |
226 | CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
227 | MachineInstrBuilder MIB) |
228 | : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {} |
229 | |
230 | void markRegUsed(Register Reg) override { |
    MIB.addDef(Reg, RegState::Implicit);
232 | } |
233 | |
234 | MachineInstrBuilder MIB; |
235 | }; |
236 | |
237 | /// A special return arg handler for "returned" attribute arg calls. |
238 | struct ReturnedArgCallReturnHandler : public CallReturnHandler { |
239 | ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder, |
240 | MachineRegisterInfo &MRI, |
241 | MachineInstrBuilder MIB) |
242 | : CallReturnHandler(MIRBuilder, MRI, MIB) {} |
243 | |
244 | void markRegUsed(Register Reg) override {} |
245 | }; |
246 | |
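/// Handler for outgoing values: copies arguments into their assigned physical
/// registers and stores stack-passed arguments relative to SP, or into fixed
/// frame objects when tail calling.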
247 | struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { |
248 | OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
249 | MachineInstrBuilder MIB, bool IsTailCall = false, |
250 | int FPDiff = 0) |
251 | : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall), |
252 | FPDiff(FPDiff), |
253 | Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {} |
254 | |
255 | Register getStackAddress(uint64_t Size, int64_t Offset, |
256 | MachinePointerInfo &MPO, |
257 | ISD::ArgFlagsTy Flags) override { |
258 | MachineFunction &MF = MIRBuilder.getMF(); |
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);
261 | |
262 | if (IsTailCall) { |
      assert(!Flags.isByVal() && "byval unhandled with tail calls");
264 | |
265 | Offset += FPDiff; |
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
270 | } |
271 | |
272 | if (!SPReg) |
      SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);

    auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);

    auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
281 | } |
282 | |
283 | /// We need to fixup the reported store size for certain value types because |
284 | /// we invert the interpretation of ValVT and LocVT in certain cases. This is |
285 | /// for compatibility with the DAG call lowering implementation, which we're |
286 | /// currently building on top of. |
287 | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
288 | ISD::ArgFlagsTy Flags) const override { |
289 | if (Flags.isPointer()) |
290 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
291 | return getStackValueStoreTypeHack(VA); |
292 | } |
293 | |
294 | void assignValueToReg(Register ValVReg, Register PhysReg, |
295 | const CCValAssign &VA) override { |
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
299 | } |
300 | |
301 | /// Check whether a stack argument requires lowering in a tail call. |
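  ///
  /// The store to the outgoing-argument area can be skipped when the value is
  /// a load (possibly extended or bitcast) from the caller's matching
  /// immutable incoming-argument slot, i.e. the bytes are already in place.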
302 | static bool shouldLowerTailCallStackArg(const MachineFunction &MF, |
303 | const CCValAssign &VA, |
304 | Register ValVReg, |
305 | Register StoreAddr) { |
306 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
    // Look at the defining instruction for the value.
    auto *DefMI = MRI.getVRegDef(ValVReg);
    assert(DefMI && "No defining instruction");
310 | for (;;) { |
311 | // Look through nodes that don't alter the bits of the incoming value. |
312 | unsigned Op = DefMI->getOpcode(); |
313 | if (Op == TargetOpcode::G_ZEXT || Op == TargetOpcode::G_ANYEXT || |
          Op == TargetOpcode::G_BITCAST || isAssertMI(*DefMI)) {
        DefMI = MRI.getVRegDef(DefMI->getOperand(1).getReg());
316 | continue; |
317 | } |
318 | break; |
319 | } |
320 | |
    auto *Load = dyn_cast<GLoad>(DefMI);
322 | if (!Load) |
323 | return true; |
324 | Register LoadReg = Load->getPointerReg(); |
    auto *LoadAddrDef = MRI.getVRegDef(LoadReg);
326 | if (LoadAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX) |
327 | return true; |
328 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
    int LoadFI = LoadAddrDef->getOperand(1).getIndex();
330 | |
    auto *StoreAddrDef = MRI.getVRegDef(StoreAddr);
332 | if (StoreAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX) |
333 | return true; |
    int StoreFI = StoreAddrDef->getOperand(1).getIndex();
335 | |
    if (!MFI.isImmutableObjectIndex(LoadFI))
      return true;
    if (MFI.getObjectOffset(LoadFI) != MFI.getObjectOffset(StoreFI))
      return true;
    if (Load->getMemSize() != MFI.getObjectSize(StoreFI))
      return true;
341 | return true; |
342 | |
343 | return false; |
344 | } |
345 | |
346 | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
347 | const MachinePointerInfo &MPO, |
348 | const CCValAssign &VA) override { |
349 | MachineFunction &MF = MIRBuilder.getMF(); |
    if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, ValVReg, Addr))
351 | return; |
    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
                                       inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
355 | } |
356 | |
357 | void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex, |
358 | Register Addr, LLT MemTy, |
359 | const MachinePointerInfo &MPO, |
360 | const CCValAssign &VA) override { |
361 | unsigned MaxSize = MemTy.getSizeInBytes() * 8; |
362 | // For varargs, we always want to extend them to 8 bytes, in which case |
363 | // we disable setting a max. |
364 | if (!Arg.IsFixed) |
365 | MaxSize = 0; |
366 | |
367 | Register ValVReg = Arg.Regs[RegIndex]; |
368 | if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) { |
369 | MVT LocVT = VA.getLocVT(); |
370 | MVT ValVT = VA.getValVT(); |
371 | |
372 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) { |
        std::swap(ValVT, LocVT);
374 | MemTy = LLT(VA.getValVT()); |
375 | } |
376 | |
      ValVReg = extendRegister(ValVReg, VA, MaxSize);
378 | } else { |
379 | // The store does not cover the full allocated stack slot. |
380 | MemTy = LLT(VA.getValVT()); |
381 | } |
382 | |
383 | assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA); |
384 | } |
385 | |
386 | MachineInstrBuilder MIB; |
387 | |
388 | bool IsTailCall; |
389 | |
390 | /// For tail calls, the byte offset of the call's argument area from the |
391 | /// callee's. Unused elsewhere. |
392 | int FPDiff; |
393 | |
394 | // Cache the SP register vreg if we need it more than once in this call site. |
395 | Register SPReg; |
396 | |
397 | const AArch64Subtarget &Subtarget; |
398 | }; |
399 | } // namespace |
400 | |
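/// Return true if the callee is expected to pop its own stack arguments, i.e.
/// for calling conventions that guarantee tail calls.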
401 | static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { |
402 | return (CallConv == CallingConv::Fast && TailCallOpt) || |
403 | CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; |
404 | } |
405 | |
406 | bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, |
407 | const Value *Val, |
408 | ArrayRef<Register> VRegs, |
409 | FunctionLoweringInfo &FLI, |
410 | Register SwiftErrorVReg) const { |
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");
414 | |
415 | bool Success = true; |
416 | if (!FLI.CanLowerReturn) { |
    insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
418 | } else if (!VRegs.empty()) { |
419 | MachineFunction &MF = MIRBuilder.getMF(); |
420 | const Function &F = MF.getFunction(); |
421 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
422 | |
423 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
424 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
426 | auto &DL = F.getDataLayout(); |
427 | LLVMContext &Ctx = Val->getType()->getContext(); |
428 | |
429 | SmallVector<EVT, 4> SplitEVTs; |
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");
433 | |
434 | SmallVector<ArgInfo, 8> SplitArgs; |
435 | CallingConv::ID CC = F.getCallingConv(); |
436 | |
437 | for (unsigned i = 0; i < SplitEVTs.size(); ++i) { |
438 | Register CurVReg = VRegs[i]; |
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
441 | |
442 | // i1 is a special case because SDAG i1 true is naturally zero extended |
443 | // when widened using ANYEXT. We need to do it explicitly here. |
444 | auto &Flags = CurArgInfo.Flags[0]; |
      if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
          !Flags.isSExt() && !Flags.isZExt()) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
449 | 1) { |
450 | // Some types will need extending as specified by the CC. |
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
452 | if (EVT(NewVT) != SplitEVTs[i]) { |
453 | unsigned ExtendOp = TargetOpcode::G_ANYEXT; |
          if (F.getAttributes().hasRetAttr(Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
457 | ExtendOp = TargetOpcode::G_ZEXT; |
458 | |
459 | LLT NewLLT(NewVT); |
          LLT OldLLT = getLLTForType(*CurArgInfo.Ty, DL);
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
462 | // Instead of an extend, we might have a vector type which needs |
463 | // padding with more elements, e.g. <2 x half> -> <4 x half>. |
464 | if (NewVT.isVector()) { |
465 | if (OldLLT.isVector()) { |
466 | if (NewLLT.getNumElements() > OldLLT.getNumElements()) { |
                CurVReg =
                    MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
                        .getReg(0);
470 | } else { |
471 | // Just do a vector extend. |
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
474 | } |
475 | } else if (NewLLT.getNumElements() >= 2 && |
476 | NewLLT.getNumElements() <= 8) { |
477 | // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't |
478 | // have <1 x S> vector types in GISel we use a build_vector |
479 | // instead of a vector merge/concat. |
              CurVReg =
                  MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
                      .getReg(0);
483 | } else { |
              LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
485 | return false; |
486 | } |
487 | } else { |
488 | // If the split EVT was a <1 x T> vector, and NewVT is T, then we |
489 | // don't have to do anything since we don't distinguish between the |
490 | // two. |
            if (NewLLT != MRI.getType(CurVReg)) {
492 | // A scalar extend. |
              CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                            .getReg(0);
495 | } |
496 | } |
497 | } |
498 | } |
499 | if (CurVReg != CurArgInfo.Regs[0]) { |
500 | CurArgInfo.Regs[0] = CurVReg; |
501 | // Reset the arg flags after modifying CurVReg. |
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
503 | } |
      splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
505 | } |
506 | |
507 | AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget, |
508 | /*IsReturn*/ true); |
509 | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB); |
    Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
                                            MIRBuilder, CC, F.isVarArg());
512 | } |
513 | |
514 | if (SwiftErrorVReg) { |
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
517 | } |
518 | |
519 | MIRBuilder.insertInstr(MIB); |
520 | return Success; |
521 | } |
522 | |
523 | bool AArch64CallLowering::canLowerReturn(MachineFunction &MF, |
524 | CallingConv::ID CallConv, |
525 | SmallVectorImpl<BaseArgInfo> &Outs, |
526 | bool IsVarArg) const { |
527 | SmallVector<CCValAssign, 16> ArgLocs; |
528 | const auto &TLI = *getTLI<AArch64TargetLowering>(); |
529 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, |
530 | MF.getFunction().getContext()); |
531 | |
  return checkReturn(CCInfo, Outs, TLI.CCAssignFnForReturn(CallConv));
533 | } |
534 | |
535 | /// Helper function to compute forwarded registers for musttail calls. Computes |
536 | /// the forwarded registers, sets MBB liveness, and emits COPY instructions that |
537 | /// can be used to save + restore registers later. |
538 | static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, |
539 | CCAssignFn *AssignFn) { |
540 | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
541 | MachineFunction &MF = MIRBuilder.getMF(); |
542 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
543 | |
544 | if (!MFI.hasMustTailInVarArgFunc()) |
545 | return; |
546 | |
547 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
548 | const Function &F = MF.getFunction(); |
  assert(F.isVarArg() && "Expected F to be vararg?");
550 | |
551 | // Compute the set of forwarded registers. The rest are scratch. |
552 | SmallVector<CCValAssign, 16> ArgLocs; |
553 | CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs, |
554 | F.getContext()); |
555 | SmallVector<MVT, 2> RegParmTypes; |
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);
558 | |
559 | // Later on, we can use this vector to restore the registers if necessary. |
560 | SmallVectorImpl<ForwardedRegister> &Forwards = |
561 | FuncInfo->getForwardedMustTailRegParms(); |
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);
563 | |
564 | // Conservatively forward X8, since it might be used for an aggregate |
565 | // return. |
  if (!CCInfo.isAllocated(AArch64::X8)) {
    Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
569 | } |
570 | |
571 | // Add the forwards to the MachineBasicBlock and MachineFunction. |
572 | for (const auto &F : Forwards) { |
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
575 | } |
576 | } |
577 | |
578 | bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { |
579 | auto &F = MF.getFunction(); |
580 | if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() || |
                          llvm::any_of(F.args(), [](const Argument &A) {
582 | return A.getType()->isScalableTy(); |
583 | }))) |
584 | return true; |
585 | const auto &ST = MF.getSubtarget<AArch64Subtarget>(); |
586 | if (!ST.hasNEON() || !ST.hasFPARMv8()) { |
    LLVM_DEBUG(
        dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
588 | return true; |
589 | } |
590 | |
591 | SMEAttrs Attrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs(); |
592 | if (Attrs.hasZAState() || Attrs.hasZT0State() || |
593 | Attrs.hasStreamingInterfaceOrBody() || |
594 | Attrs.hasStreamingCompatibleInterface()) |
595 | return true; |
596 | |
597 | return false; |
598 | } |
599 | |
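/// Spill the remaining, unallocated GPR and FPR argument registers of a
/// variadic function to their save areas so that va_arg can find them later,
/// and record the save-area locations in AArch64FunctionInfo.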
600 | void AArch64CallLowering::saveVarArgRegisters( |
601 | MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler, |
602 | CCState &CCInfo) const { |
603 | auto GPRArgRegs = AArch64::getGPRArgRegs(); |
604 | auto FPRArgRegs = AArch64::getFPRArgRegs(); |
605 | |
606 | MachineFunction &MF = MIRBuilder.getMF(); |
607 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
608 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
609 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
610 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
  bool IsWin64CC = Subtarget.isCallingConvWin64(CCInfo.getCallingConv(),
                                                MF.getFunction().isVarArg());
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s64 = LLT::scalar(64);
615 | |
  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
617 | unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1; |
618 | |
619 | unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR); |
620 | int GPRIdx = 0; |
621 | if (GPRSaveSize != 0) { |
622 | if (IsWin64CC) { |
      GPRIdx = MFI.CreateFixedObject(GPRSaveSize,
                                     -static_cast<int>(GPRSaveSize), false);
      if (GPRSaveSize & 15)
        // The extra size here, if triggered, will always be 8.
        MFI.CreateFixedObject(16 - (GPRSaveSize & 15),
                              -static_cast<int>(alignTo(GPRSaveSize, 16)),
                              false);
    } else
      GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
632 | |
    auto FIN = MIRBuilder.buildFrameIndex(p0, GPRIdx);
    auto Offset =
        MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 8);
636 | |
637 | for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) { |
      Register Val = MRI.createGenericVirtualRegister(s64);
      Handler.assignValueToReg(
          Val, GPRArgRegs[i],
          CCValAssign::getReg(i + MF.getFunction().getNumOperands(), MVT::i64,
                              GPRArgRegs[i], MVT::i64, CCValAssign::Full));
      auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
                                 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
                           : MachinePointerInfo::getStack(MF, i * 8);
      MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));

      FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
                                   FIN.getReg(0), Offset);
650 | } |
651 | } |
652 | FuncInfo->setVarArgsGPRIndex(GPRIdx); |
653 | FuncInfo->setVarArgsGPRSize(GPRSaveSize); |
654 | |
655 | if (Subtarget.hasFPARMv8() && !IsWin64CC) { |
    unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
657 | |
658 | unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR); |
659 | int FPRIdx = 0; |
660 | if (FPRSaveSize != 0) { |
      FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);

      auto FIN = MIRBuilder.buildFrameIndex(p0, FPRIdx);
      auto Offset =
          MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 16);
666 | |
667 | for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) { |
        Register Val = MRI.createGenericVirtualRegister(LLT::scalar(128));
        Handler.assignValueToReg(
            Val, FPRArgRegs[i],
            CCValAssign::getReg(
                i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
                MVT::f128, FPRArgRegs[i], MVT::f128, CCValAssign::Full));

        auto MPO = MachinePointerInfo::getStack(MF, i * 16);
        MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));

        FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
                                     FIN.getReg(0), Offset);
680 | } |
681 | } |
682 | FuncInfo->setVarArgsFPRIndex(FPRIdx); |
683 | FuncInfo->setVarArgsFPRSize(FPRSaveSize); |
684 | } |
685 | } |
686 | |
687 | bool AArch64CallLowering::lowerFormalArguments( |
688 | MachineIRBuilder &MIRBuilder, const Function &F, |
689 | ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { |
690 | MachineFunction &MF = MIRBuilder.getMF(); |
691 | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
692 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
693 | auto &DL = F.getDataLayout(); |
694 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
695 | |
696 | // Arm64EC has extra requirements for varargs calls which are only implemented |
697 | // in SelectionDAG; bail out for now. |
698 | if (F.isVarArg() && Subtarget.isWindowsArm64EC()) |
699 | return false; |
700 | |
701 | // Arm64EC thunks have a special calling convention which is only implemented |
702 | // in SelectionDAG; bail out for now. |
703 | if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native || |
704 | F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64) |
705 | return false; |
706 | |
707 | bool IsWin64 = |
      Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()) &&
709 | !Subtarget.isWindowsArm64EC(); |
710 | |
711 | SmallVector<ArgInfo, 8> SplitArgs; |
712 | SmallVector<std::pair<Register, Register>> BoolArgs; |
713 | |
714 | // Insert the hidden sret parameter if the return value won't fit in the |
715 | // return registers. |
716 | if (!FLI.CanLowerReturn) |
    insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
718 | |
719 | unsigned i = 0; |
720 | for (auto &Arg : F.args()) { |
    if (DL.getTypeStoreSize(Arg.getType()).isZero())
722 | continue; |
723 | |
724 | ArgInfo OrigArg{VRegs[i], Arg, i}; |
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
726 | |
727 | // i1 arguments are zero-extended to i8 by the caller. Emit a |
728 | // hint to reflect this. |
    if (OrigArg.Ty->isIntegerTy(1)) {
      assert(OrigArg.Regs.size() == 1 &&
             MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");
733 | |
734 | auto &Flags = OrigArg.Flags[0]; |
735 | if (!Flags.isZExt() && !Flags.isSExt()) { |
736 | // Lower i1 argument as i8, and insert AssertZExt + Trunc later. |
737 | Register OrigReg = OrigArg.Regs[0]; |
        Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
739 | OrigArg.Regs[0] = WideReg; |
        BoolArgs.push_back({OrigReg, WideReg});
741 | } |
742 | } |
743 | |
    if (Arg.hasAttribute(Attribute::SwiftAsync))
745 | MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); |
746 | |
    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
748 | ++i; |
749 | } |
750 | |
751 | if (!MBB.empty()) |
752 | MIRBuilder.setInstr(*MBB.begin()); |
753 | |
754 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg());
756 | |
757 | AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn); |
758 | FormalArgHandler Handler(MIRBuilder, MRI); |
759 | SmallVector<CCValAssign, 16> ArgLocs; |
760 | CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); |
  if (!determineAssignments(Assigner, SplitArgs, CCInfo) ||
      !handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, MIRBuilder))
763 | return false; |
764 | |
765 | if (!BoolArgs.empty()) { |
766 | for (auto &KV : BoolArgs) { |
767 | Register OrigReg = KV.first; |
768 | Register WideReg = KV.second; |
      LLT WideTy = MRI.getType(WideReg);
      assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
             "Unexpected bit size of a bool arg");
      MIRBuilder.buildTrunc(
          OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
774 | } |
775 | } |
776 | |
777 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
778 | uint64_t StackSize = Assigner.StackSize; |
779 | if (F.isVarArg()) { |
    if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) ||
        IsWin64) {
781 | // The AAPCS variadic function ABI is identical to the non-variadic |
782 | // one. As a result there may be more arguments in registers and we should |
783 | // save them for future reference. |
784 | // Win64 variadic functions also pass arguments in registers, but all |
785 | // float arguments are passed in integer registers. |
786 | saveVarArgRegisters(MIRBuilder, Handler, CCInfo); |
787 | } else if (Subtarget.isWindowsArm64EC()) { |
788 | return false; |
789 | } |
790 | |
791 | // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. |
    StackSize = alignTo(Assigner.StackSize, Subtarget.isTargetILP32() ? 4 : 8);
793 | |
794 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackSize, true));
796 | } |
797 | |
  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
800 | // We have a non-standard ABI, so why not make full use of the stack that |
801 | // we're going to pop? It must be aligned to 16 B in any case. |
    StackSize = alignTo(StackSize, 16);
803 | |
804 | // If we're expected to restore the stack (e.g. fastcc), then we'll be |
805 | // adding a multiple of 16. |
806 | FuncInfo->setArgumentStackToRestore(StackSize); |
807 | |
808 | // Our own callers will guarantee that the space is free by giving an |
809 | // aligned value to CALLSEQ_START. |
810 | } |
811 | |
812 | // When we tail call, we need to check if the callee's arguments |
813 | // will fit on the caller's stack. So, whenever we lower formal arguments, |
814 | // we should keep track of this information, since we might lower a tail call |
815 | // in this function later. |
816 | FuncInfo->setBytesInStackArgArea(StackSize); |
817 | |
818 | if (Subtarget.hasCustomCallingConv()) |
819 | Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); |
820 | |
821 | handleMustTailForwardedRegisters(MIRBuilder, AssignFn); |
822 | |
823 | // Move back to the end of the basic block. |
824 | MIRBuilder.setMBB(MBB); |
825 | |
826 | return true; |
827 | } |
828 | |
829 | /// Return true if the calling convention is one that we can guarantee TCO for. |
830 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { |
831 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || |
832 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
833 | } |
834 | |
835 | /// Return true if we might ever do TCO for calls with this calling convention. |
836 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
837 | switch (CC) { |
838 | case CallingConv::C: |
839 | case CallingConv::PreserveMost: |
840 | case CallingConv::PreserveAll: |
841 | case CallingConv::PreserveNone: |
842 | case CallingConv::Swift: |
843 | case CallingConv::SwiftTail: |
844 | case CallingConv::Tail: |
845 | case CallingConv::Fast: |
846 | return true; |
847 | default: |
848 | return false; |
849 | } |
850 | } |
851 | |
852 | /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for |
853 | /// CC. |
854 | static std::pair<CCAssignFn *, CCAssignFn *> |
855 | getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { |
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
857 | } |
858 | |
859 | bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( |
860 | CallLoweringInfo &Info, MachineFunction &MF, |
861 | SmallVectorImpl<ArgInfo> &InArgs) const { |
862 | const Function &CallerF = MF.getFunction(); |
863 | CallingConv::ID CalleeCC = Info.CallConv; |
864 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
865 | |
866 | // If the calling conventions match, then everything must be the same. |
867 | if (CalleeCC == CallerCC) |
868 | return true; |
869 | |
870 | // Check if the caller and callee will handle arguments in the same way. |
871 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
872 | CCAssignFn *CalleeAssignFnFixed; |
873 | CCAssignFn *CalleeAssignFnVarArg; |
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);
876 | |
877 | CCAssignFn *CallerAssignFnFixed; |
878 | CCAssignFn *CallerAssignFnVarArg; |
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);
881 | |
882 | AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed, |
883 | CalleeAssignFnVarArg); |
884 | AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed, |
885 | CallerAssignFnVarArg); |
886 | |
887 | if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner)) |
888 | return false; |
889 | |
890 | // Make sure that the caller and callee preserve all of the same registers. |
891 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
892 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
893 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
894 | if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) { |
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
897 | } |
898 | |
  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
900 | } |
901 | |
902 | bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( |
903 | CallLoweringInfo &Info, MachineFunction &MF, |
904 | SmallVectorImpl<ArgInfo> &OrigOutArgs) const { |
905 | // If there are no outgoing arguments, then we are done. |
906 | if (OrigOutArgs.empty()) |
907 | return true; |
908 | |
909 | const Function &CallerF = MF.getFunction(); |
910 | LLVMContext &Ctx = CallerF.getContext(); |
911 | CallingConv::ID CalleeCC = Info.CallConv; |
912 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
913 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
914 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
915 | |
916 | CCAssignFn *AssignFnFixed; |
917 | CCAssignFn *AssignFnVarArg; |
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
919 | |
920 | // We have outgoing arguments. Make sure that we can tail call with them. |
921 | SmallVector<CCValAssign, 16> OutLocs; |
922 | CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx); |
923 | |
924 | AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg, |
925 | Subtarget, /*IsReturn*/ false); |
926 | // determineAssignments() may modify argument flags, so make a copy. |
927 | SmallVector<ArgInfo, 8> OutArgs; |
  append_range(OutArgs, OrigOutArgs);
  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
931 | return false; |
932 | } |
933 | |
934 | // Make sure that they can fit on the caller's stack. |
935 | const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
936 | if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) { |
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
938 | return false; |
939 | } |
940 | |
941 | // Verify that the parameters in callee-saved registers match. |
942 | // TODO: Port this over to CallLowering as general code once swiftself is |
943 | // supported. |
944 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
945 | const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); |
946 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
947 | |
948 | if (Info.IsVarArg) { |
949 | // Be conservative and disallow variadic memory operands to match SDAG's |
950 | // behaviour. |
951 | // FIXME: If the caller's calling convention is C, then we can |
952 | // potentially use its argument area. However, for cases like fastcc, |
953 | // we can't do anything. |
954 | for (unsigned i = 0; i < OutLocs.size(); ++i) { |
955 | auto &ArgLoc = OutLocs[i]; |
956 | if (ArgLoc.isRegLoc()) |
957 | continue; |
958 | |
959 | LLVM_DEBUG( |
960 | dbgs() |
          << "... Cannot tail call vararg function with stack arguments\n");
962 | return false; |
963 | } |
964 | } |
965 | |
  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
967 | } |
968 | |
969 | bool AArch64CallLowering::isEligibleForTailCallOptimization( |
970 | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
971 | SmallVectorImpl<ArgInfo> &InArgs, |
972 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
973 | |
974 | // Must pass all target-independent checks in order to tail call optimize. |
975 | if (!Info.IsTailCall) |
976 | return false; |
977 | |
978 | CallingConv::ID CalleeCC = Info.CallConv; |
979 | MachineFunction &MF = MIRBuilder.getMF(); |
980 | const Function &CallerF = MF.getFunction(); |
981 | |
  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
983 | |
984 | if (Info.SwiftErrorVReg) { |
985 | // TODO: We should handle this. |
986 | // Note that this is also handled by the check for no outgoing arguments. |
987 | // Proactively disabling this though, because the swifterror handling in |
988 | // lowerCall inserts a COPY *after* the location of the call. |
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
990 | return false; |
991 | } |
992 | |
  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
995 | return false; |
996 | } |
997 | |
998 | // Byval parameters hand the function a pointer directly into the stack area |
999 | // we want to reuse during a tail call. Working around this *is* possible (see |
1000 | // X86). |
1001 | // |
1002 | // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try |
1003 | // it? |
1004 | // |
1005 | // On Windows, "inreg" attributes signify non-aggregate indirect returns. |
1006 | // In this case, it is necessary to save/restore X0 in the callee. Tail |
1007 | // call opt interferes with this. So we disable tail call opt when the |
1008 | // caller has an argument with "inreg" attribute. |
1009 | // |
1010 | // FIXME: Check whether the callee also has an "inreg" argument. |
1011 | // |
1012 | // When the caller has a swifterror argument, we don't want to tail call |
  // because we would have to move into the swifterror register before the
1014 | // tail call. |
  if (any_of(CallerF.args(), [](const Argument &A) {
1016 | return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr(); |
1017 | })) { |
1018 | LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, " |
                         "inreg, or swifterror arguments\n");
1020 | return false; |
1021 | } |
1022 | |
1023 | // Externally-defined functions with weak linkage should not be |
1024 | // tail-called on AArch64 when the OS does not support dynamic |
1025 | // pre-emption of symbols, as the AAELF spec requires normal calls |
1026 | // to undefined weak functions to be replaced with a NOP or jump to the |
1027 | // next instruction. The behaviour of branch instructions in this |
1028 | // situation (as used for tail calls) is implementation-defined, so we |
1029 | // cannot rely on the linker replacing the tail call with a return. |
1030 | if (Info.Callee.isGlobal()) { |
1031 | const GlobalValue *GV = Info.Callee.getGlobal(); |
1032 | const Triple &TT = MF.getTarget().getTargetTriple(); |
1033 | if (GV->hasExternalWeakLinkage() && |
1034 | (!TT.isOSWindows() || TT.isOSBinFormatELF() || |
1035 | TT.isOSBinFormatMachO())) { |
1036 | LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function " |
                           "with weak linkage for this OS.\n");
1038 | return false; |
1039 | } |
1040 | } |
1041 | |
1042 | // If we have -tailcallopt, then we're done. |
  if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
1044 | return CalleeCC == CallerF.getCallingConv(); |
1045 | |
1046 | // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). |
1047 | // Try to find cases where we can do that. |
1048 | |
1049 | // I want anyone implementing a new calling convention to think long and hard |
1050 | // about this assert. |
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");
1053 | |
1054 | // Verify that the incoming and outgoing arguments from the callee are |
1055 | // safe to tail call. |
1056 | if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) { |
1057 | LLVM_DEBUG( |
1058 | dbgs() |
        << "... Caller and callee have incompatible calling conventions.\n");
1060 | return false; |
1061 | } |
1062 | |
  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
1064 | return false; |
1065 | |
1066 | LLVM_DEBUG( |
      dbgs() << "... Call is eligible for tail call optimization.\n");
1068 | return true; |
1069 | } |
1070 | |
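/// Pick the opcode for this call: BL/BLR(A) for ordinary calls, or one of the
/// TCRETURN/AUTH_TCRETURN pseudos for tail calls, honouring the BTI and
/// PAuthLR restrictions on which registers may hold the callee address.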
1071 | static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, |
1072 | bool IsTailCall, |
1073 | std::optional<CallLowering::PtrAuthInfo> &PAI, |
1074 | MachineRegisterInfo &MRI) { |
1075 | const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>(); |
1076 | |
1077 | if (!IsTailCall) { |
1078 | if (!PAI) |
    return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;
1080 | |
    assert(IsIndirect && "Direct call should not be authenticated");
    assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) &&
           "Invalid auth call key");
1084 | return AArch64::BLRA; |
1085 | } |
1086 | |
1087 | if (!IsIndirect) |
1088 | return AArch64::TCRETURNdi; |
1089 | |
1090 | // When BTI or PAuthLR are enabled, there are restrictions on using x16 and |
1091 | // x17 to hold the function pointer. |
1092 | if (FuncInfo->branchTargetEnforcement()) { |
1093 | if (FuncInfo->branchProtectionPAuthLR()) { |
      assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1095 | return AArch64::TCRETURNrix17; |
1096 | } |
1097 | if (PAI) |
1098 | return AArch64::AUTH_TCRETURN_BTI; |
1099 | return AArch64::TCRETURNrix16x17; |
1100 | } |
1101 | |
1102 | if (FuncInfo->branchProtectionPAuthLR()) { |
    assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR");
1104 | return AArch64::TCRETURNrinotx16; |
1105 | } |
1106 | |
1107 | if (PAI) |
1108 | return AArch64::AUTH_TCRETURN; |
1109 | return AArch64::TCRETURNri; |
1110 | } |
1111 | |
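/// Compute the register mask of callee-preserved registers for this call,
/// using the X0-preserving "this"-return mask when the first argument carries
/// the 'returned' attribute and such a mask exists for the convention.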
1112 | static const uint32_t * |
1113 | getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs, |
1114 | AArch64CallLowering::CallLoweringInfo &Info, |
1115 | const AArch64RegisterInfo &TRI, MachineFunction &MF) { |
1116 | const uint32_t *Mask; |
1117 | if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) { |
1118 | // For 'this' returns, use the X0-preserving mask if applicable |
1119 | Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv); |
1120 | if (!Mask) { |
1121 | OutArgs[0].Flags[0].setReturned(false); |
1122 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
1123 | } |
1124 | } else { |
1125 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
1126 | } |
1127 | return Mask; |
1128 | } |
1129 | |
1130 | bool AArch64CallLowering::lowerTailCall( |
1131 | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
1132 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
1133 | MachineFunction &MF = MIRBuilder.getMF(); |
1134 | const Function &F = MF.getFunction(); |
1135 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1136 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
1137 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
1138 | |
1139 | // True when we're tail calling, but without -tailcallopt. |
1140 | bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt && |
1141 | Info.CallConv != CallingConv::Tail && |
1142 | Info.CallConv != CallingConv::SwiftTail; |
1143 | |
1144 | // Find out which ABI gets to decide where things go. |
1145 | CallingConv::ID CalleeCC = Info.CallConv; |
1146 | CCAssignFn *AssignFnFixed; |
1147 | CCAssignFn *AssignFnVarArg; |
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
1149 | |
1150 | MachineInstrBuilder CallSeqStart; |
1151 | if (!IsSibCall) |
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
1153 | |
  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, Info.PAI, MRI);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);
1157 | |
1158 | // Tell the call which registers are clobbered. |
1159 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
1160 | auto TRI = Subtarget.getRegisterInfo(); |
1161 | |
1162 | // Byte offset for the tail call. When we are sibcalling, this will always |
1163 | // be 0. |
  MIB.addImm(0);
1165 | |
1166 | // Authenticated tail calls always take key/discriminator arguments. |
1167 | if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) { |
    assert((Info.PAI->Key == AArch64PACKey::IA ||
            Info.PAI->Key == AArch64PACKey::IB) &&
           "Invalid auth call key");
    MIB.addImm(Info.PAI->Key);
1172 | |
1173 | Register AddrDisc = 0; |
1174 | uint16_t IntDisc = 0; |
    std::tie(IntDisc, AddrDisc) =
        extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);
1177 | |
    MIB.addImm(IntDisc);
    MIB.addUse(AddrDisc);
1180 | if (AddrDisc != AArch64::NoRegister) { |
      MIB->getOperand(4).setReg(constrainOperandRegClass(
          MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
          *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(),
          MIB->getOperand(4), 4));
1185 | } |
1186 | } |
1187 | |
1188 | // Tell the call which registers are clobbered. |
1189 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC); |
1190 | if (Subtarget.hasCustomCallingConv()) |
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
1192 | MIB.addRegMask(Mask); |
1193 | |
1194 | if (Info.CFIType) |
    MIB->setCFIType(MF, Info.CFIType->getZExtValue());
1196 | |
1197 | if (TRI->isAnyArgRegReserved(MF)) |
1198 | TRI->emitReservedArgRegCallError(MF); |
1199 | |
1200 | // FPDiff is the byte offset of the call's argument area from the callee's. |
1201 | // Stores to callee stack arguments will be placed in FixedStackSlots offset |
1202 | // by this amount for a tail call. In a sibling call it must be 0 because the |
1203 | // caller will deallocate the entire stack and the callee still expects its |
1204 | // arguments to begin at SP+0. |
1205 | int FPDiff = 0; |
1206 | |
1207 | // This will be 0 for sibcalls, potentially nonzero for tail calls produced |
1208 | // by -tailcallopt. For sibcalls, the memory operands for the call are |
1209 | // already available in the caller's incoming argument space. |
1210 | unsigned NumBytes = 0; |
1211 | if (!IsSibCall) { |
1212 | // We aren't sibcalling, so we need to compute FPDiff. We need to do this |
1213 | // before handling assignments, because FPDiff must be known for memory |
1214 | // arguments. |
1215 | unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); |
1216 | SmallVector<CCValAssign, 16> OutLocs; |
1217 | CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext()); |
1218 | |
1219 | AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg, |
1220 | Subtarget, /*IsReturn*/ false); |
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
1222 | return false; |
1223 | |
1224 | // The callee will pop the argument stack as a tail call. Thus, we must |
1225 | // keep it 16-byte aligned. |
    NumBytes = alignTo(OutInfo.getStackSize(), 16);
1227 | |
1228 | // FPDiff will be negative if this tail call requires more space than we |
1229 | // would automatically have in our incoming argument space. Positive if we |
1230 | // actually shrink the stack. |
1231 | FPDiff = NumReusableBytes - NumBytes; |
1232 | |
1233 | // Update the required reserved area if this is the tail call requiring the |
1234 | // most argument stack space. |
1235 | if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff) |
1236 | FuncInfo->setTailCallReservedStack(-FPDiff); |
1237 | |
1238 | // The stack pointer must be 16-byte aligned at all times it's used for a |
1239 | // memory operation, which in practice means at *all* times and in |
1240 | // particular across call boundaries. Therefore our own arguments started at |
1241 | // a 16-byte aligned SP and the delta applied for the tail call should |
1242 | // satisfy the same constraint. |
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
1244 | } |
1245 | |
1246 | const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); |
1247 | |
1248 | AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg, |
1249 | Subtarget, /*IsReturn*/ false); |
1250 | |
1251 | // Do the actual argument marshalling. |
1252 | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, |
1253 | /*IsTailCall*/ true, FPDiff); |
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
1256 | return false; |
1257 | |
  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);
1259 | |
1260 | if (Info.IsVarArg && Info.IsMustTailCall) { |
1261 | // Now we know what's being passed to the function. Add uses to the call for |
1262 | // the forwarded registers that we *aren't* passing as parameters. This will |
    // preserve the copies we built earlier.
1264 | for (const auto &F : Forwards) { |
1265 | Register ForwardedReg = F.PReg; |
1266 | // If the register is already passed, or aliases a register which is |
1267 | // already being passed, then skip it. |
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
1269 | if (!Use.isReg()) |
1270 | return false; |
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
1272 | })) |
1273 | continue; |
1274 | |
1275 | // We aren't passing it already, so we should add it to the call. |
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
1278 | } |
1279 | } |

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If the callee is a register, it is used by a target-specific instruction,
  // so it must have a register class matching that instruction's operand
  // constraint.
  if (MIB->getOperand(0).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), MIB->getOperand(0), 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  // Arm64EC has special mangling rules for calls and extra requirements for
  // varargs; bail out on all calls for now.
  if (Subtarget.isWindowsArm64EC())
    return false;

  // Arm64EC thunks have a special calling convention which is only implemented
  // in SelectionDAG; bail out for now.
  if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
      Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
    return false;

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires the caller to zero-extend i1 arguments to 8 bits.
    auto &Flags = OrigArg.Flags[0];
    if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
      ArgInfo &OutArg = OutArgs.back();
      assert(OutArg.Regs.size() == 1 &&
             MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");

      // We cannot use a ZExt ArgInfo flag here, because it will
      // zero-extend the argument to i32 instead of just i8.
      OutArg.Regs[0] =
          MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
      LLVMContext &Ctx = MF.getFunction().getContext();
      OutArg.Ty = Type::getInt8Ty(Ctx);
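      // The net effect is roughly (illustrative MIR):
      //   %ext:_(s8) = G_ZEXT %orig:_(s1)
      // and the s8 value is what the calling-convention code then assigns to a
      // register or stack slot.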
    }
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  Info.IsTailCall = CanTailCallOpt;
  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.

  unsigned Opc = 0;
  // Calls with operand bundle "clang.arc.attachedcall" are special. They
  // should be expanded to the call, directly followed by a special marker
  // sequence and a call to an ObjC library function.
  if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
    Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
  // A call to a returns-twice function like setjmp must be followed by a BTI
  // instruction.
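  // (Roughly: with branch target enforcement, the longjmp "return" reaches the
  // instruction after the call via an indirect branch, so that address must be
  // a valid BTI landing pad; the BLR_BTI pseudo emits the call together with
  // one.)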
  else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) &&
           !Subtarget.noBTIAtReturnTwice() &&
           MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    Opc = AArch64::BLR_BTI;
  else {
    // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
    // is set.
    if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
      DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
      MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
      Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
    }
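    // For example (illustrative): with -fno-plt, a memset libcall ends up as a
    // G_GLOBAL_VALUE of the symbol with the MO_GOT flag (the address is loaded
    // via the GOT), and the call below becomes an indirect call through that
    // register.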
    Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI);
  }

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  unsigned CalleeOpNo = 0;

  if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
    // Add a target global address for the retainRV/claimRV runtime function
    // just before the call target.
    Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
    MIB.addGlobalAddress(ARCFn);
    ++CalleeOpNo;

    // We may or may not need to emit both the marker and the retain/claim
    // call. Tell the pseudo expansion using an additional boolean op.
    MIB.addImm(objcarc::attachedCallOpBundleNeedsMarker(Info.CB));
    ++CalleeOpNo;
  } else if (Info.CFIType) {
    MIB->setCFIType(MF, Info.CFIType->getZExtValue());
  }
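  // At this point an RVMARKER pseudo-call looks roughly like (illustrative):
  //   BLR_RVMARKER @objc_retainAutoreleasedReturnValue, <needs-marker>,
  //                <callee>, ...
  // which is why CalleeOpNo is tracked instead of hard-coding operand 0.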

  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsReturn*/ false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
    assert((Info.PAI->Key == AArch64PACKey::IA ||
            Info.PAI->Key == AArch64PACKey::IB) &&
           "Invalid auth call key");
    MIB.addImm(Info.PAI->Key);

    Register AddrDisc = 0;
    uint16_t IntDisc = 0;
    std::tie(IntDisc, AddrDisc) =
        extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);

    MIB.addImm(IntDisc);
    MIB.addUse(AddrDisc);
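    // For example (illustrative): a callee signed with key IA and a
    // discriminator of blend(%ctx, 42) yields IntDisc == 42 and AddrDisc ==
    // %ctx, whereas a purely constant discriminator leaves AddrDisc as
    // NoRegister.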
    if (AddrDisc != AArch64::NoRegister) {
      constrainOperandRegClass(MF, *TRI, MRI,
                               *MF.getSubtarget().getInstrInfo(),
                               *MF.getSubtarget().getRegBankInfo(), *MIB,
                               MIB->getDesc(), MIB->getOperand(CalleeOpNo + 3),
                               CalleeOpNo + 3);
    }
  }

  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackSize, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackSize).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackSize)
      .addImm(CalleePopBytes);
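  // For example (illustrative numbers only): a fastcc call under
  // GuaranteedTailCallOpt that used 24 bytes of stack arguments gets
  // Assigner.StackSize == 24 and CalleePopBytes == 32 (rounded up to 16-byte
  // alignment); for the usual C calling convention CalleePopBytes is 0 and the
  // caller restores SP.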

  // If the callee is a register, it is used by a target-specific instruction,
  // so it must have a register class matching that instruction's operand
  // constraint.
  if (MIB->getOperand(CalleeOpNo).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             MIB->getOperand(CalleeOpNo), CalleeOpNo);

  // Finally we can copy the returned value back into its virtual register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
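    // For example (illustrative): if the callee's first parameter is marked
    // 'returned' (as memcpy often is), the value already passed in the first
    // argument register is also the call's result, so the returned-arg handler
    // below can reuse OutArgs[0].Regs instead of introducing a separate copy.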

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? ArrayRef(OutArgs[0].Regs)
                             : ArrayRef<Register>()))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
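    // (X21 is the register the Swift ABI uses for swifterror on AArch64, so it
    // is modelled as an implicit def of the call and then copied back out.)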
  }

  if (!Info.CanLowerReturn) {
    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                    Info.DemoteRegister, Info.DemoteStackIndex);
  }
  return true;
}

bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}