| 1 | //===--- AArch64CallLowering.cpp - Call lowering --------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file implements the lowering of LLVM calls to machine code calls for |
| 11 | /// GlobalISel. |
| 12 | /// |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "AArch64CallLowering.h" |
| 16 | #include "AArch64GlobalISelUtils.h" |
| 17 | #include "AArch64ISelLowering.h" |
| 18 | #include "AArch64MachineFunctionInfo.h" |
| 19 | #include "AArch64RegisterInfo.h" |
| 20 | #include "AArch64Subtarget.h" |
| 21 | #include "Utils/AArch64SMEAttributes.h" |
| 22 | #include "llvm/ADT/ArrayRef.h" |
| 23 | #include "llvm/ADT/SmallVector.h" |
| 24 | #include "llvm/Analysis/ObjCARCUtil.h" |
| 25 | #include "llvm/CodeGen/Analysis.h" |
| 26 | #include "llvm/CodeGen/CallingConvLower.h" |
| 27 | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
| 28 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
| 29 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
| 30 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
| 31 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
| 32 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 33 | #include "llvm/CodeGen/MachineFrameInfo.h" |
| 34 | #include "llvm/CodeGen/MachineFunction.h" |
| 35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 36 | #include "llvm/CodeGen/MachineMemOperand.h" |
| 37 | #include "llvm/CodeGen/MachineOperand.h" |
| 38 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 39 | #include "llvm/CodeGen/TargetOpcodes.h" |
| 40 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 41 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 42 | #include "llvm/CodeGen/ValueTypes.h" |
| 43 | #include "llvm/CodeGenTypes/MachineValueType.h" |
| 44 | #include "llvm/IR/Argument.h" |
| 45 | #include "llvm/IR/Attributes.h" |
| 46 | #include "llvm/IR/Function.h" |
| 47 | #include "llvm/IR/Type.h" |
| 48 | #include "llvm/IR/Value.h" |
| 49 | #include <algorithm> |
| 50 | #include <cassert> |
| 51 | #include <cstdint> |
| 52 | |
| 53 | #define DEBUG_TYPE "aarch64-call-lowering" |
| 54 | |
| 55 | using namespace llvm; |
| 56 | using namespace AArch64GISelUtils; |
| 57 | |
| 58 | extern cl::opt<bool> EnableSVEGISel; |
| 59 | |
| 60 | AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) |
| 61 | : CallLowering(&TLI) {} |
| 62 | |
| 63 | static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT, |
| 64 | MVT &LocVT) { |
| 65 | // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy |
| 66 | // hack because the DAG calls the assignment function with pre-legalized |
| 67 | // register typed values, not the raw type. |
| 68 | // |
| 69 | // This hack is not applied to return values, which are not passed on the |
| 70 | // stack. |
| 71 | if (OrigVT == MVT::i1 || OrigVT == MVT::i8) |
| 72 | ValVT = LocVT = MVT::i8; |
| 73 | else if (OrigVT == MVT::i16) |
| 74 | ValVT = LocVT = MVT::i16; |
| 75 | } |
| 76 | |
| 77 | // Account for i1/i8/i16 stack passed value hack |
| 78 | static LLT getStackValueStoreTypeHack(const CCValAssign &VA) { |
| 79 | const MVT ValVT = VA.getValVT(); |
| 80 | return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT) |
| 81 | : LLT(VA.getLocVT()); |
| 82 | } |
| 83 | |
| 84 | namespace { |
| 85 | |
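| | /// Assigner for incoming values (formal arguments and call results). Applies |
| | /// the i1/i8/i16 stack-passing hack before delegating to the generic assigner. |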
| 86 | struct AArch64IncomingValueAssigner |
| 87 | : public CallLowering::IncomingValueAssigner { |
| 88 | AArch64IncomingValueAssigner(CCAssignFn *AssignFn_, |
| 89 | CCAssignFn *AssignFnVarArg_) |
| 90 | : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {} |
| 91 | |
| 92 | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
| 93 | CCValAssign::LocInfo LocInfo, |
| 94 | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
| 95 | CCState &State) override { |
| 96 | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
| 97 | return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT, |
| 98 | LocInfo, Info, Flags, State); |
| 99 | } |
| 100 | }; |
| 101 | |
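| | /// Assigner for outgoing values (call arguments and returns). Picks the fixed |
| | /// or vararg CC assignment function and applies the i1/i8/i16 stack hack for |
| | /// arguments, but not for returns. |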
| 102 | struct AArch64OutgoingValueAssigner |
| 103 | : public CallLowering::OutgoingValueAssigner { |
| 104 | const AArch64Subtarget &Subtarget; |
| 105 | |
| 106 | /// Track whether this assigner is used for a return rather than for argument |
| 107 | /// passing. The i1/i8/i16 hack applies to stack-passed arguments, but returns |
| 108 | /// of those types are never passed on the stack, so no adjustment is applied. |
| 109 | bool IsReturn; |
| 110 | |
| 111 | AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_, |
| 112 | CCAssignFn *AssignFnVarArg_, |
| 113 | const AArch64Subtarget &Subtarget_, |
| 114 | bool IsReturn) |
| 115 | : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_), |
| 116 | Subtarget(Subtarget_), IsReturn(IsReturn) {} |
| 117 | |
| 118 | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
| 119 | CCValAssign::LocInfo LocInfo, |
| 120 | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
| 121 | CCState &State) override { |
| 122 | const Function &F = State.getMachineFunction().getFunction(); |
| 123 | bool IsCalleeWin = |
| 124 | Subtarget.isCallingConvWin64(CC: State.getCallingConv(), IsVarArg: F.isVarArg()); |
| 125 | bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg(); |
| 126 | |
| 127 | bool Res; |
| 128 | if (Info.IsFixed && !UseVarArgsCCForFixed) { |
| 129 | if (!IsReturn) |
| 130 | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
| 131 | Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); |
| 132 | } else |
| 133 | Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State); |
| 134 | |
| 135 | StackSize = State.getStackSize(); |
| 136 | return Res; |
| 137 | } |
| 138 | }; |
| 139 | |
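| | /// Common handler for incoming values: loads stack-passed values from fixed |
| | /// stack objects and leaves physical-register bookkeeping to subclasses. |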
| 140 | struct IncomingArgHandler : public CallLowering::IncomingValueHandler { |
| 141 | IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
| 142 | : IncomingValueHandler(MIRBuilder, MRI) {} |
| 143 | |
| 144 | Register getStackAddress(uint64_t Size, int64_t Offset, |
| 145 | MachinePointerInfo &MPO, |
| 146 | ISD::ArgFlagsTy Flags) override { |
| 147 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
| 148 | |
| 149 | // Byval is assumed to be writable memory, but other stack passed arguments |
| 150 | // are not. |
| 151 | const bool IsImmutable = !Flags.isByVal(); |
| 152 | |
| 153 | int FI = MFI.CreateFixedObject(Size, SPOffset: Offset, IsImmutable); |
| 154 | MPO = MachinePointerInfo::getFixedStack(MF&: MIRBuilder.getMF(), FI); |
| 155 | auto AddrReg = MIRBuilder.buildFrameIndex(Res: LLT::pointer(AddressSpace: 0, SizeInBits: 64), Idx: FI); |
| 156 | return AddrReg.getReg(Idx: 0); |
| 157 | } |
| 158 | |
| 159 | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
| 160 | ISD::ArgFlagsTy Flags) const override { |
| 161 | // For pointers, we just need to fixup the integer types reported in the |
| 162 | // CCValAssign. |
| 163 | if (Flags.isPointer()) |
| 164 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
| 165 | return getStackValueStoreTypeHack(VA); |
| 166 | } |
| 167 | |
| 168 | void assignValueToReg(Register ValVReg, Register PhysReg, |
| 169 | const CCValAssign &VA) override { |
| 170 | markRegUsed(Reg: PhysReg); |
| 171 | IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); |
| 172 | } |
| 173 | |
| 174 | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
| 175 | const MachinePointerInfo &MPO, |
| 176 | const CCValAssign &VA) override { |
| 177 | MachineFunction &MF = MIRBuilder.getMF(); |
| 178 | |
| 179 | LLT ValTy(VA.getValVT()); |
| 180 | LLT LocTy(VA.getLocVT()); |
| 181 | |
| 182 | // Fixup the types for the DAG compatibility hack. |
| 183 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) |
| 184 | std::swap(a&: ValTy, b&: LocTy); |
| 185 | else { |
| 186 | // The calling code knows whether this is a pointer or not; we're only |
| 187 | // touching the LocTy for the i8/i16 hack. |
| 188 | assert(LocTy.getSizeInBits() == MemTy.getSizeInBits()); |
| 189 | LocTy = MemTy; |
| 190 | } |
| 191 | |
| 192 | auto MMO = MF.getMachineMemOperand( |
| 193 | PtrInfo: MPO, f: MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy: LocTy, |
| 194 | base_alignment: inferAlignFromPtrInfo(MF, MPO)); |
| 195 | |
| 196 | switch (VA.getLocInfo()) { |
| 197 | case CCValAssign::LocInfo::ZExt: |
| 198 | MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_ZEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO); |
| 199 | return; |
| 200 | case CCValAssign::LocInfo::SExt: |
| 201 | MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO); |
| 202 | return; |
| 203 | default: |
| 204 | MIRBuilder.buildLoad(Res: ValVReg, Addr, MMO&: *MMO); |
| 205 | return; |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | /// How the physical register gets marked varies between formal |
| 210 | /// parameters (it's a basic-block live-in), and a call instruction |
| 211 | /// (it's an implicit-def of the BL). |
| 212 | virtual void markRegUsed(Register Reg) = 0; |
| 213 | }; |
| 214 | |
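| | /// Handler for formal arguments: incoming argument registers become live-ins |
| | /// of the entry block. |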
| 215 | struct FormalArgHandler : public IncomingArgHandler { |
| 216 | FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
| 217 | : IncomingArgHandler(MIRBuilder, MRI) {} |
| 218 | |
| 219 | void markRegUsed(Register Reg) override { |
| 220 | MIRBuilder.getMRI()->addLiveIn(Reg: Reg.asMCReg()); |
| 221 | MIRBuilder.getMBB().addLiveIn(PhysReg: Reg.asMCReg()); |
| 222 | } |
| 223 | }; |
| 224 | |
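| | /// Handler for values returned by a lowered call: returned physical registers |
| | /// are added as implicit defs of the call instruction. |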
| 225 | struct CallReturnHandler : public IncomingArgHandler { |
| 226 | CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
| 227 | MachineInstrBuilder MIB) |
| 228 | : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {} |
| 229 | |
| 230 | void markRegUsed(Register Reg) override { |
| 231 | MIB.addDef(RegNo: Reg, Flags: RegState::Implicit); |
| 232 | } |
| 233 | |
| 234 | MachineInstrBuilder MIB; |
| 235 | }; |
| 236 | |
| 237 | /// A special return arg handler for "returned" attribute arg calls. |
| 238 | struct ReturnedArgCallReturnHandler : public CallReturnHandler { |
| 239 | ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder, |
| 240 | MachineRegisterInfo &MRI, |
| 241 | MachineInstrBuilder MIB) |
| 242 | : CallReturnHandler(MIRBuilder, MRI, MIB) {} |
| 243 | |
| 244 | void markRegUsed(Register Reg) override {} |
| 245 | }; |
| 246 | |
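| | /// Handler for outgoing values (call arguments and return values): registers |
| | /// become implicit uses of the call or return instruction, and stack-passed |
| | /// values are stored relative to SP, or to fixed slots for tail calls. |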
| 247 | struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { |
| 248 | OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
| 249 | MachineInstrBuilder MIB, bool IsTailCall = false, |
| 250 | int FPDiff = 0) |
| 251 | : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall), |
| 252 | FPDiff(FPDiff), |
| 253 | Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {} |
| 254 | |
| 255 | Register getStackAddress(uint64_t Size, int64_t Offset, |
| 256 | MachinePointerInfo &MPO, |
| 257 | ISD::ArgFlagsTy Flags) override { |
| 258 | MachineFunction &MF = MIRBuilder.getMF(); |
| 259 | LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
| 260 | LLT s64 = LLT::scalar(SizeInBits: 64); |
| 261 | |
| 262 | if (IsTailCall) { |
| 263 | assert(!Flags.isByVal() && "byval unhandled with tail calls" ); |
| 264 | |
| 265 | Offset += FPDiff; |
| 266 | int FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: Offset, IsImmutable: true); |
| 267 | auto FIReg = MIRBuilder.buildFrameIndex(Res: p0, Idx: FI); |
| 268 | MPO = MachinePointerInfo::getFixedStack(MF, FI); |
| 269 | return FIReg.getReg(Idx: 0); |
| 270 | } |
| 271 | |
| 272 | if (!SPReg) |
| 273 | SPReg = MIRBuilder.buildCopy(Res: p0, Op: Register(AArch64::SP)).getReg(Idx: 0); |
| 274 | |
| 275 | auto OffsetReg = MIRBuilder.buildConstant(Res: s64, Val: Offset); |
| 276 | |
| 277 | auto AddrReg = MIRBuilder.buildPtrAdd(Res: p0, Op0: SPReg, Op1: OffsetReg); |
| 278 | |
| 279 | MPO = MachinePointerInfo::getStack(MF, Offset); |
| 280 | return AddrReg.getReg(Idx: 0); |
| 281 | } |
| 282 | |
| 283 | /// We need to fixup the reported store size for certain value types because |
| 284 | /// we invert the interpretation of ValVT and LocVT in certain cases. This is |
| 285 | /// for compatibility with the DAG call lowering implementation, which we're |
| 286 | /// currently building on top of. |
| 287 | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
| 288 | ISD::ArgFlagsTy Flags) const override { |
| 289 | if (Flags.isPointer()) |
| 290 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
| 291 | return getStackValueStoreTypeHack(VA); |
| 292 | } |
| 293 | |
| 294 | void assignValueToReg(Register ValVReg, Register PhysReg, |
| 295 | const CCValAssign &VA) override { |
| 296 | MIB.addUse(RegNo: PhysReg, Flags: RegState::Implicit); |
| 297 | Register ExtReg = extendRegister(ValReg: ValVReg, VA); |
| 298 | MIRBuilder.buildCopy(Res: PhysReg, Op: ExtReg); |
| 299 | } |
| 300 | |
| 301 | /// Check whether a stack argument requires lowering in a tail call. |
| 302 | static bool shouldLowerTailCallStackArg(const MachineFunction &MF, |
| 303 | const CCValAssign &VA, |
| 304 | Register ValVReg, |
| 305 | Register StoreAddr) { |
| 306 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 307 | // Find the instruction that defines the value being stored. |
| 308 | auto *DefMI = MRI.getVRegDef(Reg: ValVReg); |
| 309 | assert(DefMI && "No defining instruction" ); |
| 310 | for (;;) { |
| 311 | // Look through nodes that don't alter the bits of the incoming value. |
| 312 | unsigned Op = DefMI->getOpcode(); |
| 313 | if (Op == TargetOpcode::G_ZEXT || Op == TargetOpcode::G_ANYEXT || |
| 314 | Op == TargetOpcode::G_BITCAST || isAssertMI(MI: *DefMI)) { |
| 315 | DefMI = MRI.getVRegDef(Reg: DefMI->getOperand(i: 1).getReg()); |
| 316 | continue; |
| 317 | } |
| 318 | break; |
| 319 | } |
| 320 | |
| 321 | auto *Load = dyn_cast<GLoad>(Val: DefMI); |
| 322 | if (!Load) |
| 323 | return true; |
| 324 | Register LoadReg = Load->getPointerReg(); |
| 325 | auto *LoadAddrDef = MRI.getVRegDef(Reg: LoadReg); |
| 326 | if (LoadAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX) |
| 327 | return true; |
| 328 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 329 | int LoadFI = LoadAddrDef->getOperand(i: 1).getIndex(); |
| 330 | |
| 331 | auto *StoreAddrDef = MRI.getVRegDef(Reg: StoreAddr); |
| 332 | if (StoreAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX) |
| 333 | return true; |
| 334 | int StoreFI = StoreAddrDef->getOperand(i: 1).getIndex(); |
| 335 | |
| 336 | if (!MFI.isImmutableObjectIndex(ObjectIdx: LoadFI)) |
| 337 | return true; |
| 338 | if (MFI.getObjectOffset(ObjectIdx: LoadFI) != MFI.getObjectOffset(ObjectIdx: StoreFI)) |
| 339 | return true; |
| 340 | if (Load->getMemSize() != MFI.getObjectSize(ObjectIdx: StoreFI)) |
| 341 | return true; |
| 342 | |
| 343 | return false; |
| 344 | } |
| 345 | |
| 346 | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
| 347 | const MachinePointerInfo &MPO, |
| 348 | const CCValAssign &VA) override { |
| 349 | MachineFunction &MF = MIRBuilder.getMF(); |
| 350 | if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, ValVReg, StoreAddr: Addr)) |
| 351 | return; |
| 352 | auto MMO = MF.getMachineMemOperand(PtrInfo: MPO, f: MachineMemOperand::MOStore, MemTy, |
| 353 | base_alignment: inferAlignFromPtrInfo(MF, MPO)); |
| 354 | MIRBuilder.buildStore(Val: ValVReg, Addr, MMO&: *MMO); |
| 355 | } |
| 356 | |
| 357 | void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex, |
| 358 | Register Addr, LLT MemTy, |
| 359 | const MachinePointerInfo &MPO, |
| 360 | const CCValAssign &VA) override { |
| 361 | unsigned MaxSize = MemTy.getSizeInBytes() * 8; |
| 362 | // For varargs, we always want to extend them to 8 bytes, in which case |
| 363 | // we disable setting a max. |
| 364 | if (!Arg.IsFixed) |
| 365 | MaxSize = 0; |
| 366 | |
| 367 | Register ValVReg = Arg.Regs[RegIndex]; |
| 368 | if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) { |
| 369 | MVT LocVT = VA.getLocVT(); |
| 370 | MVT ValVT = VA.getValVT(); |
| 371 | |
| 372 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) { |
| 373 | std::swap(a&: ValVT, b&: LocVT); |
| 374 | MemTy = LLT(VA.getValVT()); |
| 375 | } |
| 376 | |
| 377 | ValVReg = extendRegister(ValReg: ValVReg, VA, MaxSizeBits: MaxSize); |
| 378 | } else { |
| 379 | // The store does not cover the full allocated stack slot. |
| 380 | MemTy = LLT(VA.getValVT()); |
| 381 | } |
| 382 | |
| 383 | assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA); |
| 384 | } |
| 385 | |
| 386 | MachineInstrBuilder MIB; |
| 387 | |
| 388 | bool IsTailCall; |
| 389 | |
| 390 | /// For tail calls, the byte offset of the call's argument area from the |
| 391 | /// callee's. Unused elsewhere. |
| 392 | int FPDiff; |
| 393 | |
| 394 | // Cache the SP register vreg if we need it more than once in this call site. |
| 395 | Register SPReg; |
| 396 | |
| 397 | const AArch64Subtarget &Subtarget; |
| 398 | }; |
| 399 | } // namespace |
| 400 | |
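| | /// Return true if the callee pops its own stack arguments (callee-cleanup |
| | /// conventions: fastcc with -tailcallopt, tailcc, and swifttailcc). |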
| 401 | static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { |
| 402 | return (CallConv == CallingConv::Fast && TailCallOpt) || |
| 403 | CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; |
| 404 | } |
| 405 | |
| 406 | bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, |
| 407 | const Value *Val, |
| 408 | ArrayRef<Register> VRegs, |
| 409 | FunctionLoweringInfo &FLI, |
| 410 | Register SwiftErrorVReg) const { |
| 411 | auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: AArch64::RET_ReallyLR); |
| 412 | assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && |
| 413 | "Return value without a vreg" ); |
| 414 | |
| 415 | bool Success = true; |
| 416 | if (!FLI.CanLowerReturn) { |
| 417 | insertSRetStores(MIRBuilder, RetTy: Val->getType(), VRegs, DemoteReg: FLI.DemoteRegister); |
| 418 | } else if (!VRegs.empty()) { |
| 419 | MachineFunction &MF = MIRBuilder.getMF(); |
| 420 | const Function &F = MF.getFunction(); |
| 421 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 422 | |
| 423 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 424 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 425 | CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC: F.getCallingConv()); |
| 426 | auto &DL = F.getDataLayout(); |
| 427 | LLVMContext &Ctx = Val->getType()->getContext(); |
| 428 | |
| 429 | SmallVector<EVT, 4> SplitEVTs; |
| 430 | ComputeValueVTs(TLI, DL, Ty: Val->getType(), ValueVTs&: SplitEVTs); |
| 431 | assert(VRegs.size() == SplitEVTs.size() && |
| 432 | "For each split Type there should be exactly one VReg." ); |
| 433 | |
| 434 | SmallVector<ArgInfo, 8> SplitArgs; |
| 435 | CallingConv::ID CC = F.getCallingConv(); |
| 436 | |
| 437 | for (unsigned i = 0; i < SplitEVTs.size(); ++i) { |
| 438 | Register CurVReg = VRegs[i]; |
| 439 | ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Context&: Ctx), 0}; |
| 440 | setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F); |
| 441 | |
| 442 | // i1 is a special case because SDAG i1 true is naturally zero extended |
| 443 | // when widened using ANYEXT. We need to do it explicitly here. |
| 444 | auto &Flags = CurArgInfo.Flags[0]; |
| 445 | if (MRI.getType(Reg: CurVReg).getSizeInBits() == TypeSize::getFixed(ExactSize: 1) && |
| 446 | !Flags.isSExt() && !Flags.isZExt()) { |
| 447 | CurVReg = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 8), Op: CurVReg).getReg(Idx: 0); |
| 448 | } else if (TLI.getNumRegistersForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]) == |
| 449 | 1) { |
| 450 | // Some types will need extending as specified by the CC. |
| 451 | MVT NewVT = TLI.getRegisterTypeForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]); |
| 452 | if (EVT(NewVT) != SplitEVTs[i]) { |
| 453 | unsigned ExtendOp = TargetOpcode::G_ANYEXT; |
| 454 | if (F.getAttributes().hasRetAttr(Kind: Attribute::SExt)) |
| 455 | ExtendOp = TargetOpcode::G_SEXT; |
| 456 | else if (F.getAttributes().hasRetAttr(Kind: Attribute::ZExt)) |
| 457 | ExtendOp = TargetOpcode::G_ZEXT; |
| 458 | |
| 459 | LLT NewLLT(NewVT); |
| 460 | LLT OldLLT = getLLTForType(Ty&: *CurArgInfo.Ty, DL); |
| 461 | CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Context&: Ctx); |
| 462 | // Instead of an extend, we might have a vector type which needs |
| 463 | // padding with more elements, e.g. <2 x half> -> <4 x half>. |
| 464 | if (NewVT.isVector()) { |
| 465 | if (OldLLT.isVector()) { |
| 466 | if (NewLLT.getNumElements() > OldLLT.getNumElements()) { |
| 467 | CurVReg = |
| 468 | MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg) |
| 469 | .getReg(Idx: 0); |
| 470 | } else { |
| 471 | // Just do a vector extend. |
| 472 | CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg}) |
| 473 | .getReg(Idx: 0); |
| 474 | } |
| 475 | } else if (NewLLT.getNumElements() >= 2 && |
| 476 | NewLLT.getNumElements() <= 8) { |
| 477 | // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't |
| 478 | // have <1 x S> vector types in GISel we use a build_vector |
| 479 | // instead of a vector merge/concat. |
| 480 | CurVReg = |
| 481 | MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg) |
| 482 | .getReg(Idx: 0); |
| 483 | } else { |
| 484 | LLVM_DEBUG(dbgs() << "Could not handle ret ty\n" ); |
| 485 | return false; |
| 486 | } |
| 487 | } else { |
| 488 | // If the split EVT was a <1 x T> vector, and NewVT is T, then we |
| 489 | // don't have to do anything since we don't distinguish between the |
| 490 | // two. |
| 491 | if (NewLLT != MRI.getType(Reg: CurVReg)) { |
| 492 | // A scalar extend. |
| 493 | CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg}) |
| 494 | .getReg(Idx: 0); |
| 495 | } |
| 496 | } |
| 497 | } |
| 498 | } |
| 499 | if (CurVReg != CurArgInfo.Regs[0]) { |
| 500 | CurArgInfo.Regs[0] = CurVReg; |
| 501 | // Reset the arg flags after modifying CurVReg. |
| 502 | setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F); |
| 503 | } |
| 504 | splitToValueTypes(OrigArgInfo: CurArgInfo, SplitArgs, DL, CallConv: CC); |
| 505 | } |
| 506 | |
| 507 | AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget, |
| 508 | /*IsReturn*/ true); |
| 509 | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB); |
| 510 | Success = determineAndHandleAssignments(Handler, Assigner, Args&: SplitArgs, |
| 511 | MIRBuilder, CallConv: CC, IsVarArg: F.isVarArg()); |
| 512 | } |
| 513 | |
| 514 | if (SwiftErrorVReg) { |
| 515 | MIB.addUse(RegNo: AArch64::X21, Flags: RegState::Implicit); |
| 516 | MIRBuilder.buildCopy(Res: AArch64::X21, Op: SwiftErrorVReg); |
| 517 | } |
| 518 | |
| 519 | MIRBuilder.insertInstr(MIB); |
| 520 | return Success; |
| 521 | } |
| 522 | |
| 523 | bool AArch64CallLowering::canLowerReturn(MachineFunction &MF, |
| 524 | CallingConv::ID CallConv, |
| 525 | SmallVectorImpl<BaseArgInfo> &Outs, |
| 526 | bool IsVarArg) const { |
| 527 | SmallVector<CCValAssign, 16> ArgLocs; |
| 528 | const auto &TLI = *getTLI<AArch64TargetLowering>(); |
| 529 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, |
| 530 | MF.getFunction().getContext()); |
| 531 | |
| 532 | return checkReturn(CCInfo, Outs, Fn: TLI.CCAssignFnForReturn(CC: CallConv)); |
| 533 | } |
| 534 | |
| 535 | /// Helper function to compute forwarded registers for musttail calls. Computes |
| 536 | /// the forwarded registers, sets MBB liveness, and emits COPY instructions that |
| 537 | /// can be used to save + restore registers later. |
| 538 | static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, |
| 539 | CCAssignFn *AssignFn) { |
| 540 | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
| 541 | MachineFunction &MF = MIRBuilder.getMF(); |
| 542 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 543 | |
| 544 | if (!MFI.hasMustTailInVarArgFunc()) |
| 545 | return; |
| 546 | |
| 547 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 548 | const Function &F = MF.getFunction(); |
| 549 | assert(F.isVarArg() && "Expected F to be vararg?" ); |
| 550 | |
| 551 | // Compute the set of forwarded registers. The rest are scratch. |
| 552 | SmallVector<CCValAssign, 16> ArgLocs; |
| 553 | CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs, |
| 554 | F.getContext()); |
| 555 | SmallVector<MVT, 2> RegParmTypes; |
| 556 | RegParmTypes.push_back(Elt: MVT::i64); |
| 557 | RegParmTypes.push_back(Elt: MVT::f128); |
| 558 | |
| 559 | // Later on, we can use this vector to restore the registers if necessary. |
| 560 | SmallVectorImpl<ForwardedRegister> &Forwards = |
| 561 | FuncInfo->getForwardedMustTailRegParms(); |
| 562 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: AssignFn); |
| 563 | |
| 564 | // Conservatively forward X8, since it might be used for an aggregate |
| 565 | // return. |
| 566 | if (!CCInfo.isAllocated(Reg: AArch64::X8)) { |
| 567 | Register X8VReg = MF.addLiveIn(PReg: AArch64::X8, RC: &AArch64::GPR64RegClass); |
| 568 | Forwards.push_back(Elt: ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); |
| 569 | } |
| 570 | |
| 571 | // Add the forwards to the MachineBasicBlock and MachineFunction. |
| 572 | for (const auto &F : Forwards) { |
| 573 | MBB.addLiveIn(PhysReg: F.PReg); |
| 574 | MIRBuilder.buildCopy(Res: Register(F.VReg), Op: Register(F.PReg)); |
| 575 | } |
| 576 | } |
| 577 | |
| 578 | bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { |
| 579 | auto &F = MF.getFunction(); |
| 580 | if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() || |
| 581 | llvm::any_of(Range: F.args(), P: [](const Argument &A) { |
| 582 | return A.getType()->isScalableTy(); |
| 583 | }))) |
| 584 | return true; |
| 585 | const auto &ST = MF.getSubtarget<AArch64Subtarget>(); |
| 586 | if (!ST.hasNEON() || !ST.hasFPARMv8()) { |
| 587 | LLVM_DEBUG(dbgs() << "Falling back to SDAG because the subtarget lacks NEON or FP support\n"); |
| 588 | return true; |
| 589 | } |
| 590 | |
| 591 | SMEAttrs Attrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs(); |
| 592 | if (Attrs.hasZAState() || Attrs.hasZT0State() || |
| 593 | Attrs.hasStreamingInterfaceOrBody() || |
| 594 | Attrs.hasStreamingCompatibleInterface()) |
| 595 | return true; |
| 596 | |
| 597 | return false; |
| 598 | } |
| 599 | |
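| | /// For variadic functions, spill the unallocated GPR and FPR argument |
| | /// registers to the register save areas so that va_arg can find them later. |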
| 600 | void AArch64CallLowering::saveVarArgRegisters( |
| 601 | MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler, |
| 602 | CCState &CCInfo) const { |
| 603 | auto GPRArgRegs = AArch64::getGPRArgRegs(); |
| 604 | auto FPRArgRegs = AArch64::getFPRArgRegs(); |
| 605 | |
| 606 | MachineFunction &MF = MIRBuilder.getMF(); |
| 607 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 608 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 609 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 610 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 611 | bool IsWin64CC = Subtarget.isCallingConvWin64(CC: CCInfo.getCallingConv(), |
| 612 | IsVarArg: MF.getFunction().isVarArg()); |
| 613 | const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
| 614 | const LLT s64 = LLT::scalar(SizeInBits: 64); |
| 615 | |
| 616 | unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(Regs: GPRArgRegs); |
| 617 | unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1; |
| 618 | |
| 619 | unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR); |
| 620 | int GPRIdx = 0; |
| 621 | if (GPRSaveSize != 0) { |
| 622 | if (IsWin64CC) { |
| 623 | GPRIdx = MFI.CreateFixedObject(Size: GPRSaveSize, |
| 624 | SPOffset: -static_cast<int>(GPRSaveSize), IsImmutable: false); |
| 625 | if (GPRSaveSize & 15) |
| 626 | // The extra size here, if triggered, will always be 8. |
| 627 | MFI.CreateFixedObject(Size: 16 - (GPRSaveSize & 15), |
| 628 | SPOffset: -static_cast<int>(alignTo(Value: GPRSaveSize, Align: 16)), |
| 629 | IsImmutable: false); |
| 630 | } else |
| 631 | GPRIdx = MFI.CreateStackObject(Size: GPRSaveSize, Alignment: Align(8), isSpillSlot: false); |
| 632 | |
| 633 | auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: GPRIdx); |
| 634 | auto Offset = |
| 635 | MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 8); |
| 636 | |
| 637 | for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) { |
| 638 | Register Val = MRI.createGenericVirtualRegister(Ty: s64); |
| 639 | Handler.assignValueToReg( |
| 640 | ValVReg: Val, PhysReg: GPRArgRegs[i], |
| 641 | VA: CCValAssign::getReg(ValNo: i + MF.getFunction().getNumOperands(), ValVT: MVT::i64, |
| 642 | Reg: GPRArgRegs[i], LocVT: MVT::i64, HTP: CCValAssign::Full)); |
| 643 | auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack( |
| 644 | MF, FI: GPRIdx, Offset: (i - FirstVariadicGPR) * 8) |
| 645 | : MachinePointerInfo::getStack(MF, Offset: i * 8); |
| 646 | MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO)); |
| 647 | |
| 648 | FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0), |
| 649 | Op0: FIN.getReg(Idx: 0), Op1: Offset); |
| 650 | } |
| 651 | } |
| 652 | FuncInfo->setVarArgsGPRIndex(GPRIdx); |
| 653 | FuncInfo->setVarArgsGPRSize(GPRSaveSize); |
| 654 | |
| 655 | if (Subtarget.hasFPARMv8() && !IsWin64CC) { |
| 656 | unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(Regs: FPRArgRegs); |
| 657 | |
| 658 | unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR); |
| 659 | int FPRIdx = 0; |
| 660 | if (FPRSaveSize != 0) { |
| 661 | FPRIdx = MFI.CreateStackObject(Size: FPRSaveSize, Alignment: Align(16), isSpillSlot: false); |
| 662 | |
| 663 | auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: FPRIdx); |
| 664 | auto Offset = |
| 665 | MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 16); |
| 666 | |
| 667 | for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) { |
| 668 | Register Val = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 128)); |
| 669 | Handler.assignValueToReg( |
| 670 | ValVReg: Val, PhysReg: FPRArgRegs[i], |
| 671 | VA: CCValAssign::getReg( |
| 672 | ValNo: i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs, |
| 673 | ValVT: MVT::f128, Reg: FPRArgRegs[i], LocVT: MVT::f128, HTP: CCValAssign::Full)); |
| 674 | |
| 675 | auto MPO = MachinePointerInfo::getStack(MF, Offset: i * 16); |
| 676 | MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO)); |
| 677 | |
| 678 | FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0), |
| 679 | Op0: FIN.getReg(Idx: 0), Op1: Offset); |
| 680 | } |
| 681 | } |
| 682 | FuncInfo->setVarArgsFPRIndex(FPRIdx); |
| 683 | FuncInfo->setVarArgsFPRSize(FPRSaveSize); |
| 684 | } |
| 685 | } |
| 686 | |
| 687 | bool AArch64CallLowering::lowerFormalArguments( |
| 688 | MachineIRBuilder &MIRBuilder, const Function &F, |
| 689 | ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { |
| 690 | MachineFunction &MF = MIRBuilder.getMF(); |
| 691 | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
| 692 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 693 | auto &DL = F.getDataLayout(); |
| 694 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 695 | |
| 696 | // Arm64EC has extra requirements for varargs calls which are only implemented |
| 697 | // in SelectionDAG; bail out for now. |
| 698 | if (F.isVarArg() && Subtarget.isWindowsArm64EC()) |
| 699 | return false; |
| 700 | |
| 701 | // Arm64EC thunks have a special calling convention which is only implemented |
| 702 | // in SelectionDAG; bail out for now. |
| 703 | if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native || |
| 704 | F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64) |
| 705 | return false; |
| 706 | |
| 707 | bool IsWin64 = |
| 708 | Subtarget.isCallingConvWin64(CC: F.getCallingConv(), IsVarArg: F.isVarArg()) && |
| 709 | !Subtarget.isWindowsArm64EC(); |
| 710 | |
| 711 | SmallVector<ArgInfo, 8> SplitArgs; |
| 712 | SmallVector<std::pair<Register, Register>> BoolArgs; |
| 713 | |
| 714 | // Insert the hidden sret parameter if the return value won't fit in the |
| 715 | // return registers. |
| 716 | if (!FLI.CanLowerReturn) |
| 717 | insertSRetIncomingArgument(F, SplitArgs, DemoteReg&: FLI.DemoteRegister, MRI, DL); |
| 718 | |
| 719 | unsigned i = 0; |
| 720 | for (auto &Arg : F.args()) { |
| 721 | if (DL.getTypeStoreSize(Ty: Arg.getType()).isZero()) |
| 722 | continue; |
| 723 | |
| 724 | ArgInfo OrigArg{VRegs[i], Arg, i}; |
| 725 | setArgFlags(Arg&: OrigArg, OpIdx: i + AttributeList::FirstArgIndex, DL, FuncInfo: F); |
| 726 | |
| 727 | // i1 arguments are zero-extended to i8 by the caller. Emit a |
| 728 | // hint to reflect this. |
| 729 | if (OrigArg.Ty->isIntegerTy(Bitwidth: 1)) { |
| 730 | assert(OrigArg.Regs.size() == 1 && |
| 731 | MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 && |
| 732 | "Unexpected registers used for i1 arg" ); |
| 733 | |
| 734 | auto &Flags = OrigArg.Flags[0]; |
| 735 | if (!Flags.isZExt() && !Flags.isSExt()) { |
| 736 | // Lower i1 argument as i8, and insert AssertZExt + Trunc later. |
| 737 | Register OrigReg = OrigArg.Regs[0]; |
| 738 | Register WideReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 8)); |
| 739 | OrigArg.Regs[0] = WideReg; |
| 740 | BoolArgs.push_back(Elt: {OrigReg, WideReg}); |
| 741 | } |
| 742 | } |
| 743 | |
| 744 | if (Arg.hasAttribute(Kind: Attribute::SwiftAsync)) |
| 745 | MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); |
| 746 | |
| 747 | splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs, DL, CallConv: F.getCallingConv()); |
| 748 | ++i; |
| 749 | } |
| 750 | |
| 751 | if (!MBB.empty()) |
| 752 | MIRBuilder.setInstr(*MBB.begin()); |
| 753 | |
| 754 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 755 | CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC: F.getCallingConv(), IsVarArg: IsWin64 && F.isVarArg()); |
| 756 | |
| 757 | AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn); |
| 758 | FormalArgHandler Handler(MIRBuilder, MRI); |
| 759 | SmallVector<CCValAssign, 16> ArgLocs; |
| 760 | CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); |
| 761 | if (!determineAssignments(Assigner, Args&: SplitArgs, CCInfo) || |
| 762 | !handleAssignments(Handler, Args&: SplitArgs, CCState&: CCInfo, ArgLocs, MIRBuilder)) |
| 763 | return false; |
| 764 | |
| 765 | if (!BoolArgs.empty()) { |
| 766 | for (auto &KV : BoolArgs) { |
| 767 | Register OrigReg = KV.first; |
| 768 | Register WideReg = KV.second; |
| 769 | LLT WideTy = MRI.getType(Reg: WideReg); |
| 770 | assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 && |
| 771 | "Unexpected bit size of a bool arg" ); |
| 772 | MIRBuilder.buildTrunc( |
| 773 | Res: OrigReg, Op: MIRBuilder.buildAssertZExt(Res: WideTy, Op: WideReg, Size: 1).getReg(Idx: 0)); |
| 774 | } |
| 775 | } |
| 776 | |
| 777 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 778 | uint64_t StackSize = Assigner.StackSize; |
| 779 | if (F.isVarArg()) { |
| 780 | if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) { |
| 781 | // The AAPCS variadic function ABI is identical to the non-variadic |
| 782 | // one. As a result there may be more arguments in registers and we should |
| 783 | // save them for future reference. |
| 784 | // Win64 variadic functions also pass arguments in registers, but all |
| 785 | // float arguments are passed in integer registers. |
| 786 | saveVarArgRegisters(MIRBuilder, Handler, CCInfo); |
| 787 | } else if (Subtarget.isWindowsArm64EC()) { |
| 788 | return false; |
| 789 | } |
| 790 | |
| 791 | // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. |
| 792 | StackSize = alignTo(Value: Assigner.StackSize, Align: Subtarget.isTargetILP32() ? 4 : 8); |
| 793 | |
| 794 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
| 795 | FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(Size: 4, SPOffset: StackSize, IsImmutable: true)); |
| 796 | } |
| 797 | |
| 798 | if (doesCalleeRestoreStack(CallConv: F.getCallingConv(), |
| 799 | TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) { |
| 800 | // We have a non-standard ABI, so why not make full use of the stack that |
| 801 | // we're going to pop? It must be aligned to 16 B in any case. |
| 802 | StackSize = alignTo(Value: StackSize, Align: 16); |
| 803 | |
| 804 | // If we're expected to restore the stack (e.g. fastcc), then we'll be |
| 805 | // adding a multiple of 16. |
| 806 | FuncInfo->setArgumentStackToRestore(StackSize); |
| 807 | |
| 808 | // Our own callers will guarantee that the space is free by giving an |
| 809 | // aligned value to CALLSEQ_START. |
| 810 | } |
| 811 | |
| 812 | // When we tail call, we need to check if the callee's arguments |
| 813 | // will fit on the caller's stack. So, whenever we lower formal arguments, |
| 814 | // we should keep track of this information, since we might lower a tail call |
| 815 | // in this function later. |
| 816 | FuncInfo->setBytesInStackArgArea(StackSize); |
| 817 | |
| 818 | if (Subtarget.hasCustomCallingConv()) |
| 819 | Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); |
| 820 | |
| 821 | handleMustTailForwardedRegisters(MIRBuilder, AssignFn); |
| 822 | |
| 823 | // Move back to the end of the basic block. |
| 824 | MIRBuilder.setMBB(MBB); |
| 825 | |
| 826 | return true; |
| 827 | } |
| 828 | |
| 829 | /// Return true if the calling convention is one that we can guarantee TCO for. |
| 830 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { |
| 831 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || |
| 832 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
| 833 | } |
| 834 | |
| 835 | /// Return true if we might ever do TCO for calls with this calling convention. |
| 836 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
| 837 | switch (CC) { |
| 838 | case CallingConv::C: |
| 839 | case CallingConv::PreserveMost: |
| 840 | case CallingConv::PreserveAll: |
| 841 | case CallingConv::PreserveNone: |
| 842 | case CallingConv::Swift: |
| 843 | case CallingConv::SwiftTail: |
| 844 | case CallingConv::Tail: |
| 845 | case CallingConv::Fast: |
| 846 | return true; |
| 847 | default: |
| 848 | return false; |
| 849 | } |
| 850 | } |
| 851 | |
| 852 | /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for |
| 853 | /// CC. |
| 854 | static std::pair<CCAssignFn *, CCAssignFn *> |
| 855 | getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { |
| 856 | return {TLI.CCAssignFnForCall(CC, IsVarArg: false), TLI.CCAssignFnForCall(CC, IsVarArg: true)}; |
| 857 | } |
| 858 | |
| 859 | bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( |
| 860 | CallLoweringInfo &Info, MachineFunction &MF, |
| 861 | SmallVectorImpl<ArgInfo> &InArgs) const { |
| 862 | const Function &CallerF = MF.getFunction(); |
| 863 | CallingConv::ID CalleeCC = Info.CallConv; |
| 864 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
| 865 | |
| 866 | // If the calling conventions match, then everything must be the same. |
| 867 | if (CalleeCC == CallerCC) |
| 868 | return true; |
| 869 | |
| 870 | // Check if the caller and callee will handle arguments in the same way. |
| 871 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 872 | CCAssignFn *CalleeAssignFnFixed; |
| 873 | CCAssignFn *CalleeAssignFnVarArg; |
| 874 | std::tie(args&: CalleeAssignFnFixed, args&: CalleeAssignFnVarArg) = |
| 875 | getAssignFnsForCC(CC: CalleeCC, TLI); |
| 876 | |
| 877 | CCAssignFn *CallerAssignFnFixed; |
| 878 | CCAssignFn *CallerAssignFnVarArg; |
| 879 | std::tie(args&: CallerAssignFnFixed, args&: CallerAssignFnVarArg) = |
| 880 | getAssignFnsForCC(CC: CallerCC, TLI); |
| 881 | |
| 882 | AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed, |
| 883 | CalleeAssignFnVarArg); |
| 884 | AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed, |
| 885 | CallerAssignFnVarArg); |
| 886 | |
| 887 | if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner)) |
| 888 | return false; |
| 889 | |
| 890 | // Make sure that the caller and callee preserve all of the same registers. |
| 891 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
| 892 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
| 893 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
| 894 | if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) { |
| 895 | TRI->UpdateCustomCallPreservedMask(MF, Mask: &CallerPreserved); |
| 896 | TRI->UpdateCustomCallPreservedMask(MF, Mask: &CalleePreserved); |
| 897 | } |
| 898 | |
| 899 | return TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved); |
| 900 | } |
| 901 | |
| 902 | bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( |
| 903 | CallLoweringInfo &Info, MachineFunction &MF, |
| 904 | SmallVectorImpl<ArgInfo> &OrigOutArgs) const { |
| 905 | // If there are no outgoing arguments, then we are done. |
| 906 | if (OrigOutArgs.empty()) |
| 907 | return true; |
| 908 | |
| 909 | const Function &CallerF = MF.getFunction(); |
| 910 | LLVMContext &Ctx = CallerF.getContext(); |
| 911 | CallingConv::ID CalleeCC = Info.CallConv; |
| 912 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
| 913 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 914 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 915 | |
| 916 | CCAssignFn *AssignFnFixed; |
| 917 | CCAssignFn *AssignFnVarArg; |
| 918 | std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI); |
| 919 | |
| 920 | // We have outgoing arguments. Make sure that we can tail call with them. |
| 921 | SmallVector<CCValAssign, 16> OutLocs; |
| 922 | CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx); |
| 923 | |
| 924 | AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg, |
| 925 | Subtarget, /*IsReturn*/ false); |
| 926 | // determineAssignments() may modify argument flags, so make a copy. |
| 927 | SmallVector<ArgInfo, 8> OutArgs; |
| 928 | append_range(C&: OutArgs, R&: OrigOutArgs); |
| 929 | if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo)) { |
| 930 | LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n" ); |
| 931 | return false; |
| 932 | } |
| 933 | |
| 934 | // Make sure that they can fit on the caller's stack. |
| 935 | const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 936 | if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) { |
| 937 | LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n" ); |
| 938 | return false; |
| 939 | } |
| 940 | |
| 941 | // Verify that the parameters in callee-saved registers match. |
| 942 | // TODO: Port this over to CallLowering as general code once swiftself is |
| 943 | // supported. |
| 944 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
| 945 | const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); |
| 946 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 947 | |
| 948 | if (Info.IsVarArg) { |
| 949 | // Be conservative and disallow variadic memory operands to match SDAG's |
| 950 | // behaviour. |
| 951 | // FIXME: If the caller's calling convention is C, then we can |
| 952 | // potentially use its argument area. However, for cases like fastcc, |
| 953 | // we can't do anything. |
| 954 | for (unsigned i = 0; i < OutLocs.size(); ++i) { |
| 955 | auto &ArgLoc = OutLocs[i]; |
| 956 | if (ArgLoc.isRegLoc()) |
| 957 | continue; |
| 958 | |
| 959 | LLVM_DEBUG( |
| 960 | dbgs() |
| 961 | << "... Cannot tail call vararg function with stack arguments\n" ); |
| 962 | return false; |
| 963 | } |
| 964 | } |
| 965 | |
| 966 | return parametersInCSRMatch(MRI, CallerPreservedMask, ArgLocs: OutLocs, OutVals: OutArgs); |
| 967 | } |
| 968 | |
| 969 | bool AArch64CallLowering::isEligibleForTailCallOptimization( |
| 970 | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
| 971 | SmallVectorImpl<ArgInfo> &InArgs, |
| 972 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
| 973 | |
| 974 | // Must pass all target-independent checks in order to tail call optimize. |
| 975 | if (!Info.IsTailCall) |
| 976 | return false; |
| 977 | |
| 978 | CallingConv::ID CalleeCC = Info.CallConv; |
| 979 | MachineFunction &MF = MIRBuilder.getMF(); |
| 980 | const Function &CallerF = MF.getFunction(); |
| 981 | |
| 982 | LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n" ); |
| 983 | |
| 984 | if (Info.SwiftErrorVReg) { |
| 985 | // TODO: We should handle this. |
| 986 | // Note that this is also handled by the check for no outgoing arguments. |
| 987 | // Proactively disabling this though, because the swifterror handling in |
| 988 | // lowerCall inserts a COPY *after* the location of the call. |
| 989 | LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n" ); |
| 990 | return false; |
| 991 | } |
| 992 | |
| 993 | if (!mayTailCallThisCC(CC: CalleeCC)) { |
| 994 | LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n" ); |
| 995 | return false; |
| 996 | } |
| 997 | |
| 998 | // Byval parameters hand the function a pointer directly into the stack area |
| 999 | // we want to reuse during a tail call. Working around this *is* possible (see |
| 1000 | // X86). |
| 1001 | // |
| 1002 | // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try |
| 1003 | // it? |
| 1004 | // |
| 1005 | // On Windows, "inreg" attributes signify non-aggregate indirect returns. |
| 1006 | // In this case, it is necessary to save/restore X0 in the callee. Tail |
| 1007 | // call opt interferes with this. So we disable tail call opt when the |
| 1008 | // caller has an argument with "inreg" attribute. |
| 1009 | // |
| 1010 | // FIXME: Check whether the callee also has an "inreg" argument. |
| 1011 | // |
| 1012 | // When the caller has a swifterror argument, we don't want to tail call |
| 1013 | // because we would have to move into the swifterror register before the |
| 1014 | // tail call. |
| 1015 | if (any_of(Range: CallerF.args(), P: [](const Argument &A) { |
| 1016 | return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr(); |
| 1017 | })) { |
| 1018 | LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, " |
| 1019 | "inreg, or swifterror arguments\n" ); |
| 1020 | return false; |
| 1021 | } |
| 1022 | |
| 1023 | // Externally-defined functions with weak linkage should not be |
| 1024 | // tail-called on AArch64 when the OS does not support dynamic |
| 1025 | // pre-emption of symbols, as the AAELF spec requires normal calls |
| 1026 | // to undefined weak functions to be replaced with a NOP or jump to the |
| 1027 | // next instruction. The behaviour of branch instructions in this |
| 1028 | // situation (as used for tail calls) is implementation-defined, so we |
| 1029 | // cannot rely on the linker replacing the tail call with a return. |
| 1030 | if (Info.Callee.isGlobal()) { |
| 1031 | const GlobalValue *GV = Info.Callee.getGlobal(); |
| 1032 | const Triple &TT = MF.getTarget().getTargetTriple(); |
| 1033 | if (GV->hasExternalWeakLinkage() && |
| 1034 | (!TT.isOSWindows() || TT.isOSBinFormatELF() || |
| 1035 | TT.isOSBinFormatMachO())) { |
| 1036 | LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function " |
| 1037 | "with weak linkage for this OS.\n" ); |
| 1038 | return false; |
| 1039 | } |
| 1040 | } |
| 1041 | |
| 1042 | // If we have -tailcallopt, then we're done. |
| 1043 | if (canGuaranteeTCO(CC: CalleeCC, GuaranteeTailCalls: MF.getTarget().Options.GuaranteedTailCallOpt)) |
| 1044 | return CalleeCC == CallerF.getCallingConv(); |
| 1045 | |
| 1046 | // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). |
| 1047 | // Try to find cases where we can do that. |
| 1048 | |
| 1049 | // I want anyone implementing a new calling convention to think long and hard |
| 1050 | // about this assert. |
| 1051 | assert((!Info.IsVarArg || CalleeCC == CallingConv::C) && |
| 1052 | "Unexpected variadic calling convention" ); |
| 1053 | |
| 1054 | // Verify that the incoming and outgoing arguments from the callee are |
| 1055 | // safe to tail call. |
| 1056 | if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) { |
| 1057 | LLVM_DEBUG( |
| 1058 | dbgs() |
| 1059 | << "... Caller and callee have incompatible calling conventions.\n" ); |
| 1060 | return false; |
| 1061 | } |
| 1062 | |
| 1063 | if (!areCalleeOutgoingArgsTailCallable(Info, MF, OrigOutArgs&: OutArgs)) |
| 1064 | return false; |
| 1065 | |
| 1066 | LLVM_DEBUG( |
| 1067 | dbgs() << "... Call is eligible for tail call optimization.\n" ); |
| 1068 | return true; |
| 1069 | } |
| 1070 | |
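| | /// Pick the opcode for a call: BL/BLR/BLRA for normal calls, or one of the |
| | /// TCRETURN*/AUTH_TCRETURN* variants for tail calls, honoring BTI and PAuthLR |
| | /// restrictions on x16/x17. |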
| 1071 | static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, |
| 1072 | bool IsTailCall, |
| 1073 | std::optional<CallLowering::PtrAuthInfo> &PAI, |
| 1074 | MachineRegisterInfo &MRI) { |
| 1075 | const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>(); |
| 1076 | |
| 1077 | if (!IsTailCall) { |
| 1078 | if (!PAI) |
| 1079 | return IsIndirect ? getBLRCallOpcode(MF: CallerF) : (unsigned)AArch64::BL; |
| 1080 | |
| 1081 | assert(IsIndirect && "Direct call should not be authenticated" ); |
| 1082 | assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) && |
| 1083 | "Invalid auth call key" ); |
| 1084 | return AArch64::BLRA; |
| 1085 | } |
| 1086 | |
| 1087 | if (!IsIndirect) |
| 1088 | return AArch64::TCRETURNdi; |
| 1089 | |
| 1090 | // When BTI or PAuthLR are enabled, there are restrictions on using x16 and |
| 1091 | // x17 to hold the function pointer. |
| 1092 | if (FuncInfo->branchTargetEnforcement()) { |
| 1093 | if (FuncInfo->branchProtectionPAuthLR()) { |
| 1094 | assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR" ); |
| 1095 | return AArch64::TCRETURNrix17; |
| 1096 | } |
| 1097 | if (PAI) |
| 1098 | return AArch64::AUTH_TCRETURN_BTI; |
| 1099 | return AArch64::TCRETURNrix16x17; |
| 1100 | } |
| 1101 | |
| 1102 | if (FuncInfo->branchProtectionPAuthLR()) { |
| 1103 | assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR" ); |
| 1104 | return AArch64::TCRETURNrinotx16; |
| 1105 | } |
| 1106 | |
| 1107 | if (PAI) |
| 1108 | return AArch64::AUTH_TCRETURN; |
| 1109 | return AArch64::TCRETURNri; |
| 1110 | } |
| 1111 | |
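| | /// Compute the call-preserved register mask. For 'this'-returns (a first |
| | /// argument marked 'returned'), prefer the X0-preserving mask when the calling |
| | /// convention provides one; otherwise clear the flag and use the normal mask. |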
| 1112 | static const uint32_t * |
| 1113 | getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs, |
| 1114 | AArch64CallLowering::CallLoweringInfo &Info, |
| 1115 | const AArch64RegisterInfo &TRI, MachineFunction &MF) { |
| 1116 | const uint32_t *Mask; |
| 1117 | if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) { |
| 1118 | // For 'this' returns, use the X0-preserving mask if applicable |
| 1119 | Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv); |
| 1120 | if (!Mask) { |
| 1121 | OutArgs[0].Flags[0].setReturned(false); |
| 1122 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
| 1123 | } |
| 1124 | } else { |
| 1125 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
| 1126 | } |
| 1127 | return Mask; |
| 1128 | } |
| 1129 | |
| 1130 | bool AArch64CallLowering::lowerTailCall( |
| 1131 | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
| 1132 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
| 1133 | MachineFunction &MF = MIRBuilder.getMF(); |
| 1134 | const Function &F = MF.getFunction(); |
| 1135 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 1136 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 1137 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 1138 | |
| 1139 | // True when we're tail calling, but without -tailcallopt. |
| 1140 | bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt && |
| 1141 | Info.CallConv != CallingConv::Tail && |
| 1142 | Info.CallConv != CallingConv::SwiftTail; |
| 1143 | |
| 1144 | // Find out which ABI gets to decide where things go. |
| 1145 | CallingConv::ID CalleeCC = Info.CallConv; |
| 1146 | CCAssignFn *AssignFnFixed; |
| 1147 | CCAssignFn *AssignFnVarArg; |
| 1148 | std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI); |
| 1149 | |
| 1150 | MachineInstrBuilder CallSeqStart; |
| 1151 | if (!IsSibCall) |
| 1152 | CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN); |
| 1153 | |
| 1154 | unsigned Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: true, PAI&: Info.PAI, MRI); |
| 1155 | auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc); |
| 1156 | MIB.add(MO: Info.Callee); |
| 1157 | |
| 1158 | // Grab the register info for the clobber mask and operand constraints below. |
| 1159 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 1160 | auto TRI = Subtarget.getRegisterInfo(); |
| 1161 | |
| 1162 | // Byte offset for the tail call. When we are sibcalling, this will always |
| 1163 | // be 0. |
| 1164 | MIB.addImm(Val: 0); |
| 1165 | |
| 1166 | // Authenticated tail calls always take key/discriminator arguments. |
| 1167 | if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) { |
| 1168 | assert((Info.PAI->Key == AArch64PACKey::IA || |
| 1169 | Info.PAI->Key == AArch64PACKey::IB) && |
| 1170 | "Invalid auth call key" ); |
| 1171 | MIB.addImm(Val: Info.PAI->Key); |
| 1172 | |
| 1173 | Register AddrDisc = 0; |
| 1174 | uint16_t IntDisc = 0; |
| 1175 | std::tie(args&: IntDisc, args&: AddrDisc) = |
| 1176 | extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI); |
| 1177 | |
| 1178 | MIB.addImm(Val: IntDisc); |
| 1179 | MIB.addUse(RegNo: AddrDisc); |
| 1180 | if (AddrDisc != AArch64::NoRegister) { |
| 1181 | MIB->getOperand(i: 4).setReg(constrainOperandRegClass( |
| 1182 | MF, TRI: *TRI, MRI, TII: *MF.getSubtarget().getInstrInfo(), |
| 1183 | RBI: *MF.getSubtarget().getRegBankInfo(), InsertPt&: *MIB, II: MIB->getDesc(), |
| 1184 | RegMO&: MIB->getOperand(i: 4), OpIdx: 4)); |
| 1185 | } |
| 1186 | } |
| 1187 | |
| 1188 | // Tell the call which registers are clobbered. |
| 1189 | const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC); |
| 1190 | if (Subtarget.hasCustomCallingConv()) |
| 1191 | TRI->UpdateCustomCallPreservedMask(MF, Mask: &Mask); |
| 1192 | MIB.addRegMask(Mask); |
| 1193 | |
  if (Info.CFIType)
    MIB->setCFIType(MF, Info.CFIType->getZExtValue());

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                                Subtarget, /*IsReturn*/ false);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getStackSize(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // Update the required reserved area if this is the tail call requiring the
    // most argument stack space.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started at
    // a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

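  // Physical registers that must be forwarded to a variadic musttail callee;
  // copies into their virtual counterparts were created when the caller's own
  // formal arguments were lowered.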
  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);

  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
                             /*IsTailCall*/ true, FPDiff);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call
    // for the forwarded registers that we *aren't* passing as parameters. This
    // will preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (MIB->getOperand(0).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), MIB->getOperand(0), 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  // Arm64EC has special mangling rules for calls and extra requirements for
  // varargs; bail out on all calls for now.
  if (Subtarget.isWindowsArm64EC())
    return false;

  // Arm64EC thunks have a special calling convention which is only implemented
  // in SelectionDAG; bail out for now.
  if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
      Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
    return false;

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    auto &Flags = OrigArg.Flags[0];
    if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
      ArgInfo &OutArg = OutArgs.back();
      assert(OutArg.Regs.size() == 1 &&
             MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");

      // We cannot use a ZExt ArgInfo flag here, because it will
      // zero-extend the argument to i32 instead of just i8.
      OutArg.Regs[0] =
          MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
      LLVMContext &Ctx = MF.getFunction().getContext();
      OutArg.Ty = Type::getInt8Ty(Ctx);
    }
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  Info.IsTailCall = CanTailCallOpt;
  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

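  // Open the call frame now; the stack size operands are appended below, once
  // argument assignment has determined how much stack the call needs.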
  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.

  unsigned Opc = 0;
  // Calls with operand bundle "clang.arc.attachedcall" are special. They should
  // be expanded to the call, directly followed by a special marker sequence and
  // a call to an ObjC library function.
  if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
    Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
  // A call to a returns-twice function like setjmp must be followed by a BTI
  // instruction.
  else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) &&
           !Subtarget.noBTIAtReturnTwice() &&
           MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    Opc = AArch64::BLR_BTI;
  else {
    // For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
    // is set.
    if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
      DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
      MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
      Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
    }
    Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI);
  }

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
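  // Operand index of the callee within MIB. Pseudos such as BLR_RVMARKER
  // prepend extra operands before the callee, so track the offset explicitly.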
  unsigned CalleeOpNo = 0;

  if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
    // Add a target global address for the retainRV/claimRV runtime function
    // just before the call target.
    Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
    MIB.addGlobalAddress(ARCFn);
    ++CalleeOpNo;

    // We may or may not need to emit both the marker and the retain/claim call.
    // Tell the pseudo expansion using an additional boolean op.
    MIB.addImm(objcarc::attachedCallOpBundleNeedsMarker(Info.CB));
    ++CalleeOpNo;
  } else if (Info.CFIType) {
    MIB->setCFIType(MF, Info.CFIType->getZExtValue());
  }

  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
    assert((Info.PAI->Key == AArch64PACKey::IA ||
            Info.PAI->Key == AArch64PACKey::IB) &&
           "Invalid auth call key");
    MIB.addImm(Info.PAI->Key);

    Register AddrDisc = 0;
    uint16_t IntDisc = 0;
    std::tie(IntDisc, AddrDisc) =
        extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);

    MIB.addImm(IntDisc);
    MIB.addUse(AddrDisc);
    if (AddrDisc != AArch64::NoRegister) {
      constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                               *MF.getSubtarget().getRegBankInfo(), *MIB,
                               MIB->getDesc(), MIB->getOperand(CalleeOpNo + 3),
                               CalleeOpNo + 3);
    }
  }

  // Tell the call which registers are clobbered.
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

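  // Diagnose calls that would need an argument register the user has reserved
  // (e.g. via -ffixed-xN).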
  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

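  // If the callee cleans up its own stack arguments (e.g. under -tailcallopt),
  // record how many bytes it will pop on return; otherwise the caller adjusts
  // the stack and the callee pops nothing.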
  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackSize, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackSize).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackSize)
      .addImm(CalleePopBytes);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (MIB->getOperand(CalleeOpNo).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             MIB->getOperand(CalleeOpNo), CalleeOpNo);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? ArrayRef(OutArgs[0].Regs)
                             : ArrayRef<Register>()))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

  if (!Info.CanLowerReturn) {
    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                    Info.DemoteRegister, Info.DemoteStackIndex);
  }
  return true;
}

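// Forwarding of "returned" arguments is only used for values that are exactly
// 64 bits wide.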
bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}
| 1527 | |