| 1 | //===--- AArch64CallLowering.cpp - Call lowering --------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file implements the lowering of LLVM calls to machine code calls for |
| 11 | /// GlobalISel. |
| 12 | /// |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "AArch64CallLowering.h" |
| 16 | #include "AArch64GlobalISelUtils.h" |
| 17 | #include "AArch64ISelLowering.h" |
| 18 | #include "AArch64MachineFunctionInfo.h" |
| 19 | #include "AArch64RegisterInfo.h" |
| 20 | #include "AArch64SMEAttributes.h" |
| 21 | #include "AArch64Subtarget.h" |
| 22 | #include "AArch64TargetMachine.h" |
| 23 | #include "llvm/ADT/ArrayRef.h" |
| 24 | #include "llvm/ADT/SmallVector.h" |
| 25 | #include "llvm/Analysis/ObjCARCUtil.h" |
| 26 | #include "llvm/CodeGen/Analysis.h" |
| 27 | #include "llvm/CodeGen/CallingConvLower.h" |
| 28 | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
| 29 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
| 30 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
| 31 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
| 32 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
| 33 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 34 | #include "llvm/CodeGen/MachineFrameInfo.h" |
| 35 | #include "llvm/CodeGen/MachineFunction.h" |
| 36 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 37 | #include "llvm/CodeGen/MachineMemOperand.h" |
| 38 | #include "llvm/CodeGen/MachineOperand.h" |
| 39 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 40 | #include "llvm/CodeGen/TargetOpcodes.h" |
| 41 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 42 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 43 | #include "llvm/CodeGen/ValueTypes.h" |
| 44 | #include "llvm/CodeGenTypes/MachineValueType.h" |
| 45 | #include "llvm/IR/Argument.h" |
| 46 | #include "llvm/IR/Attributes.h" |
| 47 | #include "llvm/IR/Function.h" |
| 48 | #include "llvm/IR/Type.h" |
| 49 | #include "llvm/IR/Value.h" |
#include "llvm/Support/CommandLine.h"
| 50 | #include <algorithm> |
| 51 | #include <cassert> |
| 52 | #include <cstdint> |
| 53 | |
| 54 | #define DEBUG_TYPE "aarch64-call-lowering" |
| 55 | |
| 56 | using namespace llvm; |
| 57 | using namespace AArch64GISelUtils; |
| 58 | |
| 59 | extern cl::opt<bool> EnableSVEGISel; |
| 60 | |
| 61 | AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) |
| 62 | : CallLowering(&TLI) {} |
| 63 | |
| 64 | static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT, |
| 65 | MVT &LocVT) { |
| 66 | // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy |
| 67 | // hack because the DAG calls the assignment function with pre-legalized |
| 68 | // register typed values, not the raw type. |
| 69 | // |
| 70 | // This hack is not applied to return values which are not passed on the |
| 71 | // stack. |
| 72 | if (OrigVT == MVT::i1 || OrigVT == MVT::i8) |
| 73 | ValVT = LocVT = MVT::i8; |
| 74 | else if (OrigVT == MVT::i16) |
| 75 | ValVT = LocVT = MVT::i16; |
| 76 | } |
| 77 | |
| 78 | // Account for i1/i8/i16 stack passed value hack |
| 79 | static LLT getStackValueStoreTypeHack(const CCValAssign &VA) { |
| 80 | const MVT ValVT = VA.getValVT(); |
| 81 | return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT) |
| 82 | : LLT(VA.getLocVT()); |
| 83 | } |
| 84 | |
| 85 | namespace { |
| 86 | |
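/// Incoming value assigner that applies the i1/i8/i16 stack-passed type hack
/// before delegating to the generic IncomingValueAssigner.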
| 87 | struct AArch64IncomingValueAssigner |
| 88 | : public CallLowering::IncomingValueAssigner { |
| 89 | AArch64IncomingValueAssigner(CCAssignFn *AssignFn_, |
| 90 | CCAssignFn *AssignFnVarArg_) |
| 91 | : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {} |
| 92 | |
| 93 | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
| 94 | CCValAssign::LocInfo LocInfo, |
| 95 | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
| 96 | CCState &State) override { |
| 97 | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
| 98 | return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT, |
| 99 | LocInfo, Info, Flags, State); |
| 100 | } |
| 101 | }; |
| 102 | |
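/// Outgoing value assigner that picks between the fixed and vararg assignment
/// functions (Win64 varargs use the vararg convention even for fixed
/// arguments), applies the stack-passed small type hack for non-return
/// values, and records the resulting stack size.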
| 103 | struct AArch64OutgoingValueAssigner |
| 104 | : public CallLowering::OutgoingValueAssigner { |
| 105 | const AArch64Subtarget &Subtarget; |
| 106 | |
| 107 |   /// Track if this is used for a return instead of function argument
| 108 |   /// passing. We apply a hack to i1/i8/i16 stack passed values, but returns
| 109 |   /// are not passed on the stack, so the type adjustment must not be applied.
| 110 | bool IsReturn; |
| 111 | |
| 112 | AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_, |
| 113 | CCAssignFn *AssignFnVarArg_, |
| 114 | const AArch64Subtarget &Subtarget_, |
| 115 | bool IsReturn) |
| 116 | : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_), |
| 117 | Subtarget(Subtarget_), IsReturn(IsReturn) {} |
| 118 | |
| 119 | bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT, |
| 120 | CCValAssign::LocInfo LocInfo, |
| 121 | const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags, |
| 122 | CCState &State) override { |
| 123 | const Function &F = State.getMachineFunction().getFunction(); |
| 124 | bool IsCalleeWin = |
| 125 | Subtarget.isCallingConvWin64(CC: State.getCallingConv(), IsVarArg: F.isVarArg()); |
| 126 | bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg(); |
| 127 | |
| 128 | bool Res; |
| 129 | if (!Flags.isVarArg() && !UseVarArgsCCForFixed) { |
| 130 | if (!IsReturn) |
| 131 | applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT); |
| 132 | Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, Info.Ty, State); |
| 133 | } else |
| 134 | Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, Info.Ty, State); |
| 135 | |
| 136 | StackSize = State.getStackSize(); |
| 137 | return Res; |
| 138 | } |
| 139 | }; |
| 140 | |
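/// Base handler for incoming values: arguments arriving in physical registers
/// are copied into virtual registers, and stack-passed arguments are loaded
/// from fixed stack objects. Subclasses decide how a used physical register
/// is marked.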
| 141 | struct IncomingArgHandler : public CallLowering::IncomingValueHandler { |
| 142 | IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
| 143 | : IncomingValueHandler(MIRBuilder, MRI) {} |
| 144 | |
| 145 | Register getStackAddress(uint64_t Size, int64_t Offset, |
| 146 | MachinePointerInfo &MPO, |
| 147 | ISD::ArgFlagsTy Flags) override { |
| 148 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
| 149 | |
| 150 | // Byval is assumed to be writable memory, but other stack passed arguments |
| 151 | // are not. |
| 152 | const bool IsImmutable = !Flags.isByVal(); |
| 153 | |
| 154 | int FI = MFI.CreateFixedObject(Size, SPOffset: Offset, IsImmutable); |
| 155 | MPO = MachinePointerInfo::getFixedStack(MF&: MIRBuilder.getMF(), FI); |
| 156 | auto AddrReg = MIRBuilder.buildFrameIndex(Res: LLT::pointer(AddressSpace: 0, SizeInBits: 64), Idx: FI); |
| 157 | return AddrReg.getReg(Idx: 0); |
| 158 | } |
| 159 | |
| 160 | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
| 161 | ISD::ArgFlagsTy Flags) const override { |
| 162 | // For pointers, we just need to fixup the integer types reported in the |
| 163 | // CCValAssign. |
| 164 | if (Flags.isPointer()) |
| 165 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
| 166 | return getStackValueStoreTypeHack(VA); |
| 167 | } |
| 168 | |
| 169 | void assignValueToReg(Register ValVReg, Register PhysReg, |
| 170 | const CCValAssign &VA) override { |
| 171 | markRegUsed(Reg: PhysReg); |
| 172 | IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); |
| 173 | } |
| 174 | |
| 175 | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
| 176 | const MachinePointerInfo &MPO, |
| 177 | const CCValAssign &VA) override { |
| 178 | MachineFunction &MF = MIRBuilder.getMF(); |
| 179 | |
| 180 | LLT ValTy(VA.getValVT()); |
| 181 | LLT LocTy(VA.getLocVT()); |
| 182 | |
| 183 | // Fixup the types for the DAG compatibility hack. |
| 184 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) |
| 185 | std::swap(a&: ValTy, b&: LocTy); |
| 186 | else { |
| 187 |       // The calling code knows if this is a pointer or not; we're only touching
| 188 | // the LocTy for the i8/i16 hack. |
| 189 | assert(LocTy.getSizeInBits() == MemTy.getSizeInBits()); |
| 190 | LocTy = MemTy; |
| 191 | } |
| 192 | |
| 193 | auto MMO = MF.getMachineMemOperand( |
| 194 | PtrInfo: MPO, f: MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, MemTy: LocTy, |
| 195 | base_alignment: inferAlignFromPtrInfo(MF, MPO)); |
| 196 | |
| 197 | switch (VA.getLocInfo()) { |
| 198 | case CCValAssign::LocInfo::ZExt: |
| 199 | MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_ZEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO); |
| 200 | return; |
| 201 | case CCValAssign::LocInfo::SExt: |
| 202 | MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: ValVReg, Addr, MMO&: *MMO); |
| 203 | return; |
| 204 | default: |
| 205 | MIRBuilder.buildLoad(Res: ValVReg, Addr, MMO&: *MMO); |
| 206 | return; |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | /// How the physical register gets marked varies between formal |
| 211 | /// parameters (it's a basic-block live-in), and a call instruction |
| 212 | /// (it's an implicit-def of the BL). |
| 213 | virtual void markRegUsed(Register Reg) = 0; |
| 214 | }; |
| 215 | |
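/// Incoming handler for formal arguments: used physical registers are
/// recorded as function and basic-block live-ins.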
| 216 | struct FormalArgHandler : public IncomingArgHandler { |
| 217 | FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) |
| 218 | : IncomingArgHandler(MIRBuilder, MRI) {} |
| 219 | |
| 220 | void markRegUsed(Register Reg) override { |
| 221 | MIRBuilder.getMRI()->addLiveIn(Reg: Reg.asMCReg()); |
| 222 | MIRBuilder.getMBB().addLiveIn(PhysReg: Reg.asMCReg()); |
| 223 | } |
| 224 | }; |
| 225 | |
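/// Incoming handler for call return values: each used physical register is
/// added as an implicit def of the call instruction.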
| 226 | struct CallReturnHandler : public IncomingArgHandler { |
| 227 | CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
| 228 | MachineInstrBuilder MIB) |
| 229 | : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {} |
| 230 | |
| 231 | void markRegUsed(Register Reg) override { |
| 232 | MIB.addDef(RegNo: Reg, Flags: RegState::Implicit); |
| 233 | } |
| 234 | |
| 235 | MachineInstrBuilder MIB; |
| 236 | }; |
| 237 | |
| 238 | /// A special return arg handler for "returned" attribute arg calls. |
| 239 | struct ReturnedArgCallReturnHandler : public CallReturnHandler { |
| 240 | ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder, |
| 241 | MachineRegisterInfo &MRI, |
| 242 | MachineInstrBuilder MIB) |
| 243 | : CallReturnHandler(MIRBuilder, MRI, MIB) {} |
| 244 | |
| 245 | void markRegUsed(Register Reg) override {} |
| 246 | }; |
| 247 | |
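/// Handler for outgoing values, used both for call arguments and for return
/// values. Register values are copied into physical registers and attached to
/// the call or return instruction as implicit uses; stack values are stored
/// relative to SP, or into fixed stack objects for tail calls.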
| 248 | struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { |
| 249 | OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, |
| 250 | MachineInstrBuilder MIB, bool IsTailCall = false, |
| 251 | int FPDiff = 0) |
| 252 | : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall), |
| 253 | FPDiff(FPDiff), |
| 254 | Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {} |
| 255 | |
| 256 | Register getStackAddress(uint64_t Size, int64_t Offset, |
| 257 | MachinePointerInfo &MPO, |
| 258 | ISD::ArgFlagsTy Flags) override { |
| 259 | MachineFunction &MF = MIRBuilder.getMF(); |
| 260 | LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
| 261 | LLT s64 = LLT::scalar(SizeInBits: 64); |
| 262 | |
| 263 | if (IsTailCall) { |
| 264 | assert(!Flags.isByVal() && "byval unhandled with tail calls" ); |
| 265 | |
| 266 | Offset += FPDiff; |
| 267 | int FI = MF.getFrameInfo().CreateFixedObject(Size, SPOffset: Offset, IsImmutable: true); |
| 268 | auto FIReg = MIRBuilder.buildFrameIndex(Res: p0, Idx: FI); |
| 269 | MPO = MachinePointerInfo::getFixedStack(MF, FI); |
| 270 | return FIReg.getReg(Idx: 0); |
| 271 | } |
| 272 | |
| 273 | if (!SPReg) |
| 274 | SPReg = MIRBuilder.buildCopy(Res: p0, Op: Register(AArch64::SP)).getReg(Idx: 0); |
| 275 | |
| 276 | auto OffsetReg = MIRBuilder.buildConstant(Res: s64, Val: Offset); |
| 277 | |
| 278 | auto AddrReg = MIRBuilder.buildPtrAdd(Res: p0, Op0: SPReg, Op1: OffsetReg); |
| 279 | |
| 280 | MPO = MachinePointerInfo::getStack(MF, Offset); |
| 281 | return AddrReg.getReg(Idx: 0); |
| 282 | } |
| 283 | |
| 284 | /// We need to fixup the reported store size for certain value types because |
| 285 | /// we invert the interpretation of ValVT and LocVT in certain cases. This is |
| 286 | /// for compatibility with the DAG call lowering implementation, which we're |
| 287 | /// currently building on top of. |
| 288 | LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA, |
| 289 | ISD::ArgFlagsTy Flags) const override { |
| 290 | if (Flags.isPointer()) |
| 291 | return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags); |
| 292 | return getStackValueStoreTypeHack(VA); |
| 293 | } |
| 294 | |
| 295 | void assignValueToReg(Register ValVReg, Register PhysReg, |
| 296 | const CCValAssign &VA) override { |
| 297 | MIB.addUse(RegNo: PhysReg, Flags: RegState::Implicit); |
| 298 | Register ExtReg = extendRegister(ValReg: ValVReg, VA); |
| 299 | MIRBuilder.buildCopy(Res: PhysReg, Op: ExtReg); |
| 300 | } |
| 301 | |
| 302 | /// Check whether a stack argument requires lowering in a tail call. |
| 303 | static bool shouldLowerTailCallStackArg(const MachineFunction &MF, |
| 304 | const CCValAssign &VA, |
| 305 | Register ValVReg, |
| 306 | Register StoreAddr) { |
| 307 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 308 |     // Find the instruction that defines the value.
| 309 | auto *DefMI = MRI.getVRegDef(Reg: ValVReg); |
| 310 | assert(DefMI && "No defining instruction" ); |
| 311 | for (;;) { |
| 312 | // Look through nodes that don't alter the bits of the incoming value. |
| 313 | unsigned Op = DefMI->getOpcode(); |
| 314 | if (Op == TargetOpcode::G_ZEXT || Op == TargetOpcode::G_ANYEXT || |
| 315 | Op == TargetOpcode::G_BITCAST || isAssertMI(MI: *DefMI)) { |
| 316 | DefMI = MRI.getVRegDef(Reg: DefMI->getOperand(i: 1).getReg()); |
| 317 | continue; |
| 318 | } |
| 319 | break; |
| 320 | } |
| 321 | |
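    // If the value is simply a load from an immutable fixed stack slot with
    // the same offset and size as the destination slot, the argument is
    // already in place on the stack and the store can be skipped.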
| 322 | auto *Load = dyn_cast<GLoad>(Val: DefMI); |
| 323 | if (!Load) |
| 324 | return true; |
| 325 | Register LoadReg = Load->getPointerReg(); |
| 326 | auto *LoadAddrDef = MRI.getVRegDef(Reg: LoadReg); |
| 327 | if (LoadAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX) |
| 328 | return true; |
| 329 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 330 | int LoadFI = LoadAddrDef->getOperand(i: 1).getIndex(); |
| 331 | |
| 332 | auto *StoreAddrDef = MRI.getVRegDef(Reg: StoreAddr); |
| 333 | if (StoreAddrDef->getOpcode() != TargetOpcode::G_FRAME_INDEX) |
| 334 | return true; |
| 335 | int StoreFI = StoreAddrDef->getOperand(i: 1).getIndex(); |
| 336 | |
| 337 | if (!MFI.isImmutableObjectIndex(ObjectIdx: LoadFI)) |
| 338 | return true; |
| 339 | if (MFI.getObjectOffset(ObjectIdx: LoadFI) != MFI.getObjectOffset(ObjectIdx: StoreFI)) |
| 340 | return true; |
| 341 | if (Load->getMemSize() != MFI.getObjectSize(ObjectIdx: StoreFI)) |
| 342 | return true; |
| 343 | |
| 344 | return false; |
| 345 | } |
| 346 | |
| 347 | void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, |
| 348 | const MachinePointerInfo &MPO, |
| 349 | const CCValAssign &VA) override { |
| 350 | MachineFunction &MF = MIRBuilder.getMF(); |
| 351 | if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, ValVReg, StoreAddr: Addr)) |
| 352 | return; |
| 353 | auto MMO = MF.getMachineMemOperand(PtrInfo: MPO, f: MachineMemOperand::MOStore, MemTy, |
| 354 | base_alignment: inferAlignFromPtrInfo(MF, MPO)); |
| 355 | MIRBuilder.buildStore(Val: ValVReg, Addr, MMO&: *MMO); |
| 356 | } |
| 357 | |
| 358 | void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex, |
| 359 | Register Addr, LLT MemTy, |
| 360 | const MachinePointerInfo &MPO, |
| 361 | const CCValAssign &VA) override { |
| 362 | unsigned MaxSize = MemTy.getSizeInBytes() * 8; |
| 363 | // For varargs, we always want to extend them to 8 bytes, in which case |
| 364 | // we disable setting a max. |
| 365 | if (Arg.Flags[0].isVarArg()) |
| 366 | MaxSize = 0; |
| 367 | |
| 368 | Register ValVReg = Arg.Regs[RegIndex]; |
| 369 | if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) { |
| 370 | MVT LocVT = VA.getLocVT(); |
| 371 | MVT ValVT = VA.getValVT(); |
| 372 | |
| 373 | if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) { |
| 374 | std::swap(a&: ValVT, b&: LocVT); |
| 375 | MemTy = LLT(VA.getValVT()); |
| 376 | } |
| 377 | |
| 378 | ValVReg = extendRegister(ValReg: ValVReg, VA, MaxSizeBits: MaxSize); |
| 379 | } else { |
| 380 | // The store does not cover the full allocated stack slot. |
| 381 | MemTy = LLT(VA.getValVT()); |
| 382 | } |
| 383 | |
| 384 | assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA); |
| 385 | } |
| 386 | |
| 387 | MachineInstrBuilder MIB; |
| 388 | |
| 389 | bool IsTailCall; |
| 390 | |
| 391 | /// For tail calls, the byte offset of the call's argument area from the |
| 392 | /// callee's. Unused elsewhere. |
| 393 | int FPDiff; |
| 394 | |
| 395 | // Cache the SP register vreg if we need it more than once in this call site. |
| 396 | Register SPReg; |
| 397 | |
| 398 | const AArch64Subtarget &Subtarget; |
| 399 | }; |
| 400 | } // namespace |
| 401 | |
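/// Return true if the callee is expected to pop its own stack arguments: this
/// is the case for fastcc with -tailcallopt, tailcc, and swifttailcc.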
| 402 | static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) { |
| 403 | return (CallConv == CallingConv::Fast && TailCallOpt) || |
| 404 | CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; |
| 405 | } |
| 406 | |
| 407 | bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, |
| 408 | const Value *Val, |
| 409 | ArrayRef<Register> VRegs, |
| 410 | FunctionLoweringInfo &FLI, |
| 411 | Register SwiftErrorVReg) const { |
| 412 | auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: AArch64::RET_ReallyLR); |
| 413 | assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && |
| 414 | "Return value without a vreg" ); |
| 415 | |
| 416 | bool Success = true; |
| 417 | if (!FLI.CanLowerReturn) { |
| 418 | insertSRetStores(MIRBuilder, RetTy: Val->getType(), VRegs, DemoteReg: FLI.DemoteRegister); |
| 419 | } else if (!VRegs.empty()) { |
| 420 | MachineFunction &MF = MIRBuilder.getMF(); |
| 421 | const Function &F = MF.getFunction(); |
| 422 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 423 | |
| 424 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 425 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 426 | CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC: F.getCallingConv()); |
| 427 | auto &DL = F.getDataLayout(); |
| 428 | LLVMContext &Ctx = Val->getType()->getContext(); |
| 429 | |
| 430 | SmallVector<EVT, 4> SplitEVTs; |
| 431 | ComputeValueVTs(TLI, DL, Ty: Val->getType(), ValueVTs&: SplitEVTs); |
| 432 | assert(VRegs.size() == SplitEVTs.size() && |
| 433 | "For each split Type there should be exactly one VReg." ); |
| 434 | |
| 435 | SmallVector<ArgInfo, 8> SplitArgs; |
| 436 | CallingConv::ID CC = F.getCallingConv(); |
| 437 | |
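    // Adjust each split return value so its type matches what the calling
    // convention expects: i1 is zero extended to i8, scalars are extended as
    // directed by the return attributes, and short vectors are padded with
    // undef elements.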
| 438 | for (unsigned i = 0; i < SplitEVTs.size(); ++i) { |
| 439 | Register CurVReg = VRegs[i]; |
| 440 | ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Context&: Ctx), 0}; |
| 441 | setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F); |
| 442 | |
| 443 |       // i1 is a special case: in SDAG, an i1 'true' widened using ANYEXT is
| 444 |       // naturally zero extended, so here we need to zero extend explicitly.
| 445 | auto &Flags = CurArgInfo.Flags[0]; |
| 446 | if (MRI.getType(Reg: CurVReg).getSizeInBits() == TypeSize::getFixed(ExactSize: 1) && |
| 447 | !Flags.isSExt() && !Flags.isZExt()) { |
| 448 | CurVReg = MIRBuilder.buildZExt(Res: LLT::scalar(SizeInBits: 8), Op: CurVReg).getReg(Idx: 0); |
| 449 | } else if (TLI.getNumRegistersForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]) == |
| 450 | 1) { |
| 451 | // Some types will need extending as specified by the CC. |
| 452 | MVT NewVT = TLI.getRegisterTypeForCallingConv(Context&: Ctx, CC, VT: SplitEVTs[i]); |
| 453 | if (EVT(NewVT) != SplitEVTs[i]) { |
| 454 | unsigned ExtendOp = TargetOpcode::G_ANYEXT; |
| 455 | if (F.getAttributes().hasRetAttr(Kind: Attribute::SExt)) |
| 456 | ExtendOp = TargetOpcode::G_SEXT; |
| 457 | else if (F.getAttributes().hasRetAttr(Kind: Attribute::ZExt)) |
| 458 | ExtendOp = TargetOpcode::G_ZEXT; |
| 459 | |
| 460 | LLT NewLLT(NewVT); |
| 461 | LLT OldLLT = getLLTForType(Ty&: *CurArgInfo.Ty, DL); |
| 462 | CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Context&: Ctx); |
| 463 | // Instead of an extend, we might have a vector type which needs |
| 464 | // padding with more elements, e.g. <2 x half> -> <4 x half>. |
| 465 | if (NewVT.isVector()) { |
| 466 | if (OldLLT.isVector()) { |
| 467 | if (NewLLT.getNumElements() > OldLLT.getNumElements()) { |
| 468 | CurVReg = |
| 469 | MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg) |
| 470 | .getReg(Idx: 0); |
| 471 | } else { |
| 472 | // Just do a vector extend. |
| 473 | CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg}) |
| 474 | .getReg(Idx: 0); |
| 475 | } |
| 476 | } else if (NewLLT.getNumElements() >= 2 && |
| 477 | NewLLT.getNumElements() <= 8) { |
| 478 | // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't |
| 479 | // have <1 x S> vector types in GISel we use a build_vector |
| 480 | // instead of a vector merge/concat. |
| 481 | CurVReg = |
| 482 | MIRBuilder.buildPadVectorWithUndefElements(Res: NewLLT, Op0: CurVReg) |
| 483 | .getReg(Idx: 0); |
| 484 | } else { |
| 485 | LLVM_DEBUG(dbgs() << "Could not handle ret ty\n" ); |
| 486 | return false; |
| 487 | } |
| 488 | } else { |
| 489 | // If the split EVT was a <1 x T> vector, and NewVT is T, then we |
| 490 | // don't have to do anything since we don't distinguish between the |
| 491 | // two. |
| 492 | if (NewLLT != MRI.getType(Reg: CurVReg)) { |
| 493 | // A scalar extend. |
| 494 | CurVReg = MIRBuilder.buildInstr(Opc: ExtendOp, DstOps: {NewLLT}, SrcOps: {CurVReg}) |
| 495 | .getReg(Idx: 0); |
| 496 | } |
| 497 | } |
| 498 | } |
| 499 | } |
| 500 | if (CurVReg != CurArgInfo.Regs[0]) { |
| 501 | CurArgInfo.Regs[0] = CurVReg; |
| 502 | // Reset the arg flags after modifying CurVReg. |
| 503 | setArgFlags(Arg&: CurArgInfo, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F); |
| 504 | } |
| 505 | splitToValueTypes(OrigArgInfo: CurArgInfo, SplitArgs, DL, CallConv: CC); |
| 506 | } |
| 507 | |
| 508 | AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget, |
| 509 | /*IsReturn*/ true); |
| 510 | OutgoingArgHandler Handler(MIRBuilder, MRI, MIB); |
| 511 | Success = determineAndHandleAssignments(Handler, Assigner, Args&: SplitArgs, |
| 512 | MIRBuilder, CallConv: CC, IsVarArg: F.isVarArg()); |
| 513 | } |
| 514 | |
| 515 | if (SwiftErrorVReg) { |
| 516 | MIB.addUse(RegNo: AArch64::X21, Flags: RegState::Implicit); |
| 517 | MIRBuilder.buildCopy(Res: AArch64::X21, Op: SwiftErrorVReg); |
| 518 | } |
| 519 | |
| 520 | MIRBuilder.insertInstr(MIB); |
| 521 | return Success; |
| 522 | } |
| 523 | |
| 524 | bool AArch64CallLowering::canLowerReturn(MachineFunction &MF, |
| 525 | CallingConv::ID CallConv, |
| 526 | SmallVectorImpl<BaseArgInfo> &Outs, |
| 527 | bool IsVarArg) const { |
| 528 | SmallVector<CCValAssign, 16> ArgLocs; |
| 529 | const auto &TLI = *getTLI<AArch64TargetLowering>(); |
| 530 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, |
| 531 | MF.getFunction().getContext()); |
| 532 | |
| 533 | return checkReturn(CCInfo, Outs, Fn: TLI.CCAssignFnForReturn(CC: CallConv)); |
| 534 | } |
| 535 | |
| 536 | /// Helper function for musttail calls: computes the forwarded registers, sets
| 537 | /// MBB liveness, and emits COPY instructions that can be used to save and
| 538 | /// restore the registers later.
| 539 | static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, |
| 540 | CCAssignFn *AssignFn) { |
| 541 | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
| 542 | MachineFunction &MF = MIRBuilder.getMF(); |
| 543 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 544 | |
| 545 | if (!MFI.hasMustTailInVarArgFunc()) |
| 546 | return; |
| 547 | |
| 548 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 549 | const Function &F = MF.getFunction(); |
| 550 | assert(F.isVarArg() && "Expected F to be vararg?" ); |
| 551 | |
| 552 | // Compute the set of forwarded registers. The rest are scratch. |
| 553 | SmallVector<CCValAssign, 16> ArgLocs; |
| 554 | CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs, |
| 555 | F.getContext()); |
| 556 | SmallVector<MVT, 2> RegParmTypes; |
| 557 | RegParmTypes.push_back(Elt: MVT::i64); |
| 558 | RegParmTypes.push_back(Elt: MVT::f128); |
| 559 | |
| 560 | // Later on, we can use this vector to restore the registers if necessary. |
| 561 | SmallVectorImpl<ForwardedRegister> &Forwards = |
| 562 | FuncInfo->getForwardedMustTailRegParms(); |
| 563 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: AssignFn); |
| 564 | |
| 565 | // Conservatively forward X8, since it might be used for an aggregate |
| 566 | // return. |
| 567 | if (!CCInfo.isAllocated(Reg: AArch64::X8)) { |
| 568 | Register X8VReg = MF.addLiveIn(PReg: AArch64::X8, RC: &AArch64::GPR64RegClass); |
| 569 | Forwards.push_back(Elt: ForwardedRegister(X8VReg, AArch64::X8, MVT::i64)); |
| 570 | } |
| 571 | |
| 572 | // Add the forwards to the MachineBasicBlock and MachineFunction. |
| 573 | for (const auto &F : Forwards) { |
| 574 | MBB.addLiveIn(PhysReg: F.PReg); |
| 575 | MIRBuilder.buildCopy(Res: Register(F.VReg), Op: Register(F.PReg)); |
| 576 | } |
| 577 | } |
| 578 | |
| 579 | bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { |
| 580 | auto &F = MF.getFunction(); |
| 581 | const auto &TM = static_cast<const AArch64TargetMachine &>(MF.getTarget()); |
| 582 | |
| 583 | const bool GlobalISelFlag = |
| 584 | getCGPassBuilderOption().EnableGlobalISelOption.value_or(u: false); |
| 585 | |
| 586 | auto OptLevel = MF.getTarget().getOptLevel(); |
| 587 | auto EnableGlobalISelAtO = TM.getEnableGlobalISelAtO(); |
| 588 | |
| 589 |   // GlobalISel is currently only enabled when the optimization level is at
| 590 |   // most EnableGlobalISelAtO, or when it was explicitly enabled on the command
| 591 |   // line. If we reach this check and neither of those holds, GlobalISel must
| 592 |   // have been invoked from the SDAG pipeline, which only uses GlobalISel for
| 593 |   // optnone functions.
| 594 | if (static_cast<unsigned>(OptLevel) > EnableGlobalISelAtO && !GlobalISelFlag) |
| 595 | return !F.hasOptNone(); |
| 596 | |
| 597 | if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() || |
| 598 | llvm::any_of(Range: F.args(), P: [](const Argument &A) { |
| 599 | return A.getType()->isScalableTy(); |
| 600 | }))) |
| 601 | return true; |
| 602 | const auto &ST = MF.getSubtarget<AArch64Subtarget>(); |
| 603 | if (!ST.hasNEON() || !ST.hasFPARMv8()) { |
| 604 | LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n" ); |
| 605 | return true; |
| 606 | } |
| 607 | |
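  // SME functions that have ZA or ZT0 state, or a streaming or
  // streaming-compatible interface or body, are not supported yet; fall back
  // to SelectionDAG for them.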
| 608 | SMEAttrs Attrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs(); |
| 609 | if (Attrs.hasZAState() || Attrs.hasZT0State() || |
| 610 | Attrs.hasStreamingInterfaceOrBody() || |
| 611 | Attrs.hasStreamingCompatibleInterface()) |
| 612 | return true; |
| 613 | |
| 614 | return false; |
| 615 | } |
| 616 | |
| 617 | void AArch64CallLowering::saveVarArgRegisters( |
| 618 | MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler, |
| 619 | CCState &CCInfo) const { |
| 620 | auto GPRArgRegs = AArch64::getGPRArgRegs(); |
| 621 | auto FPRArgRegs = AArch64::getFPRArgRegs(); |
| 622 | |
| 623 | MachineFunction &MF = MIRBuilder.getMF(); |
| 624 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 625 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 626 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 627 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 628 | bool IsWin64CC = Subtarget.isCallingConvWin64(CC: CCInfo.getCallingConv(), |
| 629 | IsVarArg: MF.getFunction().isVarArg()); |
| 630 | const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
| 631 | const LLT s64 = LLT::scalar(SizeInBits: 64); |
| 632 | |
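  // Spill the unallocated GPR argument registers to a save area so that
  // va_arg can find them: a fixed object on Win64, otherwise a regular stack
  // object.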
| 633 | unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(Regs: GPRArgRegs); |
| 634 | unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1; |
| 635 | |
| 636 | unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR); |
| 637 | int GPRIdx = 0; |
| 638 | if (GPRSaveSize != 0) { |
| 639 | if (IsWin64CC) { |
| 640 | GPRIdx = MFI.CreateFixedObject(Size: GPRSaveSize, |
| 641 | SPOffset: -static_cast<int>(GPRSaveSize), IsImmutable: false); |
| 642 | if (GPRSaveSize & 15) |
| 643 | // The extra size here, if triggered, will always be 8. |
| 644 | MFI.CreateFixedObject(Size: 16 - (GPRSaveSize & 15), |
| 645 | SPOffset: -static_cast<int>(alignTo(Value: GPRSaveSize, Align: 16)), |
| 646 | IsImmutable: false); |
| 647 | } else |
| 648 | GPRIdx = MFI.CreateStackObject(Size: GPRSaveSize, Alignment: Align(8), isSpillSlot: false); |
| 649 | |
| 650 | auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: GPRIdx); |
| 651 | auto Offset = |
| 652 | MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 8); |
| 653 | |
| 654 | for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) { |
| 655 | Register Val = MRI.createGenericVirtualRegister(Ty: s64); |
| 656 | Handler.assignValueToReg( |
| 657 | ValVReg: Val, PhysReg: GPRArgRegs[i], |
| 658 | VA: CCValAssign::getReg(ValNo: i + MF.getFunction().getNumOperands(), ValVT: MVT::i64, |
| 659 | Reg: GPRArgRegs[i], LocVT: MVT::i64, HTP: CCValAssign::Full)); |
| 660 | auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack( |
| 661 | MF, FI: GPRIdx, Offset: (i - FirstVariadicGPR) * 8) |
| 662 | : MachinePointerInfo::getStack(MF, Offset: i * 8); |
| 663 | MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO)); |
| 664 | |
| 665 | FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0), |
| 666 | Op0: FIN.getReg(Idx: 0), Op1: Offset); |
| 667 | } |
| 668 | } |
| 669 | FuncInfo->setVarArgsGPRIndex(GPRIdx); |
| 670 | FuncInfo->setVarArgsGPRSize(GPRSaveSize); |
| 671 | |
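  // Likewise spill the unallocated FPR argument registers. Win64 varargs pass
  // floating-point values in integer registers, so no FPR save area is needed
  // in that case.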
| 672 | if (Subtarget.hasFPARMv8() && !IsWin64CC) { |
| 673 | unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(Regs: FPRArgRegs); |
| 674 | |
| 675 | unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR); |
| 676 | int FPRIdx = 0; |
| 677 | if (FPRSaveSize != 0) { |
| 678 | FPRIdx = MFI.CreateStackObject(Size: FPRSaveSize, Alignment: Align(16), isSpillSlot: false); |
| 679 | |
| 680 | auto FIN = MIRBuilder.buildFrameIndex(Res: p0, Idx: FPRIdx); |
| 681 | auto Offset = |
| 682 | MIRBuilder.buildConstant(Res: MRI.createGenericVirtualRegister(Ty: s64), Val: 16); |
| 683 | |
| 684 | for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) { |
| 685 | Register Val = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 128)); |
| 686 | Handler.assignValueToReg( |
| 687 | ValVReg: Val, PhysReg: FPRArgRegs[i], |
| 688 | VA: CCValAssign::getReg( |
| 689 | ValNo: i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs, |
| 690 | ValVT: MVT::f128, Reg: FPRArgRegs[i], LocVT: MVT::f128, HTP: CCValAssign::Full)); |
| 691 | |
| 692 | auto MPO = MachinePointerInfo::getStack(MF, Offset: i * 16); |
| 693 | MIRBuilder.buildStore(Val, Addr: FIN, PtrInfo: MPO, Alignment: inferAlignFromPtrInfo(MF, MPO)); |
| 694 | |
| 695 | FIN = MIRBuilder.buildPtrAdd(Res: MRI.createGenericVirtualRegister(Ty: p0), |
| 696 | Op0: FIN.getReg(Idx: 0), Op1: Offset); |
| 697 | } |
| 698 | } |
| 699 | FuncInfo->setVarArgsFPRIndex(FPRIdx); |
| 700 | FuncInfo->setVarArgsFPRSize(FPRSaveSize); |
| 701 | } |
| 702 | } |
| 703 | |
| 704 | bool AArch64CallLowering::lowerFormalArguments( |
| 705 | MachineIRBuilder &MIRBuilder, const Function &F, |
| 706 | ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const { |
| 707 | MachineFunction &MF = MIRBuilder.getMF(); |
| 708 | MachineBasicBlock &MBB = MIRBuilder.getMBB(); |
| 709 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 710 | auto &DL = F.getDataLayout(); |
| 711 | auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 712 | |
| 713 | // Arm64EC has extra requirements for varargs calls which are only implemented |
| 714 | // in SelectionDAG; bail out for now. |
| 715 | if (F.isVarArg() && Subtarget.isWindowsArm64EC()) |
| 716 | return false; |
| 717 | |
| 718 | // Arm64EC thunks have a special calling convention which is only implemented |
| 719 | // in SelectionDAG; bail out for now. |
| 720 | if (F.getCallingConv() == CallingConv::ARM64EC_Thunk_Native || |
| 721 | F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64) |
| 722 | return false; |
| 723 | |
| 724 | bool IsWin64 = |
| 725 | Subtarget.isCallingConvWin64(CC: F.getCallingConv(), IsVarArg: F.isVarArg()) && |
| 726 | !Subtarget.isWindowsArm64EC(); |
| 727 | |
| 728 | SmallVector<ArgInfo, 8> SplitArgs; |
| 729 | SmallVector<std::pair<Register, Register>> BoolArgs; |
| 730 | |
| 731 | // Insert the hidden sret parameter if the return value won't fit in the |
| 732 | // return registers. |
| 733 | if (!FLI.CanLowerReturn) |
| 734 | insertSRetIncomingArgument(F, SplitArgs, DemoteReg&: FLI.DemoteRegister, MRI, DL); |
| 735 | |
| 736 | unsigned i = 0; |
| 737 | for (auto &Arg : F.args()) { |
| 738 | if (DL.getTypeStoreSize(Ty: Arg.getType()).isZero()) |
| 739 | continue; |
| 740 | |
| 741 | ArgInfo OrigArg{VRegs[i], Arg, i}; |
| 742 | setArgFlags(Arg&: OrigArg, OpIdx: i + AttributeList::FirstArgIndex, DL, FuncInfo: F); |
| 743 | |
| 744 | // i1 arguments are zero-extended to i8 by the caller. Emit a |
| 745 | // hint to reflect this. |
| 746 | if (OrigArg.Ty->isIntegerTy(Bitwidth: 1)) { |
| 747 | assert(OrigArg.Regs.size() == 1 && |
| 748 | MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 && |
| 749 | "Unexpected registers used for i1 arg" ); |
| 750 | |
| 751 | auto &Flags = OrigArg.Flags[0]; |
| 752 | if (!Flags.isZExt() && !Flags.isSExt()) { |
| 753 | // Lower i1 argument as i8, and insert AssertZExt + Trunc later. |
| 754 | Register OrigReg = OrigArg.Regs[0]; |
| 755 | Register WideReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 8)); |
| 756 | OrigArg.Regs[0] = WideReg; |
| 757 | BoolArgs.push_back(Elt: {OrigReg, WideReg}); |
| 758 | } |
| 759 | } |
| 760 | |
| 761 | if (Arg.hasAttribute(Kind: Attribute::SwiftAsync)) |
| 762 | MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); |
| 763 | |
| 764 | splitToValueTypes(OrigArgInfo: OrigArg, SplitArgs, DL, CallConv: F.getCallingConv()); |
| 765 | ++i; |
| 766 | } |
| 767 | |
| 768 | if (!MBB.empty()) |
| 769 | MIRBuilder.setInstr(*MBB.begin()); |
| 770 | |
| 771 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 772 | CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC: F.getCallingConv(), IsVarArg: IsWin64 && F.isVarArg()); |
| 773 | |
| 774 | AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn); |
| 775 | FormalArgHandler Handler(MIRBuilder, MRI); |
| 776 | SmallVector<CCValAssign, 16> ArgLocs; |
| 777 | CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); |
| 778 | if (!determineAssignments(Assigner, Args&: SplitArgs, CCInfo) || |
| 779 | !handleAssignments(Handler, Args&: SplitArgs, CCState&: CCInfo, ArgLocs, MIRBuilder)) |
| 780 | return false; |
| 781 | |
| 782 | if (!BoolArgs.empty()) { |
| 783 | for (auto &KV : BoolArgs) { |
| 784 | Register OrigReg = KV.first; |
| 785 | Register WideReg = KV.second; |
| 786 | LLT WideTy = MRI.getType(Reg: WideReg); |
| 787 | assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 && |
| 788 | "Unexpected bit size of a bool arg" ); |
| 789 | MIRBuilder.buildTrunc( |
| 790 | Res: OrigReg, Op: MIRBuilder.buildAssertZExt(Res: WideTy, Op: WideReg, Size: 1).getReg(Idx: 0)); |
| 791 | } |
| 792 | } |
| 793 | |
| 794 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 795 | uint64_t StackSize = Assigner.StackSize; |
| 796 | if (F.isVarArg()) { |
| 797 | if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) { |
| 798 | // The AAPCS variadic function ABI is identical to the non-variadic |
| 799 | // one. As a result there may be more arguments in registers and we should |
| 800 | // save them for future reference. |
| 801 | // Win64 variadic functions also pass arguments in registers, but all |
| 802 | // float arguments are passed in integer registers. |
| 803 | saveVarArgRegisters(MIRBuilder, Handler, CCInfo); |
| 804 | } else if (Subtarget.isWindowsArm64EC()) { |
| 805 | return false; |
| 806 | } |
| 807 | |
| 808 | // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. |
| 809 | StackSize = alignTo(Value: Assigner.StackSize, Align: Subtarget.isTargetILP32() ? 4 : 8); |
| 810 | |
| 811 | auto &MFI = MIRBuilder.getMF().getFrameInfo(); |
| 812 | FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(Size: 4, SPOffset: StackSize, IsImmutable: true)); |
| 813 | } |
| 814 | |
| 815 | if (doesCalleeRestoreStack(CallConv: F.getCallingConv(), |
| 816 | TailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) { |
| 817 | // We have a non-standard ABI, so why not make full use of the stack that |
| 818 | // we're going to pop? It must be aligned to 16 B in any case. |
| 819 | StackSize = alignTo(Value: StackSize, Align: 16); |
| 820 | |
| 821 | // If we're expected to restore the stack (e.g. fastcc), then we'll be |
| 822 | // adding a multiple of 16. |
| 823 | FuncInfo->setArgumentStackToRestore(StackSize); |
| 824 | |
| 825 | // Our own callers will guarantee that the space is free by giving an |
| 826 | // aligned value to CALLSEQ_START. |
| 827 | } |
| 828 | |
| 829 | // When we tail call, we need to check if the callee's arguments |
| 830 | // will fit on the caller's stack. So, whenever we lower formal arguments, |
| 831 | // we should keep track of this information, since we might lower a tail call |
| 832 | // in this function later. |
| 833 | FuncInfo->setBytesInStackArgArea(StackSize); |
| 834 | |
| 835 | if (Subtarget.hasCustomCallingConv()) |
| 836 | Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); |
| 837 | |
| 838 | handleMustTailForwardedRegisters(MIRBuilder, AssignFn); |
| 839 | |
| 840 | // Move back to the end of the basic block. |
| 841 | MIRBuilder.setMBB(MBB); |
| 842 | |
| 843 | return true; |
| 844 | } |
| 845 | |
| 846 | /// Return true if the calling convention is one that we can guarantee TCO for. |
| 847 | static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) { |
| 848 | return (CC == CallingConv::Fast && GuaranteeTailCalls) || |
| 849 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
| 850 | } |
| 851 | |
| 852 | /// Return true if we might ever do TCO for calls with this calling convention. |
| 853 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
| 854 | switch (CC) { |
| 855 | case CallingConv::C: |
| 856 | case CallingConv::PreserveMost: |
| 857 | case CallingConv::PreserveAll: |
| 858 | case CallingConv::PreserveNone: |
| 859 | case CallingConv::Swift: |
| 860 | case CallingConv::SwiftTail: |
| 861 | case CallingConv::Tail: |
| 862 | case CallingConv::Fast: |
| 863 | return true; |
| 864 | default: |
| 865 | return false; |
| 866 | } |
| 867 | } |
| 868 | |
| 869 | /// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for |
| 870 | /// CC. |
| 871 | static std::pair<CCAssignFn *, CCAssignFn *> |
| 872 | getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) { |
| 873 | return {TLI.CCAssignFnForCall(CC, IsVarArg: false), TLI.CCAssignFnForCall(CC, IsVarArg: true)}; |
| 874 | } |
| 875 | |
| 876 | bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay( |
| 877 | CallLoweringInfo &Info, MachineFunction &MF, |
| 878 | SmallVectorImpl<ArgInfo> &InArgs) const { |
| 879 | const Function &CallerF = MF.getFunction(); |
| 880 | CallingConv::ID CalleeCC = Info.CallConv; |
| 881 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
| 882 | |
| 883 | // If the calling conventions match, then everything must be the same. |
| 884 | if (CalleeCC == CallerCC) |
| 885 | return true; |
| 886 | |
| 887 | // Check if the caller and callee will handle arguments in the same way. |
| 888 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 889 | CCAssignFn *CalleeAssignFnFixed; |
| 890 | CCAssignFn *CalleeAssignFnVarArg; |
| 891 | std::tie(args&: CalleeAssignFnFixed, args&: CalleeAssignFnVarArg) = |
| 892 | getAssignFnsForCC(CC: CalleeCC, TLI); |
| 893 | |
| 894 | CCAssignFn *CallerAssignFnFixed; |
| 895 | CCAssignFn *CallerAssignFnVarArg; |
| 896 | std::tie(args&: CallerAssignFnFixed, args&: CallerAssignFnVarArg) = |
| 897 | getAssignFnsForCC(CC: CallerCC, TLI); |
| 898 | |
| 899 | AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed, |
| 900 | CalleeAssignFnVarArg); |
| 901 | AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed, |
| 902 | CallerAssignFnVarArg); |
| 903 | |
| 904 | if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner)) |
| 905 | return false; |
| 906 | |
| 907 | // Make sure that the caller and callee preserve all of the same registers. |
| 908 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
| 909 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
| 910 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
| 911 | if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) { |
| 912 | TRI->UpdateCustomCallPreservedMask(MF, Mask: &CallerPreserved); |
| 913 | TRI->UpdateCustomCallPreservedMask(MF, Mask: &CalleePreserved); |
| 914 | } |
| 915 | |
| 916 | return TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved); |
| 917 | } |
| 918 | |
| 919 | bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable( |
| 920 | CallLoweringInfo &Info, MachineFunction &MF, |
| 921 | SmallVectorImpl<ArgInfo> &OrigOutArgs) const { |
| 922 | // If there are no outgoing arguments, then we are done. |
| 923 | if (OrigOutArgs.empty()) |
| 924 | return true; |
| 925 | |
| 926 | const Function &CallerF = MF.getFunction(); |
| 927 | LLVMContext &Ctx = CallerF.getContext(); |
| 928 | CallingConv::ID CalleeCC = Info.CallConv; |
| 929 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
| 930 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 931 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 932 | |
| 933 | CCAssignFn *AssignFnFixed; |
| 934 | CCAssignFn *AssignFnVarArg; |
| 935 | std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI); |
| 936 | |
| 937 | // We have outgoing arguments. Make sure that we can tail call with them. |
| 938 | SmallVector<CCValAssign, 16> OutLocs; |
| 939 | CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx); |
| 940 | |
| 941 | AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg, |
| 942 | Subtarget, /*IsReturn*/ false); |
| 943 | // determineAssignments() may modify argument flags, so make a copy. |
| 944 | SmallVector<ArgInfo, 8> OutArgs; |
| 945 | append_range(C&: OutArgs, R&: OrigOutArgs); |
| 946 | if (!determineAssignments(Assigner&: CalleeAssigner, Args&: OutArgs, CCInfo&: OutInfo)) { |
| 947 | LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n" ); |
| 948 | return false; |
| 949 | } |
| 950 | |
| 951 | // Make sure that they can fit on the caller's stack. |
| 952 | const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 953 | if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) { |
| 954 | LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n" ); |
| 955 | return false; |
| 956 | } |
| 957 | |
| 958 | // Verify that the parameters in callee-saved registers match. |
| 959 | // TODO: Port this over to CallLowering as general code once swiftself is |
| 960 | // supported. |
| 961 | auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); |
| 962 | const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); |
| 963 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 964 | |
| 965 | if (Info.IsVarArg) { |
| 966 | // Be conservative and disallow variadic memory operands to match SDAG's |
| 967 | // behaviour. |
| 968 | // FIXME: If the caller's calling convention is C, then we can |
| 969 | // potentially use its argument area. However, for cases like fastcc, |
| 970 | // we can't do anything. |
| 971 | for (unsigned i = 0; i < OutLocs.size(); ++i) { |
| 972 | auto &ArgLoc = OutLocs[i]; |
| 973 | if (ArgLoc.isRegLoc()) |
| 974 | continue; |
| 975 | |
| 976 | LLVM_DEBUG( |
| 977 | dbgs() |
| 978 | << "... Cannot tail call vararg function with stack arguments\n" ); |
| 979 | return false; |
| 980 | } |
| 981 | } |
| 982 | |
| 983 | return parametersInCSRMatch(MRI, CallerPreservedMask, ArgLocs: OutLocs, OutVals: OutArgs); |
| 984 | } |
| 985 | |
| 986 | bool AArch64CallLowering::isEligibleForTailCallOptimization( |
| 987 | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
| 988 | SmallVectorImpl<ArgInfo> &InArgs, |
| 989 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
| 990 | |
| 991 | // Must pass all target-independent checks in order to tail call optimize. |
| 992 | if (!Info.IsTailCall) |
| 993 | return false; |
| 994 | |
| 995 | CallingConv::ID CalleeCC = Info.CallConv; |
| 996 | MachineFunction &MF = MIRBuilder.getMF(); |
| 997 | const Function &CallerF = MF.getFunction(); |
| 998 | |
| 999 | LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n" ); |
| 1000 | |
| 1001 | if (Info.SwiftErrorVReg) { |
| 1002 | // TODO: We should handle this. |
| 1003 | // Note that this is also handled by the check for no outgoing arguments. |
| 1004 | // Proactively disabling this though, because the swifterror handling in |
| 1005 | // lowerCall inserts a COPY *after* the location of the call. |
| 1006 | LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n" ); |
| 1007 | return false; |
| 1008 | } |
| 1009 | |
| 1010 | if (!mayTailCallThisCC(CC: CalleeCC)) { |
| 1011 | LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n" ); |
| 1012 | return false; |
| 1013 | } |
| 1014 | |
| 1015 | // Byval parameters hand the function a pointer directly into the stack area |
| 1016 | // we want to reuse during a tail call. Working around this *is* possible (see |
| 1017 | // X86). |
| 1018 | // |
| 1019 | // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try |
| 1020 | // it? |
| 1021 | // |
| 1022 | // On Windows, "inreg" attributes signify non-aggregate indirect returns. |
| 1023 | // In this case, it is necessary to save/restore X0 in the callee. Tail |
| 1024 | // call opt interferes with this. So we disable tail call opt when the |
| 1025 | // caller has an argument with "inreg" attribute. |
| 1026 | // |
| 1027 | // FIXME: Check whether the callee also has an "inreg" argument. |
| 1028 | // |
| 1029 | // When the caller has a swifterror argument, we don't want to tail call |
| 1030 |   // because we would have to move into the swifterror register before the
| 1031 | // tail call. |
| 1032 | if (any_of(Range: CallerF.args(), P: [](const Argument &A) { |
| 1033 | return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr(); |
| 1034 | })) { |
| 1035 | LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, " |
| 1036 | "inreg, or swifterror arguments\n" ); |
| 1037 | return false; |
| 1038 | } |
| 1039 | |
| 1040 | // Externally-defined functions with weak linkage should not be |
| 1041 | // tail-called on AArch64 when the OS does not support dynamic |
| 1042 | // pre-emption of symbols, as the AAELF spec requires normal calls |
| 1043 | // to undefined weak functions to be replaced with a NOP or jump to the |
| 1044 | // next instruction. The behaviour of branch instructions in this |
| 1045 | // situation (as used for tail calls) is implementation-defined, so we |
| 1046 | // cannot rely on the linker replacing the tail call with a return. |
| 1047 | if (Info.Callee.isGlobal()) { |
| 1048 | const GlobalValue *GV = Info.Callee.getGlobal(); |
| 1049 | const Triple &TT = MF.getTarget().getTargetTriple(); |
| 1050 | if (GV->hasExternalWeakLinkage() && |
| 1051 | (!TT.isOSWindows() || TT.isOSBinFormatELF() || |
| 1052 | TT.isOSBinFormatMachO())) { |
| 1053 | LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function " |
| 1054 | "with weak linkage for this OS.\n" ); |
| 1055 | return false; |
| 1056 | } |
| 1057 | } |
| 1058 | |
| 1059 | // If we have -tailcallopt, then we're done. |
| 1060 | if (canGuaranteeTCO(CC: CalleeCC, GuaranteeTailCalls: MF.getTarget().Options.GuaranteedTailCallOpt)) |
| 1061 | return CalleeCC == CallerF.getCallingConv(); |
| 1062 | |
| 1063 | // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall). |
| 1064 | // Try to find cases where we can do that. |
| 1065 | |
| 1066 | // I want anyone implementing a new calling convention to think long and hard |
| 1067 | // about this assert. |
| 1068 | assert((!Info.IsVarArg || CalleeCC == CallingConv::C) && |
| 1069 | "Unexpected variadic calling convention" ); |
| 1070 | |
| 1071 | // Verify that the incoming and outgoing arguments from the callee are |
| 1072 | // safe to tail call. |
| 1073 | if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) { |
| 1074 | LLVM_DEBUG( |
| 1075 | dbgs() |
| 1076 | << "... Caller and callee have incompatible calling conventions.\n" ); |
| 1077 | return false; |
| 1078 | } |
| 1079 | |
| 1080 | if (!areCalleeOutgoingArgsTailCallable(Info, MF, OrigOutArgs&: OutArgs)) |
| 1081 | return false; |
| 1082 | |
| 1083 | LLVM_DEBUG( |
| 1084 | dbgs() << "... Call is eligible for tail call optimization.\n" ); |
| 1085 | return true; |
| 1086 | } |
| 1087 | |
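/// Select the opcode for a call: BL/BLR (or BLRA for pointer-authenticated
/// calls) for normal calls, and one of the TCRETURN pseudos for tail calls,
/// respecting BTI and PAuthLR restrictions on which register may hold the
/// callee address.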
| 1088 | static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, |
| 1089 | bool IsTailCall, |
| 1090 | std::optional<CallLowering::PtrAuthInfo> &PAI, |
| 1091 | MachineRegisterInfo &MRI) { |
| 1092 | const AArch64FunctionInfo *FuncInfo = CallerF.getInfo<AArch64FunctionInfo>(); |
| 1093 | |
| 1094 | if (!IsTailCall) { |
| 1095 | if (!PAI) |
| 1096 | return IsIndirect ? getBLRCallOpcode(MF: CallerF) : (unsigned)AArch64::BL; |
| 1097 | |
| 1098 | assert(IsIndirect && "Direct call should not be authenticated" ); |
| 1099 | assert((PAI->Key == AArch64PACKey::IA || PAI->Key == AArch64PACKey::IB) && |
| 1100 | "Invalid auth call key" ); |
| 1101 | return AArch64::BLRA; |
| 1102 | } |
| 1103 | |
| 1104 | if (!IsIndirect) |
| 1105 | return AArch64::TCRETURNdi; |
| 1106 | |
| 1107 | // When BTI or PAuthLR are enabled, there are restrictions on using x16 and |
| 1108 | // x17 to hold the function pointer. |
| 1109 | if (FuncInfo->branchTargetEnforcement()) { |
| 1110 | if (FuncInfo->branchProtectionPAuthLR()) { |
| 1111 | assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR" ); |
| 1112 | return AArch64::TCRETURNrix17; |
| 1113 | } |
| 1114 | if (PAI) |
| 1115 | return AArch64::AUTH_TCRETURN_BTI; |
| 1116 | return AArch64::TCRETURNrix16x17; |
| 1117 | } |
| 1118 | |
| 1119 | if (FuncInfo->branchProtectionPAuthLR()) { |
| 1120 | assert(!PAI && "ptrauth tail-calls not yet supported with PAuthLR" ); |
| 1121 | return AArch64::TCRETURNrinotx16; |
| 1122 | } |
| 1123 | |
| 1124 | if (PAI) |
| 1125 | return AArch64::AUTH_TCRETURN; |
| 1126 | return AArch64::TCRETURNri; |
| 1127 | } |
| 1128 | |
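/// Compute the register preservation mask for the call. When the first
/// outgoing argument carries the 'returned' attribute, prefer the
/// X0-preserving mask if one exists for this calling convention; otherwise
/// clear the flag and fall back to the normal call-preserved mask.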
| 1129 | static const uint32_t * |
| 1130 | getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs, |
| 1131 | AArch64CallLowering::CallLoweringInfo &Info, |
| 1132 | const AArch64RegisterInfo &TRI, MachineFunction &MF) { |
| 1133 | const uint32_t *Mask; |
| 1134 | if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) { |
| 1135 | // For 'this' returns, use the X0-preserving mask if applicable |
| 1136 | Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv); |
| 1137 | if (!Mask) { |
| 1138 | OutArgs[0].Flags[0].setReturned(false); |
| 1139 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
| 1140 | } |
| 1141 | } else { |
| 1142 | Mask = TRI.getCallPreservedMask(MF, Info.CallConv); |
| 1143 | } |
| 1144 | return Mask; |
| 1145 | } |
| 1146 | |
| 1147 | bool AArch64CallLowering::lowerTailCall( |
| 1148 | MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, |
| 1149 | SmallVectorImpl<ArgInfo> &OutArgs) const { |
| 1150 | MachineFunction &MF = MIRBuilder.getMF(); |
| 1151 | const Function &F = MF.getFunction(); |
| 1152 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 1153 | const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>(); |
| 1154 | AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); |
| 1155 | |
| 1156 | // True when we're tail calling, but without -tailcallopt. |
| 1157 | bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt && |
| 1158 | Info.CallConv != CallingConv::Tail && |
| 1159 | Info.CallConv != CallingConv::SwiftTail; |
| 1160 | |
| 1161 | // Find out which ABI gets to decide where things go. |
| 1162 | CallingConv::ID CalleeCC = Info.CallConv; |
| 1163 | CCAssignFn *AssignFnFixed; |
| 1164 | CCAssignFn *AssignFnVarArg; |
| 1165 | std::tie(args&: AssignFnFixed, args&: AssignFnVarArg) = getAssignFnsForCC(CC: CalleeCC, TLI); |
| 1166 | |
| 1167 | MachineInstrBuilder CallSeqStart; |
| 1168 | if (!IsSibCall) |
| 1169 | CallSeqStart = MIRBuilder.buildInstr(Opcode: AArch64::ADJCALLSTACKDOWN); |
| 1170 | |
| 1171 | unsigned Opc = getCallOpcode(CallerF: MF, IsIndirect: Info.Callee.isReg(), IsTailCall: true, PAI&: Info.PAI, MRI); |
| 1172 | auto MIB = MIRBuilder.buildInstrNoInsert(Opcode: Opc); |
| 1173 | MIB.add(MO: Info.Callee); |
| 1174 | |
| 1175 | // Tell the call which registers are clobbered. |
| 1176 | const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); |
| 1177 | auto TRI = Subtarget.getRegisterInfo(); |
| 1178 | |
| 1179 | // Byte offset for the tail call. When we are sibcalling, this will always |
| 1180 | // be 0. |
| 1181 | MIB.addImm(Val: 0); |
| 1182 | |
| 1183 | // Authenticated tail calls always take key/discriminator arguments. |
| 1184 | if (Opc == AArch64::AUTH_TCRETURN || Opc == AArch64::AUTH_TCRETURN_BTI) { |
| 1185 | assert((Info.PAI->Key == AArch64PACKey::IA || |
| 1186 | Info.PAI->Key == AArch64PACKey::IB) && |
| 1187 | "Invalid auth call key" ); |
| 1188 | MIB.addImm(Val: Info.PAI->Key); |
| 1189 | |
| 1190 | Register AddrDisc = 0; |
| 1191 | uint16_t IntDisc = 0; |
| 1192 | std::tie(args&: IntDisc, args&: AddrDisc) = |
| 1193 | extractPtrauthBlendDiscriminators(Disc: Info.PAI->Discriminator, MRI); |

    MIB.addImm(IntDisc);
    MIB.addUse(AddrDisc);
    if (AddrDisc != AArch64::NoRegister) {
      MIB->getOperand(4).setReg(constrainOperandRegClass(
          MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
          *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(),
          MIB->getOperand(4), 4));
    }
  }

  // Tell the call which registers are clobbered.
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (Subtarget.hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (Info.CFIType)
    MIB->setCFIType(MF, Info.CFIType->getZExtValue());

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                                Subtarget, /*IsReturn*/ false);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getStackSize(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // Update the required reserved area if this is the tail call requiring the
    // most argument stack space.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started
    // at a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

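  // Registers a musttail caller must forward untouched to a varargs callee;
  // these were recorded when the caller's own formal arguments were lowered.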
  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);

  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
                             /*IsTailCall*/ true, FPDiff);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call
    // for the forwarded registers that we *aren't* passing as parameters.
    // This will preserve the copies we built earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (MIB->getOperand(0).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), MIB->getOperand(0), 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}

bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  // Arm64EC has special mangling rules for calls and extra requirements for
  // varargs; bail out on all calls for now.
  if (Subtarget.isWindowsArm64EC())
    return false;

  // Arm64EC thunks have a special calling convention which is only implemented
  // in SelectionDAG; bail out for now.
  if (Info.CallConv == CallingConv::ARM64EC_Thunk_Native ||
      Info.CallConv == CallingConv::ARM64EC_Thunk_X64)
    return false;

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires the caller to zero-extend i1 arguments to 8 bits.
    auto &Flags = OrigArg.Flags[0];
    if (OrigArg.Ty->isIntegerTy(1) && !Flags.isSExt() && !Flags.isZExt()) {
      ArgInfo &OutArg = OutArgs.back();
      assert(OutArg.Regs.size() == 1 &&
             MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");

      // We cannot use a ZExt ArgInfo flag here, because it will
      // zero-extend the argument to i32 instead of just i8.
      OutArg.Regs[0] =
          MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
      LLVMContext &Ctx = MF.getFunction().getContext();
      OutArg.Ty = Type::getInt8Ty(Ctx);
    }
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering.
    // Instead, fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  Info.IsTailCall = CanTailCallOpt;
  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
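  // The ADJCALLSTACKDOWN's size operands are added at the end, once the
  // assigner has computed the outgoing stack size.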

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.

  unsigned Opc = 0;
  // Calls with the "clang.arc.attachedcall" operand bundle are special. They
  // should be expanded to the call, directly followed by a special marker
  // sequence and a call to an ObjC library function.
  if (Info.CB && objcarc::hasAttachedCallOpBundle(Info.CB))
    Opc = Info.PAI ? AArch64::BLRA_RVMARKER : AArch64::BLR_RVMARKER;
  // A call to a returns-twice function like setjmp must be followed by a BTI
  // instruction.
  else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) &&
           !Subtarget.noBTIAtReturnTwice() &&
           MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    Opc = AArch64::BLR_BTI;
  else {
    // For an intrinsic call (e.g. memset), use the GOT if "RtLibUseGOT"
    // (-fno-plt) is set.
    if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
      DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
      MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
      Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
    }
    Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI);
  }

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  unsigned CalleeOpNo = 0;
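  // CalleeOpNo tracks the callee's operand index within MIB; the RVMARKER
  // pseudos add extra operands in front of it below.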

  if (Opc == AArch64::BLR_RVMARKER || Opc == AArch64::BLRA_RVMARKER) {
    // Add a target global address for the retainRV/claimRV runtime function
    // just before the call target.
    Function *ARCFn = *objcarc::getAttachedARCFunction(Info.CB);
    MIB.addGlobalAddress(ARCFn);
    ++CalleeOpNo;

    // We may or may not need to emit both the marker and the retain/claim
    // call. Tell the pseudo expansion using an additional boolean op.
    MIB.addImm(objcarc::attachedCallOpBundleNeedsMarker(Info.CB));
    ++CalleeOpNo;
  } else if (Info.CFIType) {
    MIB->setCFIType(MF, Info.CFIType->getZExtValue());
  }
  MIB->setDeactivationSymbol(MF, Info.DeactivationSymbol);

  MIB.add(Info.Callee);

  // Grab the register info; we need it below for the clobber mask and for
  // constraining register operands.
  const uint32_t *Mask;
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall*/ false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

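  // Recompute the clobber mask now that the final argument list is known; it
  // is attached to the call below.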
  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Opc == AArch64::BLRA || Opc == AArch64::BLRA_RVMARKER) {
    assert((Info.PAI->Key == AArch64PACKey::IA ||
            Info.PAI->Key == AArch64PACKey::IB) &&
           "Invalid auth call key");
    MIB.addImm(Info.PAI->Key);

    Register AddrDisc = 0;
    uint16_t IntDisc = 0;
    std::tie(IntDisc, AddrDisc) =
        extractPtrauthBlendDiscriminators(Info.PAI->Discriminator, MRI);

    MIB.addImm(IntDisc);
    MIB.addUse(AddrDisc);
    if (AddrDisc != AArch64::NoRegister) {
      constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                               *MF.getSubtarget().getRegBankInfo(), *MIB,
                               MIB->getDesc(), MIB->getOperand(CalleeOpNo + 3),
                               CalleeOpNo + 3);
    }
  }

  // Tell the call which registers are clobbered.
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackSize, 16)
          : 0;

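  // Close the call frame. The first ADJCALLSTACKUP operand is the size of the
  // outgoing argument area; the second is how many bytes the callee itself
  // pops, which is nonzero only when the callee restores the stack (e.g.
  // under -tailcallopt).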
  CallSeqStart.addImm(Assigner.StackSize).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackSize)
      .addImm(CalleePopBytes);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (MIB->getOperand(CalleeOpNo).isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             MIB->getOperand(CalleeOpNo), CalleeOpNo);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();
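    // If the first argument carries the 'returned' attribute, the callee hands
    // it back unchanged, so the call's result can be tied to that argument's
    // registers rather than copied out separately.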

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? ArrayRef(OutArgs[0].Regs)
                             : ArrayRef<Register>()))
      return false;
  }

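  // Swift error values travel in X21 on AArch64: mark the call as implicitly
  // defining it and copy the result back into the swifterror vreg.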
  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

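  // The return value was demoted to an sret pointer; load the pieces back out
  // of the stack slot that was set aside for it.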
  if (!Info.CanLowerReturn) {
    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
                    Info.DemoteRegister, Info.DemoteStackIndex);
  }
  return true;
}

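// Only 64-bit values qualify for the returned-argument ("this return")
// optimization.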
bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}