//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements some simple delegations needed for call lowering.
///
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "call-lowering"

using namespace llvm;

void CallLowering::anchor() {}

/// Helper function which updates \p Flags when \p AttrFn returns true.
static void
addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
                    const std::function<bool(Attribute::AttrKind)> &AttrFn) {
  // TODO: There are missing flags. Add them here.
  if (AttrFn(Attribute::SExt))
    Flags.setSExt();
  if (AttrFn(Attribute::ZExt))
    Flags.setZExt();
  if (AttrFn(Attribute::InReg))
    Flags.setInReg();
  if (AttrFn(Attribute::StructRet))
    Flags.setSRet();
  if (AttrFn(Attribute::Nest))
    Flags.setNest();
  if (AttrFn(Attribute::ByVal))
    Flags.setByVal();
  if (AttrFn(Attribute::ByRef))
    Flags.setByRef();
  if (AttrFn(Attribute::Preallocated))
    Flags.setPreallocated();
  if (AttrFn(Attribute::InAlloca))
    Flags.setInAlloca();
  if (AttrFn(Attribute::Returned))
    Flags.setReturned();
  if (AttrFn(Attribute::SwiftSelf))
    Flags.setSwiftSelf();
  if (AttrFn(Attribute::SwiftAsync))
    Flags.setSwiftAsync();
  if (AttrFn(Attribute::SwiftError))
    Flags.setSwiftError();
}

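// Collect the ISD argument flags implied by the attributes on call-site
// argument ArgIdx.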
ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
                                                     unsigned ArgIdx) const {
  ISD::ArgFlagsTy Flags;
  addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) {
    return Call.paramHasAttr(ArgIdx, Attr);
  });
  return Flags;
}

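// Collect the ISD argument flags implied by the return-value attributes of
// the call.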
ISD::ArgFlagsTy
CallLowering::getAttributesForReturn(const CallBase &Call) const {
  ISD::ArgFlagsTy Flags;
  addFlagsUsingAttrFn(Flags, [&Call](Attribute::AttrKind Attr) {
    return Call.hasRetAttr(Attr);
  });
  return Flags;
}

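// Translate the attributes found at index OpIdx of an attribute list into ISD
// argument flags.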
void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
                                             const AttributeList &Attrs,
                                             unsigned OpIdx) const {
  addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
    return Attrs.hasAttributeAtIndex(OpIdx, Attr);
  });
}

bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
                             ArrayRef<Register> ResRegs,
                             ArrayRef<ArrayRef<Register>> ArgRegs,
                             Register SwiftErrorVReg,
                             std::optional<PtrAuthInfo> PAI,
                             Register ConvergenceCtrlToken,
                             std::function<Register()> GetCalleeReg) const {
  CallLoweringInfo Info;
  const DataLayout &DL = MIRBuilder.getDataLayout();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool CanBeTailCalled = CB.isTailCall() &&
                         isInTailCallPosition(CB, MF.getTarget()) &&
                         (MF.getFunction()
                              .getFnAttribute("disable-tail-calls")
                              .getValueAsString() != "true");

  CallingConv::ID CallConv = CB.getCallingConv();
  Type *RetTy = CB.getType();
  bool IsVarArg = CB.getFunctionType()->isVarArg();

  SmallVector<BaseArgInfo, 4> SplitArgs;
  getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
  Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);

  Info.IsConvergent = CB.isConvergent();

  if (!Info.CanLowerReturn) {
    // Callee requires sret demotion.
    insertSRetOutgoingArgument(MIRBuilder, CB, Info);

    // The sret demotion isn't compatible with tail-calls, since the sret
    // argument points into the caller's stack frame.
    CanBeTailCalled = false;
  }

  // First step is to marshall all the function's parameters into the correct
  // physregs and memory locations. Gather the sequence of argument types that
  // we'll pass to the assigner function.
  unsigned i = 0;
  unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
  for (const auto &Arg : CB.args()) {
    ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i)};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
    if (i >= NumFixedArgs)
      OrigArg.Flags[0].setVarArg();

    // If we have an explicit sret argument that is an Instruction, (i.e., it
    // might point to function-local memory), we can't meaningfully tail-call.
    if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg))
      CanBeTailCalled = false;

    Info.OrigArgs.push_back(OrigArg);
    ++i;
  }

  // Try looking through a bitcast from one function type to another.
  // Commonly happens with calls to objc_msgSend().
  const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();

  // If IRTranslator chose to drop the ptrauth info, we can turn this into
  // a direct call.
  if (!PAI && CB.countOperandBundlesOfType(LLVMContext::OB_ptrauth)) {
    CalleeV = cast<ConstantPtrAuth>(CalleeV)->getPointer();
    assert(isa<Function>(CalleeV));
  }

  if (const Function *F = dyn_cast<Function>(CalleeV)) {
    if (F->hasFnAttribute(Attribute::NonLazyBind)) {
      LLT Ty = getLLTForType(*F->getType(), DL);
      Register Reg = MIRBuilder.buildGlobalValue(Ty, F).getReg(0);
      Info.Callee = MachineOperand::CreateReg(Reg, false);
    } else {
      Info.Callee = MachineOperand::CreateGA(F, 0);
    }
  } else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
    // IR IFuncs and Aliases can't be forward declared (only defined), so the
    // callee must be in the same TU and therefore we can direct-call it
    // without worrying about it being out of range.
    Info.Callee = MachineOperand::CreateGA(cast<GlobalValue>(CalleeV), 0);
  } else
    Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);

  Register ReturnHintAlignReg;
  Align ReturnHintAlign;

  Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(CB)};

  if (!Info.OrigRet.Ty->isVoidTy()) {
    setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);

    if (MaybeAlign Alignment = CB.getRetAlign()) {
      if (*Alignment > Align(1)) {
        ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]);
        Info.OrigRet.Regs[0] = ReturnHintAlignReg;
        ReturnHintAlign = *Alignment;
      }
    }
  }

  auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi);
  if (Bundle && CB.isIndirectCall()) {
    Info.CFIType = cast<ConstantInt>(Bundle->Inputs[0]);
    assert(Info.CFIType->getType()->isIntegerTy(32) && "Invalid CFI type");
  }

  if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_deactivation_symbol)) {
    Info.DeactivationSymbol = cast<GlobalValue>(Bundle->Inputs[0]);
  }

  Info.CB = &CB;
  Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
  Info.CallConv = CallConv;
  Info.SwiftErrorVReg = SwiftErrorVReg;
  Info.PAI = PAI;
  Info.ConvergenceCtrlToken = ConvergenceCtrlToken;
  Info.IsMustTailCall = CB.isMustTailCall();
  Info.IsTailCall = CanBeTailCalled;
  Info.IsVarArg = IsVarArg;
  if (!lowerCall(MIRBuilder, Info))
    return false;

  if (ReturnHintAlignReg && !Info.LoweredTailCall) {
    MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
                                ReturnHintAlign);
  }

  return true;
}

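// Populate Arg's flags (extension, pointer address space, byval/byref sizes,
// memory alignment) from the IR attributes and the data layout. FuncInfo is
// either a Function (formal arguments) or a CallBase (call sites).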
template <typename FuncInfoTy>
void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                               const DataLayout &DL,
                               const FuncInfoTy &FuncInfo) const {
  auto &Flags = Arg.Flags[0];
  const AttributeList &Attrs = FuncInfo.getAttributes();
  addArgFlagsFromAttributes(Flags, Attrs, OpIdx);

  PointerType *PtrTy = dyn_cast<PointerType>(Arg.Ty->getScalarType());
  if (PtrTy) {
    Flags.setPointer();
    Flags.setPointerAddrSpace(PtrTy->getPointerAddressSpace());
  }

  Align MemAlign = DL.getABITypeAlign(Arg.Ty);
  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
      Flags.isByRef()) {
    assert(OpIdx >= AttributeList::FirstArgIndex);
    unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;

    Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
    if (!ElementTy)
      ElementTy = FuncInfo.getParamByRefType(ParamIdx);
    if (!ElementTy)
      ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
    if (!ElementTy)
      ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);

    assert(ElementTy && "Must have byval, inalloca or preallocated type");

    uint64_t MemSize = DL.getTypeAllocSize(ElementTy);
    if (Flags.isByRef())
      Flags.setByRefSize(MemSize);
    else
      Flags.setByValSize(MemSize);

    // For ByVal, alignment should be passed from FE. BE will guess if
    // this info is not there but there are cases it cannot get right.
    if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx))
      MemAlign = *ParamAlign;
    else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx)))
      MemAlign = *ParamAlign;
    else
      MemAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
  } else if (OpIdx >= AttributeList::FirstArgIndex) {
    if (auto ParamAlign =
            FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
      MemAlign = *ParamAlign;
  }
  Flags.setMemAlign(MemAlign);
  Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));

  // Don't try to use the returned attribute if the argument is marked as
  // swiftself, since it won't be passed in x0.
  if (Flags.isSwiftSelf())
    Flags.setReturned(false);
}

template void
CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                                    const DataLayout &DL,
                                    const Function &FuncInfo) const;

template void
CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
                                    const DataLayout &DL,
                                    const CallBase &FuncInfo) const;

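// Break OrigArg into one ArgInfo per value type reported by ComputeValueVTs,
// propagating the original flags and recording the member offsets if Offsets
// is provided.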
void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
                                     SmallVectorImpl<ArgInfo> &SplitArgs,
                                     const DataLayout &DL,
                                     CallingConv::ID CallConv,
                                     SmallVectorImpl<uint64_t> *Offsets) const {
  LLVMContext &Ctx = OrigArg.Ty->getContext();

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(*TLI, DL, OrigArg.Ty, SplitVTs, /*MemVTs=*/nullptr, Offsets,
                  0);

  if (SplitVTs.size() == 0)
    return;

  if (SplitVTs.size() == 1) {
    // No splitting to do, but we want to replace the original type (e.g. [1 x
    // double] -> double).
    SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                           OrigArg.OrigArgIndex, OrigArg.Flags[0],
                           OrigArg.OrigValue);
    return;
  }

  // Create one ArgInfo for each virtual register in the original ArgInfo.
  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");

  bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
      OrigArg.Ty, CallConv, false, DL);
  for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
    Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
    SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex,
                           OrigArg.Flags[0]);
    if (NeedsRegBlock)
      SplitArgs.back().Flags[0].setInConsecutiveRegs();
  }

  SplitArgs.back().Flags[0].setInConsecutiveRegsLast();
}

/// Pack values \p SrcRegs to cover the vector type result \p DstRegs.
static MachineInstrBuilder
mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
                            ArrayRef<Register> SrcRegs) {
  MachineRegisterInfo &MRI = *B.getMRI();
  LLT LLTy = MRI.getType(DstRegs[0]);
  LLT PartLLT = MRI.getType(SrcRegs[0]);

  // Deal with v3s16 split into v2s16
  LLT LCMTy = getCoverTy(LLTy, PartLLT);
  if (LCMTy == LLTy) {
    // Common case where no padding is needed.
    assert(DstRegs.size() == 1);
    return B.buildConcatVectors(DstRegs[0], SrcRegs);
  }

  // We need to create an unmerge to the result registers, which may require
  // widening the original value.
  Register UnmergeSrcReg;
  if (LCMTy != PartLLT) {
    assert(DstRegs.size() == 1);
    return B.buildDeleteTrailingVectorElements(
        DstRegs[0], B.buildMergeLikeInstr(LCMTy, SrcRegs));
  } else {
    // We don't need to widen anything if we're extracting a scalar which was
    // promoted to a vector e.g. s8 -> v4s8 -> s8
    assert(SrcRegs.size() == 1);
    UnmergeSrcReg = SrcRegs[0];
  }

  int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits();

  SmallVector<Register, 8> PadDstRegs(NumDst);
  llvm::copy(DstRegs, PadDstRegs.begin());

  // Create the excess dead defs for the unmerge.
  for (int I = DstRegs.size(); I != NumDst; ++I)
    PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy);

  if (PadDstRegs.size() == 1)
    return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg);
  return B.buildUnmerge(PadDstRegs, UnmergeSrcReg);
}

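// Reassemble the value in OrigRegs from the ABI-assigned parts in Regs, which
// may have been split, extended or bitcast by the calling convention.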
void CallLowering::buildCopyFromRegs(MachineIRBuilder &B,
                                     ArrayRef<Register> OrigRegs,
                                     ArrayRef<Register> Regs, LLT LLTy,
                                     LLT PartLLT, const ISD::ArgFlagsTy Flags) {
  MachineRegisterInfo &MRI = *B.getMRI();

  if (PartLLT == LLTy) {
    // We should have avoided introducing a new virtual register, and just
    // directly assigned here.
    assert(OrigRegs[0] == Regs[0]);
    return;
  }

  if (PartLLT.getSizeInBits() == LLTy.getSizeInBits() && OrigRegs.size() == 1 &&
      Regs.size() == 1) {
    B.buildBitcast(OrigRegs[0], Regs[0]);
    return;
  }

  // A vector PartLLT needs extending to LLTy's element size.
  // E.g. <2 x s64> = G_SEXT <2 x s32>.
  if (PartLLT.isVector() == LLTy.isVector() &&
      PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() &&
      (!PartLLT.isVector() ||
       PartLLT.getElementCount() == LLTy.getElementCount()) &&
      OrigRegs.size() == 1 && Regs.size() == 1) {
    Register SrcReg = Regs[0];

    LLT LocTy = MRI.getType(SrcReg);

    if (Flags.isSExt()) {
      SrcReg = B.buildAssertSExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
                   .getReg(0);
    } else if (Flags.isZExt()) {
      SrcReg = B.buildAssertZExt(LocTy, SrcReg, LLTy.getScalarSizeInBits())
                   .getReg(0);
    }

    // Sometimes pointers are passed zero extended.
    LLT OrigTy = MRI.getType(OrigRegs[0]);
    if (OrigTy.isPointer()) {
      LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits());
      B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg));
      return;
    }

    B.buildTrunc(OrigRegs[0], SrcReg);
    return;
  }

  if (!LLTy.isVector() && !PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    LLT OrigTy = MRI.getType(OrigRegs[0]);

    unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size();
    if (SrcSize == OrigTy.getSizeInBits())
      B.buildMergeValues(OrigRegs[0], Regs);
    else {
      auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs);
      B.buildTrunc(OrigRegs[0], Widened);
    }

    return;
  }

  if (PartLLT.isVector()) {
    assert(OrigRegs.size() == 1);
    SmallVector<Register> CastRegs(Regs);

    // If PartLLT is a mismatched vector in both number of elements and element
    // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to
    // have the same elt type, i.e. v4s32.
    // TODO: Extend this coercion to element multiples other than just 2.
    if (TypeSize::isKnownGT(PartLLT.getSizeInBits(), LLTy.getSizeInBits()) &&
        PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 &&
        Regs.size() == 1) {
      LLT NewTy = PartLLT.changeElementType(LLTy.getElementType())
                      .changeElementCount(PartLLT.getElementCount() * 2);
      CastRegs[0] = B.buildBitcast(NewTy, Regs[0]).getReg(0);
      PartLLT = NewTy;
    }

    if (LLTy.getScalarType() == PartLLT.getElementType()) {
      mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
    } else {
      unsigned I = 0;
      LLT GCDTy = getGCDType(LLTy, PartLLT);

      // We are both splitting a vector, and bitcasting its element types. Cast
      // the source pieces into the appropriate number of pieces with the
      // result element type.
      for (Register SrcReg : CastRegs)
        CastRegs[I++] = B.buildBitcast(GCDTy, SrcReg).getReg(0);
      mergeVectorRegsToResultRegs(B, OrigRegs, CastRegs);
    }

    return;
  }

  assert(LLTy.isVector() && !PartLLT.isVector());

  LLT DstEltTy = LLTy.getElementType();

  // Pointer information was discarded. We'll need to coerce some register
  // types to avoid violating type constraints.
  LLT RealDstEltTy = MRI.getType(OrigRegs[0]).getElementType();

  assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits());

  if (DstEltTy == PartLLT) {
    // Vector was trivially scalarized.

    if (RealDstEltTy.isPointer()) {
      for (Register Reg : Regs)
        MRI.setType(Reg, RealDstEltTy);
    }

    B.buildBuildVector(OrigRegs[0], Regs);
  } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
    // Deal with vector with 64-bit elements decomposed to 32-bit
    // registers. Need to create intermediate 64-bit elements.
    SmallVector<Register, 8> EltMerges;
    int PartsPerElt =
        divideCeil(DstEltTy.getSizeInBits(), PartLLT.getSizeInBits());
    LLT ExtendedPartTy = LLT::scalar(PartLLT.getSizeInBits() * PartsPerElt);

    for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
      auto Merge =
          B.buildMergeLikeInstr(ExtendedPartTy, Regs.take_front(PartsPerElt));
      if (ExtendedPartTy.getSizeInBits() > RealDstEltTy.getSizeInBits())
        Merge = B.buildTrunc(RealDstEltTy, Merge);
      // Fix the type in case this is really a vector of pointers.
      MRI.setType(Merge.getReg(0), RealDstEltTy);
      EltMerges.push_back(Merge.getReg(0));
      Regs = Regs.drop_front(PartsPerElt);
    }

    B.buildBuildVector(OrigRegs[0], EltMerges);
  } else {
    // Vector was split, and elements promoted to a wider type.
    // FIXME: Should handle floating point promotions.
    unsigned NumElts = LLTy.getNumElements();
    LLT BVType = LLT::fixed_vector(NumElts, PartLLT);

    Register BuildVec;
    if (NumElts == Regs.size())
      BuildVec = B.buildBuildVector(BVType, Regs).getReg(0);
    else {
      // Vector elements are packed in the inputs.
      // e.g. we have a <4 x s16> but 2 x s32 in regs.
      assert(NumElts > Regs.size());
      LLT SrcEltTy = MRI.getType(Regs[0]);

      LLT OriginalEltTy = MRI.getType(OrigRegs[0]).getElementType();

      // Input registers contain packed elements.
      // Determine how many elements per reg.
      assert((SrcEltTy.getSizeInBits() % OriginalEltTy.getSizeInBits()) == 0);
      unsigned EltPerReg =
          (SrcEltTy.getSizeInBits() / OriginalEltTy.getSizeInBits());

      SmallVector<Register, 0> BVRegs;
      BVRegs.reserve(Regs.size() * EltPerReg);
      for (Register R : Regs) {
        auto Unmerge = B.buildUnmerge(OriginalEltTy, R);
        for (unsigned K = 0; K < EltPerReg; ++K)
          BVRegs.push_back(B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0));
      }

      // We may have some more elements in BVRegs, e.g. if we have 2 s32 pieces
      // for a <3 x s16> vector. We should have less than EltPerReg extra items.
      if (BVRegs.size() > NumElts) {
        assert((BVRegs.size() - NumElts) < EltPerReg);
        BVRegs.truncate(NumElts);
      }
      BuildVec = B.buildBuildVector(BVType, BVRegs).getReg(0);
    }
    B.buildTrunc(OrigRegs[0], BuildVec);
  }
}

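// Split and/or extend the value in SrcReg into the ABI-assigned part registers
// DstRegs, using ExtendOp whenever widening is required.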
void CallLowering::buildCopyToRegs(MachineIRBuilder &B,
                                   ArrayRef<Register> DstRegs, Register SrcReg,
                                   LLT SrcTy, LLT PartTy, unsigned ExtendOp) {
  // We could just insert a regular copy, but this is unreachable at the moment.
  assert(SrcTy != PartTy && "identical part types shouldn't reach here");

  const TypeSize PartSize = PartTy.getSizeInBits();

  if (PartSize == SrcTy.getSizeInBits() && DstRegs.size() == 1) {
    // TODO: Handle int<->ptr casts. It just happens the ABI lowering
    // assignments are not pointer aware.
    B.buildBitcast(DstRegs[0], SrcReg);
    return;
  }

  if (PartTy.isVector() == SrcTy.isVector() &&
      PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) {
    assert(DstRegs.size() == 1);
    B.buildInstr(ExtendOp, {DstRegs[0]}, {SrcReg});
    return;
  }

  if (SrcTy.isVector() && !PartTy.isVector() &&
      TypeSize::isKnownGT(PartSize, SrcTy.getElementType().getSizeInBits()) &&
      SrcTy.getElementCount() == ElementCount::getFixed(DstRegs.size())) {
    // Vector was scalarized, and the elements extended.
    auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
    for (int i = 0, e = DstRegs.size(); i != e; ++i)
      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
    return;
  }

  if (SrcTy.isVector() && PartTy.isVector() &&
      PartTy.getSizeInBits() == SrcTy.getSizeInBits() &&
      ElementCount::isKnownLT(SrcTy.getElementCount(),
                              PartTy.getElementCount())) {
    // A coercion like: v2f32 -> v4f32 or nxv2f32 -> nxv4f32
    Register DstReg = DstRegs.front();
    B.buildPadVectorWithUndefElements(DstReg, SrcReg);
    return;
  }

  LLT GCDTy = getGCDType(SrcTy, PartTy);
  if (GCDTy == PartTy) {
    // If this is already evenly divisible, we can create a simple unmerge.
    B.buildUnmerge(DstRegs, SrcReg);
    return;
  }

  if (SrcTy.isVector() && !PartTy.isVector() &&
      SrcTy.getScalarSizeInBits() > PartTy.getSizeInBits()) {
    LLT ExtTy =
        LLT::vector(SrcTy.getElementCount(),
                    LLT::scalar(PartTy.getScalarSizeInBits() * DstRegs.size() /
                                SrcTy.getNumElements()));
    auto Ext = B.buildAnyExt(ExtTy, SrcReg);
    B.buildUnmerge(DstRegs, Ext);
    return;
  }

  MachineRegisterInfo &MRI = *B.getMRI();
  LLT DstTy = MRI.getType(DstRegs[0]);
  LLT CoverTy = getCoverTy(SrcTy, PartTy);
  if (SrcTy.isVector() && DstRegs.size() > 1) {
    TypeSize FullCoverSize =
        DstTy.getSizeInBits().multiplyCoefficientBy(DstRegs.size());

    LLT EltTy = SrcTy.getElementType();
    TypeSize EltSize = EltTy.getSizeInBits();
    if (FullCoverSize.isKnownMultipleOf(EltSize)) {
      TypeSize VecSize = FullCoverSize.divideCoefficientBy(EltSize);
      CoverTy =
          LLT::vector(ElementCount::get(VecSize, VecSize.isScalable()), EltTy);
    }
  }

  if (PartTy.isVector() && CoverTy == PartTy) {
    assert(DstRegs.size() == 1);
    B.buildPadVectorWithUndefElements(DstRegs[0], SrcReg);
    return;
  }

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  unsigned CoveringSize = CoverTy.getSizeInBits();

  Register UnmergeSrc = SrcReg;

  if (!CoverTy.isVector() && CoveringSize != SrcSize) {
    // For scalars, it's common to be able to use a simple extension.
    if (SrcTy.isScalar() && DstTy.isScalar()) {
      CoveringSize = alignTo(SrcSize, DstSize);
      LLT CoverTy = LLT::scalar(CoveringSize);
      UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0);
    } else {
      // Widen to the common type.
      // FIXME: This should respect the extend type
      Register Undef = B.buildUndef(SrcTy).getReg(0);
      SmallVector<Register, 8> MergeParts(1, SrcReg);
      for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize)
        MergeParts.push_back(Undef);
      UnmergeSrc = B.buildMergeLikeInstr(CoverTy, MergeParts).getReg(0);
    }
  }

  if (CoverTy.isVector() && CoveringSize != SrcSize)
    UnmergeSrc = B.buildPadVectorWithUndefElements(CoverTy, SrcReg).getReg(0);

  B.buildUnmerge(DstRegs, UnmergeSrc);
}

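// Convenience wrapper: run the assigner over Args, then hand the resulting
// locations to the value handler.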
bool CallLowering::determineAndHandleAssignments(
    ValueHandler &Handler, ValueAssigner &Assigner,
    SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
    CallingConv::ID CallConv, bool IsVarArg,
    ArrayRef<Register> ThisReturnRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  SmallVector<CCValAssign, 16> ArgLocs;

  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext());
  if (!determineAssignments(Assigner, Args, CCInfo))
    return false;

  return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder,
                           ThisReturnRegs);
}

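/// Map the signext/zeroext argument flags to the corresponding generic extend
/// opcode, defaulting to G_ANYEXT.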
static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
  if (Flags.isSExt())
    return TargetOpcode::G_SEXT;
  if (Flags.isZExt())
    return TargetOpcode::G_ZEXT;
  return TargetOpcode::G_ANYEXT;
}

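// Assign a location to each argument, splitting values that the calling
// convention passes in multiple registers.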
bool CallLowering::determineAssignments(ValueAssigner &Assigner,
                                        SmallVectorImpl<ArgInfo> &Args,
                                        CCState &CCInfo) const {
  LLVMContext &Ctx = CCInfo.getContext();
  const CallingConv::ID CallConv = CCInfo.getCallingConv();

  unsigned NumArgs = Args.size();
  for (unsigned i = 0; i != NumArgs; ++i) {
    EVT CurVT = EVT::getEVT(Args[i].Ty);

    MVT NewVT = TLI->getRegisterTypeForCallingConv(Ctx, CallConv, CurVT);

    // If we need to split the type over multiple regs, check it's a scenario
    // we currently support.
    unsigned NumParts =
        TLI->getNumRegistersForCallingConv(Ctx, CallConv, CurVT);

    if (NumParts == 1) {
      // Try to use the register type if we couldn't assign the VT.
      if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[0], CCInfo))
        return false;
      continue;
    }

    // For incoming arguments (physregs to vregs), we could have values in
    // physregs (or memlocs) which we want to extract and copy to vregs.
    // During this, we might have to deal with the LLT being split across
    // multiple regs, so we have to record this information for later.
    //
    // If we have outgoing args, then we have the opposite case. We have a
    // vreg with an LLT which we want to assign to a physical location, and
    // we might have to record that the value has to be split later.

    // We're handling an incoming arg which is split over multiple regs.
    // E.g. passing an s128 on AArch64.
    ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
    Args[i].Flags.clear();

    for (unsigned Part = 0; Part < NumParts; ++Part) {
      ISD::ArgFlagsTy Flags = OrigFlags;
      if (Part == 0) {
        Flags.setSplit();
      } else {
        Flags.setOrigAlign(Align(1));
        if (Part == NumParts - 1)
          Flags.setSplitEnd();
      }

      Args[i].Flags.push_back(Flags);
      if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
                             Args[i].Flags[Part], CCInfo)) {
        // Still couldn't assign this smaller part type for some reason.
        return false;
      }
    }
  }

  return true;
}

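// Walk the computed argument locations and emit the register copies, stack
// stores/loads, byval copies, and indirect-passing stack objects needed to
// realize them.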
bool CallLowering::handleAssignments(ValueHandler &Handler,
                                     SmallVectorImpl<ArgInfo> &Args,
                                     CCState &CCInfo,
                                     SmallVectorImpl<CCValAssign> &ArgLocs,
                                     MachineIRBuilder &MIRBuilder,
                                     ArrayRef<Register> ThisReturnRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getDataLayout();

  const unsigned NumArgs = Args.size();

  // Stores thunks for outgoing register assignments. This is used so we delay
  // generating register copies until mem loc assignments are done. We do this
  // so that if the target is using the delayed stack protector feature, we can
  // find the split point of the block accurately. E.g. if we have:
  // G_STORE %val, %memloc
  // $x0 = COPY %foo
  // $x1 = COPY %bar
  // CALL func
  // ... then the split point for the block will correctly be at, and including,
  // the copy to $x0. If instead the G_STORE instruction immediately precedes
  // the CALL, then we'd prematurely choose the CALL as the split point, thus
  // generating a split block with a CALL that uses undefined physregs.
  SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;

  for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
    assert(j < ArgLocs.size() && "Skipped too many arg locs");
    CCValAssign &VA = ArgLocs[j];
    assert(VA.getValNo() == i && "Location doesn't correspond to current arg");

    if (VA.needsCustom()) {
      std::function<void()> Thunk;
      unsigned NumArgRegs = Handler.assignCustomValue(
          Args[i], ArrayRef(ArgLocs).slice(j), &Thunk);
      if (Thunk)
        DelayedOutgoingRegAssignments.emplace_back(Thunk);
      if (!NumArgRegs)
        return false;
      j += (NumArgRegs - 1);
      continue;
    }

    auto AllocaAddressSpace = MF.getDataLayout().getAllocaAddrSpace();

    const MVT ValVT = VA.getValVT();
    const MVT LocVT = VA.getLocVT();

    const LLT LocTy(LocVT);
    const LLT ValTy(ValVT);
    const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
    const EVT OrigVT = EVT::getEVT(Args[i].Ty);
    const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
    const LLT PointerTy = LLT::pointer(
        AllocaAddressSpace, DL.getPointerSizeInBits(AllocaAddressSpace));

    // Expected to be multiple regs for a single incoming arg.
    // There should be Regs.size() ArgLocs per argument.
    // This should be the same as getNumRegistersForCallingConv
    const unsigned NumParts = Args[i].Flags.size();

    // Now split the registers into the assigned types.
    Args[i].OrigRegs.assign(Args[i].Regs.begin(), Args[i].Regs.end());

    if (NumParts != 1 || NewLLT != OrigTy) {
      // If we can't directly assign the register, we need one or more
      // intermediate values.
      Args[i].Regs.resize(NumParts);

      // When we have indirect parameter passing we are receiving a pointer,
      // that points to the actual value, so we need one "temporary" pointer.
      if (VA.getLocInfo() == CCValAssign::Indirect) {
        if (Handler.isIncomingArgumentHandler())
          Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
      } else {
        // For each split register, create and assign a vreg that will store
        // the incoming component of the larger value. These will later be
        // merged to form the final vreg.
        for (unsigned Part = 0; Part < NumParts; ++Part)
          Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
      }
    }

    assert((j + (NumParts - 1)) < ArgLocs.size() &&
           "Too many regs for number of args");

    // Coerce into outgoing value types before register assignment.
    if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
        VA.getLocInfo() != CCValAssign::Indirect) {
      assert(Args[i].OrigRegs.size() == 1);
      buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
                      ValTy, extendOpFromFlags(Args[i].Flags[0]));
    }

    bool IndirectParameterPassingHandled = false;
    bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
    for (unsigned Part = 0; Part < NumParts; ++Part) {
      assert((VA.getLocInfo() != CCValAssign::Indirect || Part == 0) &&
             "Only the first parameter should be processed when "
             "handling indirect passing!");
      Register ArgReg = Args[i].Regs[Part];
      // There should be Regs.size() ArgLocs per argument.
      unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
      CCValAssign &VA = ArgLocs[j + Idx];
      const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];

      // We found an indirect parameter passing, and we have an
      // OutgoingValueHandler as our handler (so we are at the call site or the
      // return value). In this case, start the construction of the following
      // GMIR, that is responsible for the preparation of indirect parameter
      // passing:
      //
      // %1(indirectly passed type) = The value to pass
      // %3(pointer) = G_FRAME_INDEX %stack.0
      // G_STORE %1, %3 :: (store (s128), align 8)
      //
      // After this GMIR, the remaining part of the loop body will decide how
      // to get the value to the caller and we break out of the loop.
      if (VA.getLocInfo() == CCValAssign::Indirect &&
          !Handler.isIncomingArgumentHandler()) {
        Align AlignmentForStored = DL.getPrefTypeAlign(Args[i].Ty);
        MachineFrameInfo &MFI = MF.getFrameInfo();
        // Get some space on the stack for the value, so later we can pass it
        // as a reference.
        int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
                                             AlignmentForStored, false);
        Register PointerToStackReg =
            MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
        MachinePointerInfo StackPointerMPO =
            MachinePointerInfo::getFixedStack(MF, FrameIdx);
        // Store the value in the previously created stack space.
        MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
                              StackPointerMPO,
                              inferAlignFromPtrInfo(MF, StackPointerMPO));

        ArgReg = PointerToStackReg;
        IndirectParameterPassingHandled = true;
      }

      if (VA.isMemLoc() && !Flags.isByVal()) {
        // Individual pieces may have been spilled to the stack and others
        // passed in registers.

        // TODO: The memory size may be larger than the value we need to
        // store. We may need to adjust the offset for big endian targets.
        LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);

        MachinePointerInfo MPO;
        Register StackAddr =
            Handler.getStackAddress(VA.getLocInfo() == CCValAssign::Indirect
                                        ? PointerTy.getSizeInBytes()
                                        : MemTy.getSizeInBytes(),
                                    VA.getLocMemOffset(), MPO, Flags);

        // Finish the handling of indirect passing from the passers
        // (OutgoingParameterHandler) side.
        // This branch is needed, so the pointer to the value is loaded onto the
        // stack.
        if (VA.getLocInfo() == CCValAssign::Indirect)
          Handler.assignValueToAddress(ArgReg, StackAddr, PointerTy, MPO, VA);
        else
          Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO,
                                       VA);
      } else if (VA.isMemLoc() && Flags.isByVal()) {
        assert(Args[i].Regs.size() == 1 && "didn't expect split byval pointer");

        if (Handler.isIncomingArgumentHandler()) {
          // We just need to copy the frame index value to the pointer.
          MachinePointerInfo MPO;
          Register StackAddr = Handler.getStackAddress(
              Flags.getByValSize(), VA.getLocMemOffset(), MPO, Flags);
          MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr);
        } else {
          // For outgoing byval arguments, insert the implicit copy byval
          // implies, such that writes in the callee do not modify the caller's
          // value.
          uint64_t MemSize = Flags.getByValSize();
          int64_t Offset = VA.getLocMemOffset();

          MachinePointerInfo DstMPO;
          Register StackAddr =
              Handler.getStackAddress(MemSize, Offset, DstMPO, Flags);

          MachinePointerInfo SrcMPO(Args[i].OrigValue);
          if (!Args[i].OrigValue) {
            // We still need to accurately track the stack address space if we
            // don't know the underlying value.
            const LLT PtrTy = MRI.getType(StackAddr);
            SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
          }

          Align DstAlign = std::max(Flags.getNonZeroByValAlign(),
                                    inferAlignFromPtrInfo(MF, DstMPO));

          Align SrcAlign = std::max(Flags.getNonZeroByValAlign(),
                                    inferAlignFromPtrInfo(MF, SrcMPO));

          Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0],
                                     DstMPO, DstAlign, SrcMPO, SrcAlign,
                                     MemSize, VA);
        }
      } else if (i == 0 && !ThisReturnRegs.empty() &&
                 Handler.isIncomingArgumentHandler() &&
                 isTypeIsValidForThisReturn(ValVT)) {
        Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
      } else if (Handler.isIncomingArgumentHandler()) {
        Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
      } else {
        DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
          Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
        });
      }

      // Finish the handling of indirect parameter passing when receiving
      // the value (we are in the called function or the caller when receiving
      // the return value).
      if (VA.getLocInfo() == CCValAssign::Indirect &&
          Handler.isIncomingArgumentHandler()) {
        Align Alignment = DL.getABITypeAlign(Args[i].Ty);
        MachinePointerInfo MPO = MachinePointerInfo::getUnknownStack(MF);

        // Since we are doing indirect parameter passing, we know that the
        // value in the temporary register is not the value passed to the
        // function, but rather a pointer to that value. Let's load that value
        // into the virtual register where the parameter should go.
        MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], MPO,
                             Alignment);

        IndirectParameterPassingHandled = true;
      }

      if (IndirectParameterPassingHandled)
        break;
    }

    // Now that all pieces have been assigned, re-pack the register typed values
    // into the original value typed registers. This is only necessary when
    // the value was passed in multiple registers, not indirectly.
    if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
        !IndirectParameterPassingHandled) {
      // Merge the split registers into the expected larger result vregs of
      // the original call.
      buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
                        LocTy, Args[i].Flags[0]);
    }

    j += NumParts - 1;
  }
  for (auto &Fn : DelayedOutgoingRegAssignments)
    Fn();

  return true;
}

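// Load each piece of a demoted (sret) return value from the stack slot FI,
// addressed through DemoteReg, into VRegs after the call.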
void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
                                   ArrayRef<Register> VRegs, Register DemoteReg,
                                   int FI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const DataLayout &DL = MF.getDataLayout();

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0);

  assert(VRegs.size() == SplitVTs.size());

  unsigned NumValues = SplitVTs.size();
  Align BaseAlign = DL.getPrefTypeAlign(RetTy);
  Type *RetPtrTy =
      PointerType::get(RetTy->getContext(), DL.getAllocaAddrSpace());
  LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);

  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);

  for (unsigned I = 0; I < NumValues; ++I) {
    Register Addr;
    MIRBuilder.materializeObjectPtrOffset(Addr, DemoteReg, OffsetLLTy,
                                          Offsets[I]);
    auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                        MRI.getType(VRegs[I]),
                                        commonAlignment(BaseAlign, Offsets[I]));
    MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
  }
}

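// Store the pieces of the return value in VRegs through the incoming sret
// pointer DemoteReg.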
void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
                                    ArrayRef<Register> VRegs,
                                    Register DemoteReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const DataLayout &DL = MF.getDataLayout();

  SmallVector<EVT, 4> SplitVTs;
  SmallVector<uint64_t, 4> Offsets;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, /*MemVTs=*/nullptr, &Offsets, 0);

  assert(VRegs.size() == SplitVTs.size());

  unsigned NumValues = SplitVTs.size();
  Align BaseAlign = DL.getPrefTypeAlign(RetTy);
  unsigned AS = DL.getAllocaAddrSpace();
  LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getContext(), AS), DL);

  MachinePointerInfo PtrInfo(AS);

  for (unsigned I = 0; I < NumValues; ++I) {
    Register Addr;
    MIRBuilder.materializeObjectPtrOffset(Addr, DemoteReg, OffsetLLTy,
                                          Offsets[I]);
    auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                        MRI.getType(VRegs[I]),
                                        commonAlignment(BaseAlign, Offsets[I]));
    MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
  }
}

void CallLowering::insertSRetIncomingArgument(
    const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg,
    MachineRegisterInfo &MRI, const DataLayout &DL) const {
  unsigned AS = DL.getAllocaAddrSpace();
  DemoteReg = MRI.createGenericVirtualRegister(
      LLT::pointer(AS, DL.getPointerSizeInBits(AS)));

  Type *PtrTy = PointerType::get(F.getContext(), AS);

  SmallVector<EVT, 1> ValueVTs;
  ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs);

  // NOTE: Assume that a pointer won't get split into more than one VT.
  assert(ValueVTs.size() == 1);

  ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()),
                    ArgInfo::NoArgIndex);
  setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
  DemoteArg.Flags[0].setSRet();
  SplitArgs.insert(SplitArgs.begin(), DemoteArg);
}

void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
                                              const CallBase &CB,
                                              CallLoweringInfo &Info) const {
  const DataLayout &DL = MIRBuilder.getDataLayout();
  Type *RetTy = CB.getType();
  unsigned AS = DL.getAllocaAddrSpace();
  LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS));

  int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject(
      DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);

  Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
  ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy->getContext(), AS),
                    ArgInfo::NoArgIndex);
  setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
  DemoteArg.Flags[0].setSRet();

  Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg);
  Info.DemoteStackIndex = FI;
  Info.DemoteRegister = DemoteReg;
}

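// Return true if the calling-convention function Fn can assign every piece of
// the return value described by Outs.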
bool CallLowering::checkReturn(CCState &CCInfo,
                               SmallVectorImpl<BaseArgInfo> &Outs,
                               CCAssignFn *Fn) const {
  for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
    MVT VT = MVT::getVT(Outs[I].Ty);
    if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], Outs[I].Ty, CCInfo))
      return false;
  }
  return true;
}

void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy,
                                 AttributeList Attrs,
                                 SmallVectorImpl<BaseArgInfo> &Outs,
                                 const DataLayout &DL) const {
  LLVMContext &Context = RetTy->getContext();
  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();

  SmallVector<EVT, 4> SplitVTs;
  ComputeValueVTs(*TLI, DL, RetTy, SplitVTs);
  addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex);

  for (EVT VT : SplitVTs) {
    unsigned NumParts =
        TLI->getNumRegistersForCallingConv(Context, CallConv, VT);
    MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT);
    Type *PartTy = EVT(RegVT).getTypeForEVT(Context);

    for (unsigned I = 0; I < NumParts; ++I) {
      Outs.emplace_back(PartTy, Flags);
    }
  }
}

bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
  const auto &F = MF.getFunction();
  Type *ReturnType = F.getReturnType();
  CallingConv::ID CallConv = F.getCallingConv();

  SmallVector<BaseArgInfo, 4> SplitArgs;
  getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs,
                MF.getDataLayout());
  return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
}

bool CallLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &OutLocs,
    const SmallVectorImpl<ArgInfo> &OutArgs) const {
  for (unsigned i = 0; i < OutLocs.size(); ++i) {
    const auto &ArgLoc = OutLocs[i];
    // If it's not a register, it's fine.
    if (!ArgLoc.isRegLoc())
      continue;

    MCRegister PhysReg = ArgLoc.getLocReg();

    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg))
      continue;

    LLVM_DEBUG(
        dbgs()
        << "... Call has an argument passed in a callee-saved register.\n");

    // Check if it was copied from.
    const ArgInfo &OutInfo = OutArgs[i];

    if (OutInfo.Regs.size() > 1) {
      LLVM_DEBUG(
          dbgs() << "... Cannot handle arguments in multiple registers.\n");
      return false;
    }

    // Check if we copy the register, walking through copies from virtual
    // registers. Note that getDefIgnoringCopies does not ignore copies from
    // physical registers.
    MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
    if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
      LLVM_DEBUG(
          dbgs()
          << "... Parameter was not copied into a VReg, cannot tail call.\n");
      return false;
    }

    // Got a copy. Verify that it's the same as the register we want.
    Register CopyRHS = RegDef->getOperand(1).getReg();
    if (CopyRHS != PhysReg) {
      LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
                           "VReg, cannot tail call.\n");
      return false;
    }
  }

  return true;
}

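// Check whether the callee and caller calling conventions assign the call's
// result values to identical locations.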
| 1211 | bool CallLowering::resultsCompatible(CallLoweringInfo &Info, |
| 1212 | MachineFunction &MF, |
| 1213 | SmallVectorImpl<ArgInfo> &InArgs, |
| 1214 | ValueAssigner &CalleeAssigner, |
| 1215 | ValueAssigner &CallerAssigner) const { |
| 1216 | const Function &F = MF.getFunction(); |
| 1217 | CallingConv::ID CalleeCC = Info.CallConv; |
| 1218 | CallingConv::ID CallerCC = F.getCallingConv(); |
| 1219 | |
| 1220 | if (CallerCC == CalleeCC) |
| 1221 | return true; |
| 1222 | |
| 1223 | SmallVector<CCValAssign, 16> ArgLocs1; |
| 1224 | CCState CCInfo1(CalleeCC, Info.IsVarArg, MF, ArgLocs1, F.getContext()); |
| 1225 | if (!determineAssignments(Assigner&: CalleeAssigner, Args&: InArgs, CCInfo&: CCInfo1)) |
| 1226 | return false; |
| 1227 | |
| 1228 | SmallVector<CCValAssign, 16> ArgLocs2; |
| 1229 | CCState CCInfo2(CallerCC, F.isVarArg(), MF, ArgLocs2, F.getContext()); |
| 1230 | if (!determineAssignments(Assigner&: CallerAssigner, Args&: InArgs, CCInfo&: CCInfo2)) |
| 1231 | return false; |
| 1232 | |
| 1233 | // We need the argument locations to match up exactly. If there's more in |
| 1234 | // one than the other, then we are done. |
| 1235 | if (ArgLocs1.size() != ArgLocs2.size()) |
| 1236 | return false; |
| 1237 | |
| 1238 | // Make sure that each location is passed in exactly the same way. |
| 1239 | for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) { |
| 1240 | const CCValAssign &Loc1 = ArgLocs1[i]; |
| 1241 | const CCValAssign &Loc2 = ArgLocs2[i]; |
| 1242 | |
| 1243 | // We need both of them to be the same. So if one is a register and one |
| 1244 | // isn't, we're done. |
| 1245 | if (Loc1.isRegLoc() != Loc2.isRegLoc()) |
| 1246 | return false; |
| 1247 | |
| 1248 | if (Loc1.isRegLoc()) { |
| 1249 | // If they don't have the same register location, we're done. |
| 1250 | if (Loc1.getLocReg() != Loc2.getLocReg()) |
| 1251 | return false; |
| 1252 | |
| 1253 | // They matched, so we can move to the next ArgLoc. |
| 1254 | continue; |
| 1255 | } |
| 1256 | |
| 1257 | // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match. |
| 1258 | if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) |
| 1259 | return false; |
| 1260 | } |
| 1261 | |
| 1262 | return true; |
| 1263 | } |
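
// Illustrative sketch (assumption; the "MyTarget" names are placeholders, not
// real classes): a target's tail-call eligibility hook typically splits the
// call's return value into ArgInfos and then asks whether caller and callee
// would place those results identically:
//
//   SmallVector<ArgInfo, 8> InArgs;
//   // ... fill InArgs from Info.OrigRet, e.g. via splitToValueTypes ...
//   MyTargetIncomingValueAssigner CalleeAssigner(RetCCForCallee);
//   MyTargetIncomingValueAssigner CallerAssigner(RetCCForCaller);
//   if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
//     return false; // result locations differ, so no tail call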

LLT CallLowering::ValueHandler::getStackValueStoreType(
    const DataLayout &DL, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const {
  const MVT ValVT = VA.getValVT();
  if (ValVT != MVT::iPTR) {
    LLT ValTy(ValVT);

    // We lost the pointeriness going through CCValAssign, so try to restore it
    // based on the flags.
    if (Flags.isPointer()) {
      LLT PtrTy = LLT::pointer(Flags.getPointerAddrSpace(),
                               ValTy.getScalarSizeInBits());
      if (ValVT.isVector() && ValVT.getVectorNumElements() != 1)
        return LLT::vector(ValTy.getElementCount(), PtrTy);
      return PtrTy;
    }

    return ValTy;
  }

  unsigned AddrSpace = Flags.getPointerAddrSpace();
  return LLT::pointer(AddrSpace, DL.getPointerSize(AddrSpace));
}
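
// Illustrative example (assumption: a target with 64-bit pointers in address
// space 0): a pointer argument that spills to the stack reaches us as an i64
// CCValAssign, and the pointer flag is what lets us store it as p0 again:
//
//   getValVT() == MVT::i64, Flags.isPointer()  ->  LLT::pointer(0, 64)
//   getValVT() == MVT::i64, no pointer flag    ->  LLT::scalar(64)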

void CallLowering::ValueHandler::copyArgumentMemory(
    const ArgInfo &Arg, Register DstPtr, Register SrcPtr,
    const MachinePointerInfo &DstPtrInfo, Align DstAlign,
    const MachinePointerInfo &SrcPtrInfo, Align SrcAlign, uint64_t MemSize,
    CCValAssign &VA) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand *SrcMMO = MF.getMachineMemOperand(
      SrcPtrInfo,
      MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable, MemSize,
      SrcAlign);

  MachineMemOperand *DstMMO = MF.getMachineMemOperand(
      DstPtrInfo,
      MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable,
      MemSize, DstAlign);

  const LLT PtrTy = MRI.getType(DstPtr);
  const LLT SizeTy = LLT::scalar(PtrTy.getSizeInBits());

  auto SizeConst = MIRBuilder.buildConstant(SizeTy, MemSize);
  MIRBuilder.buildMemCpy(DstPtr, SrcPtr, SizeConst, *DstMMO, *SrcMMO);
}
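
// Illustrative sketch (assumption): for a 16-byte byval argument on a target
// with 64-bit pointers, the code above emits roughly
//
//   %size:_(s64) = G_CONSTANT i64 16
//   G_MEMCPY %dst(p0), %src(p0), %size(s64), 0 :: (store ...), (load ...)
//
// with the memory operands carrying the pointer info and alignment supplied
// by the caller. Register names are examples only.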

Register CallLowering::ValueHandler::extendRegister(Register ValReg,
                                                    const CCValAssign &VA,
                                                    unsigned MaxSizeBits) {
  LLT LocTy{VA.getLocVT()};
  LLT ValTy{VA.getValVT()};

  if (LocTy.getSizeInBits() == ValTy.getSizeInBits())
    return ValReg;

  if (LocTy.isScalar() && MaxSizeBits && MaxSizeBits < LocTy.getSizeInBits()) {
    if (MaxSizeBits <= ValTy.getSizeInBits())
      return ValReg;
    LocTy = LLT::scalar(MaxSizeBits);
  }

  const LLT ValRegTy = MRI.getType(ValReg);
  if (ValRegTy.isPointer()) {
    // The x32 ABI wants to zero extend 32-bit pointers to 64-bit registers, so
    // we have to cast to do the extension.
    LLT IntPtrTy = LLT::scalar(ValRegTy.getSizeInBits());
    ValReg = MIRBuilder.buildPtrToInt(IntPtrTy, ValReg).getReg(0);
  }

  switch (VA.getLocInfo()) {
  default:
    break;
  case CCValAssign::Full:
  case CCValAssign::BCvt:
    // FIXME: bitconverting between vector types may or may not be a
    // nop in big-endian situations.
    return ValReg;
  case CCValAssign::AExt: {
    auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg);
    return MIB.getReg(0);
  }
  case CCValAssign::SExt: {
    Register NewReg = MRI.createGenericVirtualRegister(LocTy);
    MIRBuilder.buildSExt(NewReg, ValReg);
    return NewReg;
  }
  case CCValAssign::ZExt: {
    Register NewReg = MRI.createGenericVirtualRegister(LocTy);
    MIRBuilder.buildZExt(NewReg, ValReg);
    return NewReg;
  }
  }
  llvm_unreachable("unable to extend register");
}
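
// Illustrative example (assumption; virtual register names are made up): an
// i8 value promoted to a 32-bit location, i.e. ValVT = i8, LocVT = i32,
// extends as dictated by the location info:
//
//   LocInfo = SExt  ->  %ext:_(s32) = G_SEXT %val(s8)
//   LocInfo = ZExt  ->  %ext:_(s32) = G_ZEXT %val(s8)
//   LocInfo = AExt  ->  %ext:_(s32) = G_ANYEXT %val(s8)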

void CallLowering::ValueAssigner::anchor() {}

Register CallLowering::IncomingValueHandler::buildExtensionHint(
    const CCValAssign &VA, Register SrcReg, LLT NarrowTy) {
  switch (VA.getLocInfo()) {
  case CCValAssign::LocInfo::ZExt: {
    return MIRBuilder
        .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
                         NarrowTy.getScalarSizeInBits())
        .getReg(0);
  }
  case CCValAssign::LocInfo::SExt: {
    return MIRBuilder
        .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
                         NarrowTy.getScalarSizeInBits())
        .getReg(0);
  }
  default:
    return SrcReg;
  }
}

/// Check if we can use a basic COPY instruction between the two types.
///
/// We're currently building on top of the infrastructure using MVT, which
/// loses pointer information in the CCValAssign. We accept copies from
/// physical registers that have been reported as integers if the destination
/// is an equivalently sized pointer LLT.
static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
  if (SrcTy == DstTy)
    return true;

  if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
    return false;

  SrcTy = SrcTy.getScalarType();
  DstTy = DstTy.getScalarType();

  return (SrcTy.isPointer() && DstTy.isScalar()) ||
         (DstTy.isPointer() && SrcTy.isScalar());
}
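
// Illustrative examples (assumption: default address space, 64-bit pointers):
//
//   isCopyCompatibleType(LLT::scalar(64), LLT::scalar(64))      -> true
//   isCopyCompatibleType(LLT::pointer(0, 64), LLT::scalar(64))  -> true
//   isCopyCompatibleType(LLT::scalar(32), LLT::scalar(64))      -> false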

void CallLowering::IncomingValueHandler::assignValueToReg(
    Register ValVReg, Register PhysReg, const CCValAssign &VA) {
  const MVT LocVT = VA.getLocVT();
  const LLT LocTy(LocVT);
  const LLT RegTy = MRI.getType(ValVReg);

  if (isCopyCompatibleType(RegTy, LocTy)) {
    MIRBuilder.buildCopy(ValVReg, PhysReg);
    return;
  }

  auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg);
  auto Hint = buildExtensionHint(VA, Copy.getReg(0), RegTy);
  MIRBuilder.buildTrunc(ValVReg, Hint);
}
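
// Illustrative sketch (assumption; AArch64-flavoured MIR, register numbers
// are examples only): an incoming i8 argument zero-extended into the 32-bit
// location $w0 lowers to
//
//   %1:_(s32) = COPY $w0
//   %2:_(s32) = G_ASSERT_ZEXT %1, 8
//   %0:_(s8) = G_TRUNC %2(s32)
//
// whereas an argument whose virtual register type already matches the
// location (or is a same-sized pointer) is a single COPY from the physreg.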