1 | //===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements some simple delegations needed for call lowering. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/CodeGen/GlobalISel/CallLowering.h" |
15 | #include "llvm/CodeGen/Analysis.h" |
16 | #include "llvm/CodeGen/CallingConvLower.h" |
17 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
18 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
19 | #include "llvm/CodeGen/MachineFrameInfo.h" |
20 | #include "llvm/CodeGen/MachineOperand.h" |
21 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
22 | #include "llvm/CodeGen/TargetLowering.h" |
23 | #include "llvm/IR/DataLayout.h" |
24 | #include "llvm/IR/IntrinsicInst.h" |
25 | #include "llvm/IR/LLVMContext.h" |
26 | #include "llvm/IR/Module.h" |
27 | #include "llvm/Target/TargetMachine.h" |
28 | |
29 | #define DEBUG_TYPE "call-lowering" |
30 | |
31 | using namespace llvm; |
32 | |
33 | void CallLowering::anchor() {} |
34 | |
35 | /// Helper function which updates \p Flags when \p AttrFn returns true. |
36 | static void |
37 | addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, |
38 | const std::function<bool(Attribute::AttrKind)> &AttrFn) { |
39 | // TODO: There are missing flags. Add them here. |
40 | if (AttrFn(Attribute::SExt)) |
41 | Flags.setSExt(); |
42 | if (AttrFn(Attribute::ZExt)) |
43 | Flags.setZExt(); |
44 | if (AttrFn(Attribute::InReg)) |
45 | Flags.setInReg(); |
46 | if (AttrFn(Attribute::StructRet)) |
47 | Flags.setSRet(); |
48 | if (AttrFn(Attribute::Nest)) |
49 | Flags.setNest(); |
50 | if (AttrFn(Attribute::ByVal)) |
51 | Flags.setByVal(); |
52 | if (AttrFn(Attribute::ByRef)) |
53 | Flags.setByRef(); |
54 | if (AttrFn(Attribute::Preallocated)) |
55 | Flags.setPreallocated(); |
56 | if (AttrFn(Attribute::InAlloca)) |
57 | Flags.setInAlloca(); |
58 | if (AttrFn(Attribute::Returned)) |
59 | Flags.setReturned(); |
60 | if (AttrFn(Attribute::SwiftSelf)) |
61 | Flags.setSwiftSelf(); |
62 | if (AttrFn(Attribute::SwiftAsync)) |
63 | Flags.setSwiftAsync(); |
64 | if (AttrFn(Attribute::SwiftError)) |
65 | Flags.setSwiftError(); |
66 | } |
67 | |
68 | ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, |
69 | unsigned ArgIdx) const { |
70 | ISD::ArgFlagsTy Flags; |
71 | addFlagsUsingAttrFn(Flags, AttrFn: [&Call, &ArgIdx](Attribute::AttrKind Attr) { |
72 | return Call.paramHasAttr(ArgNo: ArgIdx, Kind: Attr); |
73 | }); |
74 | return Flags; |
75 | } |
76 | |
77 | ISD::ArgFlagsTy |
78 | CallLowering::getAttributesForReturn(const CallBase &Call) const { |
79 | ISD::ArgFlagsTy Flags; |
80 | addFlagsUsingAttrFn(Flags, AttrFn: [&Call](Attribute::AttrKind Attr) { |
81 | return Call.hasRetAttr(Kind: Attr); |
82 | }); |
83 | return Flags; |
84 | } |
85 | |
86 | void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, |
87 | const AttributeList &Attrs, |
88 | unsigned OpIdx) const { |
89 | addFlagsUsingAttrFn(Flags, AttrFn: [&Attrs, &OpIdx](Attribute::AttrKind Attr) { |
90 | return Attrs.hasAttributeAtIndex(Index: OpIdx, Kind: Attr); |
91 | }); |
92 | } |
93 | |
94 | bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, |
95 | ArrayRef<Register> ResRegs, |
96 | ArrayRef<ArrayRef<Register>> ArgRegs, |
97 | Register SwiftErrorVReg, |
98 | std::optional<PtrAuthInfo> PAI, |
99 | Register ConvergenceCtrlToken, |
100 | std::function<unsigned()> GetCalleeReg) const { |
101 | CallLoweringInfo Info; |
102 | const DataLayout &DL = MIRBuilder.getDataLayout(); |
103 | MachineFunction &MF = MIRBuilder.getMF(); |
104 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
105 | bool CanBeTailCalled = CB.isTailCall() && |
106 | isInTailCallPosition(Call: CB, TM: MF.getTarget()) && |
107 | (MF.getFunction() |
108 | .getFnAttribute(Kind: "disable-tail-calls" ) |
109 | .getValueAsString() != "true" ); |
110 | |
111 | CallingConv::ID CallConv = CB.getCallingConv(); |
112 | Type *RetTy = CB.getType(); |
113 | bool IsVarArg = CB.getFunctionType()->isVarArg(); |
114 | |
115 | SmallVector<BaseArgInfo, 4> SplitArgs; |
116 | getReturnInfo(CallConv, RetTy, Attrs: CB.getAttributes(), Outs&: SplitArgs, DL); |
117 | Info.CanLowerReturn = canLowerReturn(MF, CallConv, Outs&: SplitArgs, IsVarArg); |
118 | |
119 | Info.IsConvergent = CB.isConvergent(); |
120 | |
121 | if (!Info.CanLowerReturn) { |
122 | // Callee requires sret demotion. |
123 | insertSRetOutgoingArgument(MIRBuilder, CB, Info); |
124 | |
125 | // The sret demotion isn't compatible with tail-calls, since the sret |
126 | // argument points into the caller's stack frame. |
127 | CanBeTailCalled = false; |
128 | } |
129 | |
130 | // First step is to marshall all the function's parameters into the correct |
131 | // physregs and memory locations. Gather the sequence of argument types that |
132 | // we'll pass to the assigner function. |
133 | unsigned i = 0; |
134 | unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); |
135 | for (const auto &Arg : CB.args()) { |
136 | ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(Call: CB, ArgIdx: i), |
137 | i < NumFixedArgs}; |
138 | setArgFlags(Arg&: OrigArg, OpIdx: i + AttributeList::FirstArgIndex, DL, FuncInfo: CB); |
139 | |
140 | // If we have an explicit sret argument that is an Instruction, (i.e., it |
141 | // might point to function-local memory), we can't meaningfully tail-call. |
142 | if (OrigArg.Flags[0].isSRet() && isa<Instruction>(Val: &Arg)) |
143 | CanBeTailCalled = false; |
144 | |
145 | Info.OrigArgs.push_back(Elt: OrigArg); |
146 | ++i; |
147 | } |
148 | |
149 | // Try looking through a bitcast from one function type to another. |
150 | // Commonly happens with calls to objc_msgSend(). |
151 | const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts(); |
152 | |
153 | // If IRTranslator chose to drop the ptrauth info, we can turn this into |
154 | // a direct call. |
155 | if (!PAI && CB.countOperandBundlesOfType(ID: LLVMContext::OB_ptrauth)) { |
156 | CalleeV = cast<ConstantPtrAuth>(Val: CalleeV)->getPointer(); |
157 | assert(isa<Function>(CalleeV)); |
158 | } |
159 | |
160 | if (const Function *F = dyn_cast<Function>(Val: CalleeV)) { |
161 | if (F->hasFnAttribute(Kind: Attribute::NonLazyBind)) { |
162 | LLT Ty = getLLTForType(Ty&: *F->getType(), DL); |
163 | Register Reg = MIRBuilder.buildGlobalValue(Res: Ty, GV: F).getReg(Idx: 0); |
164 | Info.Callee = MachineOperand::CreateReg(Reg, isDef: false); |
165 | } else { |
166 | Info.Callee = MachineOperand::CreateGA(GV: F, Offset: 0); |
167 | } |
168 | } else if (isa<GlobalIFunc>(Val: CalleeV) || isa<GlobalAlias>(Val: CalleeV)) { |
169 | // IR IFuncs and Aliases can't be forward declared (only defined), so the |
170 | // callee must be in the same TU and therefore we can direct-call it without |
171 | // worrying about it being out of range. |
172 | Info.Callee = MachineOperand::CreateGA(GV: cast<GlobalValue>(Val: CalleeV), Offset: 0); |
173 | } else |
174 | Info.Callee = MachineOperand::CreateReg(Reg: GetCalleeReg(), isDef: false); |
175 | |
176 | Register ReturnHintAlignReg; |
177 | Align ReturnHintAlign; |
178 | |
179 | Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, getAttributesForReturn(Call: CB)}; |
180 | |
181 | if (!Info.OrigRet.Ty->isVoidTy()) { |
182 | setArgFlags(Arg&: Info.OrigRet, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: CB); |
183 | |
184 | if (MaybeAlign Alignment = CB.getRetAlign()) { |
185 | if (*Alignment > Align(1)) { |
186 | ReturnHintAlignReg = MRI.cloneVirtualRegister(VReg: ResRegs[0]); |
187 | Info.OrigRet.Regs[0] = ReturnHintAlignReg; |
188 | ReturnHintAlign = *Alignment; |
189 | } |
190 | } |
191 | } |
192 | |
193 | auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_kcfi); |
194 | if (Bundle && CB.isIndirectCall()) { |
195 | Info.CFIType = cast<ConstantInt>(Val: Bundle->Inputs[0]); |
196 | assert(Info.CFIType->getType()->isIntegerTy(32) && "Invalid CFI type" ); |
197 | } |
198 | |
199 | Info.CB = &CB; |
200 | Info.KnownCallees = CB.getMetadata(KindID: LLVMContext::MD_callees); |
201 | Info.CallConv = CallConv; |
202 | Info.SwiftErrorVReg = SwiftErrorVReg; |
203 | Info.PAI = PAI; |
204 | Info.ConvergenceCtrlToken = ConvergenceCtrlToken; |
205 | Info.IsMustTailCall = CB.isMustTailCall(); |
206 | Info.IsTailCall = CanBeTailCalled; |
207 | Info.IsVarArg = IsVarArg; |
208 | if (!lowerCall(MIRBuilder, Info)) |
209 | return false; |
210 | |
211 | if (ReturnHintAlignReg && !Info.LoweredTailCall) { |
212 | MIRBuilder.buildAssertAlign(Res: ResRegs[0], Op: ReturnHintAlignReg, |
213 | AlignVal: ReturnHintAlign); |
214 | } |
215 | |
216 | return true; |
217 | } |
218 | |
219 | template <typename FuncInfoTy> |
220 | void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, |
221 | const DataLayout &DL, |
222 | const FuncInfoTy &FuncInfo) const { |
223 | auto &Flags = Arg.Flags[0]; |
224 | const AttributeList &Attrs = FuncInfo.getAttributes(); |
225 | addArgFlagsFromAttributes(Flags, Attrs, OpIdx); |
226 | |
227 | PointerType *PtrTy = dyn_cast<PointerType>(Val: Arg.Ty->getScalarType()); |
228 | if (PtrTy) { |
229 | Flags.setPointer(); |
230 | Flags.setPointerAddrSpace(PtrTy->getPointerAddressSpace()); |
231 | } |
232 | |
233 | Align MemAlign = DL.getABITypeAlign(Ty: Arg.Ty); |
234 | if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() || |
235 | Flags.isByRef()) { |
236 | assert(OpIdx >= AttributeList::FirstArgIndex); |
237 | unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex; |
238 | |
239 | Type *ElementTy = FuncInfo.getParamByValType(ParamIdx); |
240 | if (!ElementTy) |
241 | ElementTy = FuncInfo.getParamByRefType(ParamIdx); |
242 | if (!ElementTy) |
243 | ElementTy = FuncInfo.getParamInAllocaType(ParamIdx); |
244 | if (!ElementTy) |
245 | ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx); |
246 | |
247 | assert(ElementTy && "Must have byval, inalloca or preallocated type" ); |
248 | |
249 | uint64_t MemSize = DL.getTypeAllocSize(Ty: ElementTy); |
250 | if (Flags.isByRef()) |
251 | Flags.setByRefSize(MemSize); |
252 | else |
253 | Flags.setByValSize(MemSize); |
254 | |
255 | // For ByVal, alignment should be passed from FE. BE will guess if |
256 | // this info is not there but there are cases it cannot get right. |
257 | if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx)) |
258 | MemAlign = *ParamAlign; |
259 | else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx))) |
260 | MemAlign = *ParamAlign; |
261 | else |
262 | MemAlign = Align(getTLI()->getByValTypeAlignment(Ty: ElementTy, DL)); |
263 | } else if (OpIdx >= AttributeList::FirstArgIndex) { |
264 | if (auto ParamAlign = |
265 | FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex)) |
266 | MemAlign = *ParamAlign; |
267 | } |
268 | Flags.setMemAlign(MemAlign); |
269 | Flags.setOrigAlign(DL.getABITypeAlign(Ty: Arg.Ty)); |
270 | |
271 | // Don't try to use the returned attribute if the argument is marked as |
272 | // swiftself, since it won't be passed in x0. |
273 | if (Flags.isSwiftSelf()) |
274 | Flags.setReturned(false); |
275 | } |
276 | |
277 | template void |
278 | CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx, |
279 | const DataLayout &DL, |
280 | const Function &FuncInfo) const; |
281 | |
282 | template void |
283 | CallLowering::setArgFlags<CallBase>(CallLowering::ArgInfo &Arg, unsigned OpIdx, |
284 | const DataLayout &DL, |
285 | const CallBase &FuncInfo) const; |
286 | |
287 | void CallLowering::splitToValueTypes(const ArgInfo &OrigArg, |
288 | SmallVectorImpl<ArgInfo> &SplitArgs, |
289 | const DataLayout &DL, |
290 | CallingConv::ID CallConv, |
291 | SmallVectorImpl<uint64_t> *Offsets) const { |
292 | LLVMContext &Ctx = OrigArg.Ty->getContext(); |
293 | |
294 | SmallVector<EVT, 4> SplitVTs; |
295 | ComputeValueVTs(TLI: *TLI, DL, Ty: OrigArg.Ty, ValueVTs&: SplitVTs, FixedOffsets: Offsets, StartingOffset: 0); |
296 | |
297 | if (SplitVTs.size() == 0) |
298 | return; |
299 | |
300 | if (SplitVTs.size() == 1) { |
301 | // No splitting to do, but we want to replace the original type (e.g. [1 x |
302 | // double] -> double). |
303 | SplitArgs.emplace_back(Args: OrigArg.Regs[0], Args: SplitVTs[0].getTypeForEVT(Context&: Ctx), |
304 | Args: OrigArg.OrigArgIndex, Args: OrigArg.Flags[0], |
305 | Args: OrigArg.IsFixed, Args: OrigArg.OrigValue); |
306 | return; |
307 | } |
308 | |
309 | // Create one ArgInfo for each virtual register in the original ArgInfo. |
310 | assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch" ); |
311 | |
312 | bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( |
313 | Ty: OrigArg.Ty, CallConv, isVarArg: false, DL); |
314 | for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { |
315 | Type *SplitTy = SplitVTs[i].getTypeForEVT(Context&: Ctx); |
316 | SplitArgs.emplace_back(Args: OrigArg.Regs[i], Args&: SplitTy, Args: OrigArg.OrigArgIndex, |
317 | Args: OrigArg.Flags[0], Args: OrigArg.IsFixed); |
318 | if (NeedsRegBlock) |
319 | SplitArgs.back().Flags[0].setInConsecutiveRegs(); |
320 | } |
321 | |
322 | SplitArgs.back().Flags[0].setInConsecutiveRegsLast(); |
323 | } |
324 | |
325 | /// Pack values \p SrcRegs to cover the vector type result \p DstRegs. |
326 | static MachineInstrBuilder |
327 | mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, |
328 | ArrayRef<Register> SrcRegs) { |
329 | MachineRegisterInfo &MRI = *B.getMRI(); |
330 | LLT LLTy = MRI.getType(Reg: DstRegs[0]); |
331 | LLT PartLLT = MRI.getType(Reg: SrcRegs[0]); |
332 | |
333 | // Deal with v3s16 split into v2s16 |
334 | LLT LCMTy = getCoverTy(OrigTy: LLTy, TargetTy: PartLLT); |
335 | if (LCMTy == LLTy) { |
336 | // Common case where no padding is needed. |
337 | assert(DstRegs.size() == 1); |
338 | return B.buildConcatVectors(Res: DstRegs[0], Ops: SrcRegs); |
339 | } |
340 | |
341 | // We need to create an unmerge to the result registers, which may require |
342 | // widening the original value. |
343 | Register UnmergeSrcReg; |
344 | if (LCMTy != PartLLT) { |
345 | assert(DstRegs.size() == 1); |
346 | return B.buildDeleteTrailingVectorElements( |
347 | Res: DstRegs[0], Op0: B.buildMergeLikeInstr(Res: LCMTy, Ops: SrcRegs)); |
348 | } else { |
349 | // We don't need to widen anything if we're extracting a scalar which was |
350 | // promoted to a vector e.g. s8 -> v4s8 -> s8 |
351 | assert(SrcRegs.size() == 1); |
352 | UnmergeSrcReg = SrcRegs[0]; |
353 | } |
354 | |
355 | int NumDst = LCMTy.getSizeInBits() / LLTy.getSizeInBits(); |
356 | |
357 | SmallVector<Register, 8> PadDstRegs(NumDst); |
358 | std::copy(first: DstRegs.begin(), last: DstRegs.end(), result: PadDstRegs.begin()); |
359 | |
360 | // Create the excess dead defs for the unmerge. |
361 | for (int I = DstRegs.size(); I != NumDst; ++I) |
362 | PadDstRegs[I] = MRI.createGenericVirtualRegister(Ty: LLTy); |
363 | |
364 | if (PadDstRegs.size() == 1) |
365 | return B.buildDeleteTrailingVectorElements(Res: DstRegs[0], Op0: UnmergeSrcReg); |
366 | return B.buildUnmerge(Res: PadDstRegs, Op: UnmergeSrcReg); |
367 | } |
368 | |
369 | /// Create a sequence of instructions to combine pieces split into register |
370 | /// typed values to the original IR value. \p OrigRegs contains the destination |
371 | /// value registers of type \p LLTy, and \p Regs contains the legalized pieces |
372 | /// with type \p PartLLT. This is used for incoming values (physregs to vregs). |
373 | static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs, |
374 | ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, |
375 | const ISD::ArgFlagsTy Flags) { |
376 | MachineRegisterInfo &MRI = *B.getMRI(); |
377 | |
378 | if (PartLLT == LLTy) { |
379 | // We should have avoided introducing a new virtual register, and just |
380 | // directly assigned here. |
381 | assert(OrigRegs[0] == Regs[0]); |
382 | return; |
383 | } |
384 | |
385 | if (PartLLT.getSizeInBits() == LLTy.getSizeInBits() && OrigRegs.size() == 1 && |
386 | Regs.size() == 1) { |
387 | B.buildBitcast(Dst: OrigRegs[0], Src: Regs[0]); |
388 | return; |
389 | } |
390 | |
391 | // A vector PartLLT needs extending to LLTy's element size. |
392 | // E.g. <2 x s64> = G_SEXT <2 x s32>. |
393 | if (PartLLT.isVector() == LLTy.isVector() && |
394 | PartLLT.getScalarSizeInBits() > LLTy.getScalarSizeInBits() && |
395 | (!PartLLT.isVector() || |
396 | PartLLT.getElementCount() == LLTy.getElementCount()) && |
397 | OrigRegs.size() == 1 && Regs.size() == 1) { |
398 | Register SrcReg = Regs[0]; |
399 | |
400 | LLT LocTy = MRI.getType(Reg: SrcReg); |
401 | |
402 | if (Flags.isSExt()) { |
403 | SrcReg = B.buildAssertSExt(Res: LocTy, Op: SrcReg, Size: LLTy.getScalarSizeInBits()) |
404 | .getReg(Idx: 0); |
405 | } else if (Flags.isZExt()) { |
406 | SrcReg = B.buildAssertZExt(Res: LocTy, Op: SrcReg, Size: LLTy.getScalarSizeInBits()) |
407 | .getReg(Idx: 0); |
408 | } |
409 | |
410 | // Sometimes pointers are passed zero extended. |
411 | LLT OrigTy = MRI.getType(Reg: OrigRegs[0]); |
412 | if (OrigTy.isPointer()) { |
413 | LLT IntPtrTy = LLT::scalar(SizeInBits: OrigTy.getSizeInBits()); |
414 | B.buildIntToPtr(Dst: OrigRegs[0], Src: B.buildTrunc(Res: IntPtrTy, Op: SrcReg)); |
415 | return; |
416 | } |
417 | |
418 | B.buildTrunc(Res: OrigRegs[0], Op: SrcReg); |
419 | return; |
420 | } |
421 | |
422 | if (!LLTy.isVector() && !PartLLT.isVector()) { |
423 | assert(OrigRegs.size() == 1); |
424 | LLT OrigTy = MRI.getType(Reg: OrigRegs[0]); |
425 | |
426 | unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size(); |
427 | if (SrcSize == OrigTy.getSizeInBits()) |
428 | B.buildMergeValues(Res: OrigRegs[0], Ops: Regs); |
429 | else { |
430 | auto Widened = B.buildMergeLikeInstr(Res: LLT::scalar(SizeInBits: SrcSize), Ops: Regs); |
431 | B.buildTrunc(Res: OrigRegs[0], Op: Widened); |
432 | } |
433 | |
434 | return; |
435 | } |
436 | |
437 | if (PartLLT.isVector()) { |
438 | assert(OrigRegs.size() == 1); |
439 | SmallVector<Register> CastRegs(Regs.begin(), Regs.end()); |
440 | |
441 | // If PartLLT is a mismatched vector in both number of elements and element |
442 | // size, e.g. PartLLT == v2s64 and LLTy is v3s32, then first coerce it to |
443 | // have the same elt type, i.e. v4s32. |
444 | // TODO: Extend this coersion to element multiples other than just 2. |
445 | if (TypeSize::isKnownGT(LHS: PartLLT.getSizeInBits(), RHS: LLTy.getSizeInBits()) && |
446 | PartLLT.getScalarSizeInBits() == LLTy.getScalarSizeInBits() * 2 && |
447 | Regs.size() == 1) { |
448 | LLT NewTy = PartLLT.changeElementType(NewEltTy: LLTy.getElementType()) |
449 | .changeElementCount(EC: PartLLT.getElementCount() * 2); |
450 | CastRegs[0] = B.buildBitcast(Dst: NewTy, Src: Regs[0]).getReg(Idx: 0); |
451 | PartLLT = NewTy; |
452 | } |
453 | |
454 | if (LLTy.getScalarType() == PartLLT.getElementType()) { |
455 | mergeVectorRegsToResultRegs(B, DstRegs: OrigRegs, SrcRegs: CastRegs); |
456 | } else { |
457 | unsigned I = 0; |
458 | LLT GCDTy = getGCDType(OrigTy: LLTy, TargetTy: PartLLT); |
459 | |
460 | // We are both splitting a vector, and bitcasting its element types. Cast |
461 | // the source pieces into the appropriate number of pieces with the result |
462 | // element type. |
463 | for (Register SrcReg : CastRegs) |
464 | CastRegs[I++] = B.buildBitcast(Dst: GCDTy, Src: SrcReg).getReg(Idx: 0); |
465 | mergeVectorRegsToResultRegs(B, DstRegs: OrigRegs, SrcRegs: CastRegs); |
466 | } |
467 | |
468 | return; |
469 | } |
470 | |
471 | assert(LLTy.isVector() && !PartLLT.isVector()); |
472 | |
473 | LLT DstEltTy = LLTy.getElementType(); |
474 | |
475 | // Pointer information was discarded. We'll need to coerce some register types |
476 | // to avoid violating type constraints. |
477 | LLT RealDstEltTy = MRI.getType(Reg: OrigRegs[0]).getElementType(); |
478 | |
479 | assert(DstEltTy.getSizeInBits() == RealDstEltTy.getSizeInBits()); |
480 | |
481 | if (DstEltTy == PartLLT) { |
482 | // Vector was trivially scalarized. |
483 | |
484 | if (RealDstEltTy.isPointer()) { |
485 | for (Register Reg : Regs) |
486 | MRI.setType(VReg: Reg, Ty: RealDstEltTy); |
487 | } |
488 | |
489 | B.buildBuildVector(Res: OrigRegs[0], Ops: Regs); |
490 | } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) { |
491 | // Deal with vector with 64-bit elements decomposed to 32-bit |
492 | // registers. Need to create intermediate 64-bit elements. |
493 | SmallVector<Register, 8> EltMerges; |
494 | int PartsPerElt = |
495 | divideCeil(Numerator: DstEltTy.getSizeInBits(), Denominator: PartLLT.getSizeInBits()); |
496 | LLT ExtendedPartTy = LLT::scalar(SizeInBits: PartLLT.getSizeInBits() * PartsPerElt); |
497 | |
498 | for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) { |
499 | auto Merge = |
500 | B.buildMergeLikeInstr(Res: ExtendedPartTy, Ops: Regs.take_front(N: PartsPerElt)); |
501 | if (ExtendedPartTy.getSizeInBits() > RealDstEltTy.getSizeInBits()) |
502 | Merge = B.buildTrunc(Res: RealDstEltTy, Op: Merge); |
503 | // Fix the type in case this is really a vector of pointers. |
504 | MRI.setType(VReg: Merge.getReg(Idx: 0), Ty: RealDstEltTy); |
505 | EltMerges.push_back(Elt: Merge.getReg(Idx: 0)); |
506 | Regs = Regs.drop_front(N: PartsPerElt); |
507 | } |
508 | |
509 | B.buildBuildVector(Res: OrigRegs[0], Ops: EltMerges); |
510 | } else { |
511 | // Vector was split, and elements promoted to a wider type. |
512 | // FIXME: Should handle floating point promotions. |
513 | unsigned NumElts = LLTy.getNumElements(); |
514 | LLT BVType = LLT::fixed_vector(NumElements: NumElts, ScalarTy: PartLLT); |
515 | |
516 | Register BuildVec; |
517 | if (NumElts == Regs.size()) |
518 | BuildVec = B.buildBuildVector(Res: BVType, Ops: Regs).getReg(Idx: 0); |
519 | else { |
520 | // Vector elements are packed in the inputs. |
521 | // e.g. we have a <4 x s16> but 2 x s32 in regs. |
522 | assert(NumElts > Regs.size()); |
523 | LLT SrcEltTy = MRI.getType(Reg: Regs[0]); |
524 | |
525 | LLT OriginalEltTy = MRI.getType(Reg: OrigRegs[0]).getElementType(); |
526 | |
527 | // Input registers contain packed elements. |
528 | // Determine how many elements per reg. |
529 | assert((SrcEltTy.getSizeInBits() % OriginalEltTy.getSizeInBits()) == 0); |
530 | unsigned EltPerReg = |
531 | (SrcEltTy.getSizeInBits() / OriginalEltTy.getSizeInBits()); |
532 | |
533 | SmallVector<Register, 0> BVRegs; |
534 | BVRegs.reserve(N: Regs.size() * EltPerReg); |
535 | for (Register R : Regs) { |
536 | auto Unmerge = B.buildUnmerge(Res: OriginalEltTy, Op: R); |
537 | for (unsigned K = 0; K < EltPerReg; ++K) |
538 | BVRegs.push_back(Elt: B.buildAnyExt(Res: PartLLT, Op: Unmerge.getReg(Idx: K)).getReg(Idx: 0)); |
539 | } |
540 | |
541 | // We may have some more elements in BVRegs, e.g. if we have 2 s32 pieces |
542 | // for a <3 x s16> vector. We should have less than EltPerReg extra items. |
543 | if (BVRegs.size() > NumElts) { |
544 | assert((BVRegs.size() - NumElts) < EltPerReg); |
545 | BVRegs.truncate(N: NumElts); |
546 | } |
547 | BuildVec = B.buildBuildVector(Res: BVType, Ops: BVRegs).getReg(Idx: 0); |
548 | } |
549 | B.buildTrunc(Res: OrigRegs[0], Op: BuildVec); |
550 | } |
551 | } |
552 | |
553 | /// Create a sequence of instructions to expand the value in \p SrcReg (of type |
554 | /// \p SrcTy) to the types in \p DstRegs (of type \p PartTy). \p ExtendOp should |
555 | /// contain the type of scalar value extension if necessary. |
556 | /// |
557 | /// This is used for outgoing values (vregs to physregs) |
558 | static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs, |
559 | Register SrcReg, LLT SrcTy, LLT PartTy, |
560 | unsigned ExtendOp = TargetOpcode::G_ANYEXT) { |
561 | // We could just insert a regular copy, but this is unreachable at the moment. |
562 | assert(SrcTy != PartTy && "identical part types shouldn't reach here" ); |
563 | |
564 | const TypeSize PartSize = PartTy.getSizeInBits(); |
565 | |
566 | if (PartTy.isVector() == SrcTy.isVector() && |
567 | PartTy.getScalarSizeInBits() > SrcTy.getScalarSizeInBits()) { |
568 | assert(DstRegs.size() == 1); |
569 | B.buildInstr(Opc: ExtendOp, DstOps: {DstRegs[0]}, SrcOps: {SrcReg}); |
570 | return; |
571 | } |
572 | |
573 | if (SrcTy.isVector() && !PartTy.isVector() && |
574 | TypeSize::isKnownGT(LHS: PartSize, RHS: SrcTy.getElementType().getSizeInBits())) { |
575 | // Vector was scalarized, and the elements extended. |
576 | auto UnmergeToEltTy = B.buildUnmerge(Res: SrcTy.getElementType(), Op: SrcReg); |
577 | for (int i = 0, e = DstRegs.size(); i != e; ++i) |
578 | B.buildAnyExt(Res: DstRegs[i], Op: UnmergeToEltTy.getReg(Idx: i)); |
579 | return; |
580 | } |
581 | |
582 | if (SrcTy.isVector() && PartTy.isVector() && |
583 | PartTy.getSizeInBits() == SrcTy.getSizeInBits() && |
584 | ElementCount::isKnownLT(LHS: SrcTy.getElementCount(), |
585 | RHS: PartTy.getElementCount())) { |
586 | // A coercion like: v2f32 -> v4f32 or nxv2f32 -> nxv4f32 |
587 | Register DstReg = DstRegs.front(); |
588 | B.buildPadVectorWithUndefElements(Res: DstReg, Op0: SrcReg); |
589 | return; |
590 | } |
591 | |
592 | LLT GCDTy = getGCDType(OrigTy: SrcTy, TargetTy: PartTy); |
593 | if (GCDTy == PartTy) { |
594 | // If this already evenly divisible, we can create a simple unmerge. |
595 | B.buildUnmerge(Res: DstRegs, Op: SrcReg); |
596 | return; |
597 | } |
598 | |
599 | if (SrcTy.isVector() && !PartTy.isVector() && |
600 | SrcTy.getScalarSizeInBits() > PartTy.getSizeInBits()) { |
601 | LLT ExtTy = |
602 | LLT::vector(EC: SrcTy.getElementCount(), |
603 | ScalarTy: LLT::scalar(SizeInBits: PartTy.getScalarSizeInBits() * DstRegs.size() / |
604 | SrcTy.getNumElements())); |
605 | auto Ext = B.buildAnyExt(Res: ExtTy, Op: SrcReg); |
606 | B.buildUnmerge(Res: DstRegs, Op: Ext); |
607 | return; |
608 | } |
609 | |
610 | MachineRegisterInfo &MRI = *B.getMRI(); |
611 | LLT DstTy = MRI.getType(Reg: DstRegs[0]); |
612 | LLT LCMTy = getCoverTy(OrigTy: SrcTy, TargetTy: PartTy); |
613 | |
614 | if (PartTy.isVector() && LCMTy == PartTy) { |
615 | assert(DstRegs.size() == 1); |
616 | B.buildPadVectorWithUndefElements(Res: DstRegs[0], Op0: SrcReg); |
617 | return; |
618 | } |
619 | |
620 | const unsigned DstSize = DstTy.getSizeInBits(); |
621 | const unsigned SrcSize = SrcTy.getSizeInBits(); |
622 | unsigned CoveringSize = LCMTy.getSizeInBits(); |
623 | |
624 | Register UnmergeSrc = SrcReg; |
625 | |
626 | if (!LCMTy.isVector() && CoveringSize != SrcSize) { |
627 | // For scalars, it's common to be able to use a simple extension. |
628 | if (SrcTy.isScalar() && DstTy.isScalar()) { |
629 | CoveringSize = alignTo(Value: SrcSize, Align: DstSize); |
630 | LLT CoverTy = LLT::scalar(SizeInBits: CoveringSize); |
631 | UnmergeSrc = B.buildInstr(Opc: ExtendOp, DstOps: {CoverTy}, SrcOps: {SrcReg}).getReg(Idx: 0); |
632 | } else { |
633 | // Widen to the common type. |
634 | // FIXME: This should respect the extend type |
635 | Register Undef = B.buildUndef(Res: SrcTy).getReg(Idx: 0); |
636 | SmallVector<Register, 8> MergeParts(1, SrcReg); |
637 | for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize) |
638 | MergeParts.push_back(Elt: Undef); |
639 | UnmergeSrc = B.buildMergeLikeInstr(Res: LCMTy, Ops: MergeParts).getReg(Idx: 0); |
640 | } |
641 | } |
642 | |
643 | if (LCMTy.isVector() && CoveringSize != SrcSize) |
644 | UnmergeSrc = B.buildPadVectorWithUndefElements(Res: LCMTy, Op0: SrcReg).getReg(Idx: 0); |
645 | |
646 | B.buildUnmerge(Res: DstRegs, Op: UnmergeSrc); |
647 | } |
648 | |
649 | bool CallLowering::determineAndHandleAssignments( |
650 | ValueHandler &Handler, ValueAssigner &Assigner, |
651 | SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder, |
652 | CallingConv::ID CallConv, bool IsVarArg, |
653 | ArrayRef<Register> ThisReturnRegs) const { |
654 | MachineFunction &MF = MIRBuilder.getMF(); |
655 | const Function &F = MF.getFunction(); |
656 | SmallVector<CCValAssign, 16> ArgLocs; |
657 | |
658 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext()); |
659 | if (!determineAssignments(Assigner, Args, CCInfo)) |
660 | return false; |
661 | |
662 | return handleAssignments(Handler, Args, CCState&: CCInfo, ArgLocs, MIRBuilder, |
663 | ThisReturnRegs); |
664 | } |
665 | |
666 | static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) { |
667 | if (Flags.isSExt()) |
668 | return TargetOpcode::G_SEXT; |
669 | if (Flags.isZExt()) |
670 | return TargetOpcode::G_ZEXT; |
671 | return TargetOpcode::G_ANYEXT; |
672 | } |
673 | |
674 | bool CallLowering::determineAssignments(ValueAssigner &Assigner, |
675 | SmallVectorImpl<ArgInfo> &Args, |
676 | CCState &CCInfo) const { |
677 | LLVMContext &Ctx = CCInfo.getContext(); |
678 | const CallingConv::ID CallConv = CCInfo.getCallingConv(); |
679 | |
680 | unsigned NumArgs = Args.size(); |
681 | for (unsigned i = 0; i != NumArgs; ++i) { |
682 | EVT CurVT = EVT::getEVT(Ty: Args[i].Ty); |
683 | |
684 | MVT NewVT = TLI->getRegisterTypeForCallingConv(Context&: Ctx, CC: CallConv, VT: CurVT); |
685 | |
686 | // If we need to split the type over multiple regs, check it's a scenario |
687 | // we currently support. |
688 | unsigned NumParts = |
689 | TLI->getNumRegistersForCallingConv(Context&: Ctx, CC: CallConv, VT: CurVT); |
690 | |
691 | if (NumParts == 1) { |
692 | // Try to use the register type if we couldn't assign the VT. |
693 | if (Assigner.assignArg(ValNo: i, OrigVT: CurVT, ValVT: NewVT, LocVT: NewVT, LocInfo: CCValAssign::Full, Info: Args[i], |
694 | Flags: Args[i].Flags[0], State&: CCInfo)) |
695 | return false; |
696 | continue; |
697 | } |
698 | |
699 | // For incoming arguments (physregs to vregs), we could have values in |
700 | // physregs (or memlocs) which we want to extract and copy to vregs. |
701 | // During this, we might have to deal with the LLT being split across |
702 | // multiple regs, so we have to record this information for later. |
703 | // |
704 | // If we have outgoing args, then we have the opposite case. We have a |
705 | // vreg with an LLT which we want to assign to a physical location, and |
706 | // we might have to record that the value has to be split later. |
707 | |
708 | // We're handling an incoming arg which is split over multiple regs. |
709 | // E.g. passing an s128 on AArch64. |
710 | ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; |
711 | Args[i].Flags.clear(); |
712 | |
713 | for (unsigned Part = 0; Part < NumParts; ++Part) { |
714 | ISD::ArgFlagsTy Flags = OrigFlags; |
715 | if (Part == 0) { |
716 | Flags.setSplit(); |
717 | } else { |
718 | Flags.setOrigAlign(Align(1)); |
719 | if (Part == NumParts - 1) |
720 | Flags.setSplitEnd(); |
721 | } |
722 | |
723 | Args[i].Flags.push_back(Elt: Flags); |
724 | if (Assigner.assignArg(ValNo: i, OrigVT: CurVT, ValVT: NewVT, LocVT: NewVT, LocInfo: CCValAssign::Full, Info: Args[i], |
725 | Flags: Args[i].Flags[Part], State&: CCInfo)) { |
726 | // Still couldn't assign this smaller part type for some reason. |
727 | return false; |
728 | } |
729 | } |
730 | } |
731 | |
732 | return true; |
733 | } |
734 | |
/// Emit the code that moves every argument in \p Args to or from the location
/// recorded for it in \p ArgLocs, using \p Handler to do the actual work.
///
/// Each argument owns Flags.size() consecutive entries of \p ArgLocs. When an
/// argument is split, or its assigned type differs from its original type,
/// intermediate vregs are created for the parts. Each part is then dispatched:
/// non-byval stack locations go through assignValueToAddress, byval stack
/// locations are either copied from the frame address (incoming) or passed to
/// copyArgumentMemory (outgoing), and register locations go through
/// assignValueToReg. Locations with needsCustom() set are delegated to
/// Handler.assignCustomValue. Outgoing register assignments are queued and
/// only emitted after all arguments have been visited.
///
/// \param ThisReturnRegs if non-empty, and the handler is an incoming handler,
/// register parts of argument 0 are assigned from these registers instead of
/// the location register, provided isTypeIsValidForThisReturn(ValVT) holds.
/// \returns false if a custom assignment reports zero registers; true
/// otherwise.
bool CallLowering::handleAssignments(ValueHandler &Handler,
                                     SmallVectorImpl<ArgInfo> &Args,
                                     CCState &CCInfo,
                                     SmallVectorImpl<CCValAssign> &ArgLocs,
                                     MachineIRBuilder &MIRBuilder,
                                     ArrayRef<Register> ThisReturnRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getDataLayout();

  const unsigned NumArgs = Args.size();

  // Stores thunks for outgoing register assignments. This is used so we delay
  // generating register copies until mem loc assignments are done. We do this
  // so that if the target is using the delayed stack protector feature, we can
  // find the split point of the block accurately. E.g. if we have:
  //   G_STORE %val, %memloc
  //   $x0 = COPY %foo
  //   $x1 = COPY %bar
  //   CALL func
  // ... then the split point for the block will correctly be at, and including,
  // the copy to $x0. If instead the G_STORE instruction immediately precedes
  // the CALL, then we'd prematurely choose the CALL as the split point, thus
  // generating a split block with a CALL that uses undefined physregs.
  SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;

  // i walks the arguments and j walks ArgLocs. j runs ahead of i whenever an
  // argument occupies more than one location.
  for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
    assert(j < ArgLocs.size() && "Skipped too many arg locs" );
    CCValAssign &VA = ArgLocs[j];
    assert(VA.getValNo() == i && "Location doesn't correspond to current arg" );

    if (VA.needsCustom()) {
      std::function<void()> Thunk;
      unsigned NumArgRegs = Handler.assignCustomValue(
          Arg&: Args[i], VAs: ArrayRef(ArgLocs).slice(N: j), Thunk: &Thunk);
      // A thunk handed back by the handler is deferred together with the
      // other outgoing register assignments.
      if (Thunk)
        DelayedOutgoingRegAssignments.emplace_back(Args&: Thunk);
      // A count of zero aborts the lowering; otherwise step over the extra
      // ArgLocs entries (the loop increment accounts for the first one).
      if (!NumArgRegs)
        return false;
      j += (NumArgRegs - 1);
      continue;
    }

    auto AllocaAddressSpace = MF.getDataLayout().getAllocaAddrSpace();

    const MVT ValVT = VA.getValVT();
    const MVT LocVT = VA.getLocVT();

    const LLT LocTy(LocVT);
    const LLT ValTy(ValVT);
    // Type used for the per-part vregs created below: the location type for
    // an incoming handler, the value type otherwise.
    const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
    const EVT OrigVT = EVT::getEVT(Ty: Args[i].Ty);
    const LLT OrigTy = getLLTForType(Ty&: *Args[i].Ty, DL);
    const LLT PointerTy = LLT::pointer(
        AddressSpace: AllocaAddressSpace, SizeInBits: DL.getPointerSizeInBits(AS: AllocaAddressSpace));

    // Expected to be multiple regs for a single incoming arg.
    // There should be Regs.size() ArgLocs per argument.
    // This should be the same as getNumRegistersForCallingConv
    const unsigned NumParts = Args[i].Flags.size();

    // Now split the registers into the assigned types.
    // OrigRegs keeps the registers the argument had on entry; Regs may be
    // replaced with per-part vregs below.
    Args[i].OrigRegs.assign(in_start: Args[i].Regs.begin(), in_end: Args[i].Regs.end());

    if (NumParts != 1 || NewLLT != OrigTy) {
      // If we can't directly assign the register, we need one or more
      // intermediate values.
      Args[i].Regs.resize(N: NumParts);

      // When we have indirect parameter passing we are receiving a pointer,
      // that points to the actual value, so we need one "temporary" pointer.
      if (VA.getLocInfo() == CCValAssign::Indirect) {
        if (Handler.isIncomingArgumentHandler())
          Args[i].Regs[0] = MRI.createGenericVirtualRegister(Ty: PointerTy);
      } else {
        // For each split register, create and assign a vreg that will store
        // the incoming component of the larger value. These will later be
        // merged to form the final vreg.
        for (unsigned Part = 0; Part < NumParts; ++Part)
          Args[i].Regs[Part] = MRI.createGenericVirtualRegister(Ty: NewLLT);
      }
    }

    assert((j + (NumParts - 1)) < ArgLocs.size() &&
           "Too many regs for number of args" );

    // Coerce into outgoing value types before register assignment.
    if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
        VA.getLocInfo() != CCValAssign::Indirect) {
      assert(Args[i].OrigRegs.size() == 1);
      buildCopyToRegs(B&: MIRBuilder, DstRegs: Args[i].Regs, SrcReg: Args[i].OrigRegs[0], SrcTy: OrigTy,
                      PartTy: ValTy, ExtendOp: extendOpFromFlags(Flags: Args[i].Flags[0]));
    }

    bool IndirectParameterPassingHandled = false;
    bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(VT: OrigVT, DL);
    for (unsigned Part = 0; Part < NumParts; ++Part) {
      assert((VA.getLocInfo() != CCValAssign::Indirect || Part == 0) &&
             "Only the first parameter should be processed when "
             "handling indirect passing!" );
      Register ArgReg = Args[i].Regs[Part];
      // There should be Regs.size() ArgLocs per argument.
      // With big-endian part ordering the parts map onto this argument's
      // locations in reverse.
      unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
      // Note: this VA shadows the per-argument VA declared above and refers
      // to the location of the current part for the rest of the loop body.
      CCValAssign &VA = ArgLocs[j + Idx];
      const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];

      // We found an indirect parameter passing, and we have an
      // OutgoingValueHandler as our handler (so we are at the call site or the
      // return value). In this case, start the construction of the following
      // GMIR, that is responsible for the preparation of indirect parameter
      // passing:
      //
      //   %1(indirectly passed type) = The value to pass
      //   %3(pointer) = G_FRAME_INDEX %stack.0
      //   G_STORE %1, %3 :: (store (s128), align 8)
      //
      // After this GMIR, the remaining part of the loop body will decide how
      // to get the value to the caller and we break out of the loop.
      if (VA.getLocInfo() == CCValAssign::Indirect &&
          !Handler.isIncomingArgumentHandler()) {
        Align AlignmentForStored = DL.getPrefTypeAlign(Ty: Args[i].Ty);
        MachineFrameInfo &MFI = MF.getFrameInfo();
        // Get some space on the stack for the value, so later we can pass it
        // as a reference.
        // NOTE(review): the size passed here is a bit count
        // (getScalarSizeInBits); confirm the unit CreateStackObject expects.
        // If it is bytes, the slot is larger than necessary.
        int FrameIdx = MFI.CreateStackObject(Size: OrigTy.getScalarSizeInBits(),
                                             Alignment: AlignmentForStored, isSpillSlot: false);
        Register PointerToStackReg =
            MIRBuilder.buildFrameIndex(Res: PointerTy, Idx: FrameIdx).getReg(Idx: 0);
        MachinePointerInfo StackPointerMPO =
            MachinePointerInfo::getFixedStack(MF, FI: FrameIdx);
        // Store the value in the previously created stack space.
        MIRBuilder.buildStore(Val: Args[i].OrigRegs[Part], Addr: PointerToStackReg,
                              PtrInfo: StackPointerMPO,
                              Alignment: inferAlignFromPtrInfo(MF, MPO: StackPointerMPO));

        // From here on the value being passed is the pointer to the slot.
        ArgReg = PointerToStackReg;
        IndirectParameterPassingHandled = true;
      }

      if (VA.isMemLoc() && !Flags.isByVal()) {
        // Individual pieces may have been spilled to the stack and others
        // passed in registers.

        // TODO: The memory size may be larger than the value we need to
        // store. We may need to adjust the offset for big endian targets.
        LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);

        MachinePointerInfo MPO;
        Register StackAddr =
            Handler.getStackAddress(MemSize: VA.getLocInfo() == CCValAssign::Indirect
                                        ? PointerTy.getSizeInBytes()
                                        : MemTy.getSizeInBytes(),
                                    Offset: VA.getLocMemOffset(), MPO, Flags);

        // Finish the handling of indirect passing from the passers
        // (OutgoingParameterHandler) side.
        // This branch is needed, so the pointer to the value is loaded onto the
        // stack.
        if (VA.getLocInfo() == CCValAssign::Indirect)
          Handler.assignValueToAddress(ValVReg: ArgReg, Addr: StackAddr, MemTy: PointerTy, MPO, VA);
        else
          Handler.assignValueToAddress(Arg: Args[i], ValRegIndex: Part, Addr: StackAddr, MemTy, MPO,
                                       VA);
      } else if (VA.isMemLoc() && Flags.isByVal()) {
        assert(Args[i].Regs.size() == 1 && "didn't expect split byval pointer" );

        if (Handler.isIncomingArgumentHandler()) {
          // We just need to copy the frame index value to the pointer.
          MachinePointerInfo MPO;
          Register StackAddr = Handler.getStackAddress(
              MemSize: Flags.getByValSize(), Offset: VA.getLocMemOffset(), MPO, Flags);
          MIRBuilder.buildCopy(Res: Args[i].Regs[0], Op: StackAddr);
        } else {
          // For outgoing byval arguments, insert the implicit copy byval
          // implies, such that writes in the callee do not modify the caller's
          // value.
          uint64_t MemSize = Flags.getByValSize();
          int64_t Offset = VA.getLocMemOffset();

          MachinePointerInfo DstMPO;
          Register StackAddr =
              Handler.getStackAddress(MemSize, Offset, MPO&: DstMPO, Flags);

          MachinePointerInfo SrcMPO(Args[i].OrigValue);
          if (!Args[i].OrigValue) {
            // We still need to accurately track the stack address space if we
            // don't know the underlying value.
            const LLT PtrTy = MRI.getType(Reg: StackAddr);
            SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
          }

          // Both alignments are at least the byval alignment from the flags,
          // raised to whatever can be inferred from the pointer info.
          Align DstAlign = std::max(a: Flags.getNonZeroByValAlign(),
                                    b: inferAlignFromPtrInfo(MF, MPO: DstMPO));

          Align SrcAlign = std::max(a: Flags.getNonZeroByValAlign(),
                                    b: inferAlignFromPtrInfo(MF, MPO: SrcMPO));

          Handler.copyArgumentMemory(Arg: Args[i], DstPtr: StackAddr, SrcPtr: Args[i].Regs[0],
                                     DstPtrInfo: DstMPO, DstAlign, SrcPtrInfo: SrcMPO, SrcAlign,
                                     MemSize, VA);
        }
      } else if (i == 0 && !ThisReturnRegs.empty() &&
                 Handler.isIncomingArgumentHandler() &&
                 isTypeIsValidForThisReturn(Ty: ValVT)) {
        // Argument 0 of an incoming handler takes its parts from the
        // caller-supplied ThisReturnRegs rather than the location register.
        Handler.assignValueToReg(ValVReg: ArgReg, PhysReg: ThisReturnRegs[Part], VA);
      } else if (Handler.isIncomingArgumentHandler()) {
        Handler.assignValueToReg(ValVReg: ArgReg, PhysReg: VA.getLocReg(), VA);
      } else {
        // Outgoing register assignment: defer it (see the comment on
        // DelayedOutgoingRegAssignments). ArgReg and VA are captured by copy
        // so the thunk does not refer to loop-local state when it runs.
        DelayedOutgoingRegAssignments.emplace_back(Args: [=, &Handler]() {
          Handler.assignValueToReg(ValVReg: ArgReg, PhysReg: VA.getLocReg(), VA);
        });
      }

      // Finish the handling of indirect parameter passing when receiving
      // the value (we are in the called function or the caller when receiving
      // the return value).
      if (VA.getLocInfo() == CCValAssign::Indirect &&
          Handler.isIncomingArgumentHandler()) {
        Align Alignment = DL.getABITypeAlign(Ty: Args[i].Ty);
        MachinePointerInfo MPO = MachinePointerInfo::getUnknownStack(MF);

        // Since we are doing indirect parameter passing, we know that the value
        // in the temporary register is not the value passed to the function,
        // but rather a pointer to that value. Let's load that value into the
        // virtual register where the parameter should go.
        MIRBuilder.buildLoad(Res: Args[i].OrigRegs[0], Addr: Args[i].Regs[0], PtrInfo: MPO,
                             Alignment);

        IndirectParameterPassingHandled = true;
      }

      // An indirectly passed value is fully handled by its first part.
      if (IndirectParameterPassingHandled)
        break;
    }

    // Now that all pieces have been assigned, re-pack the register typed values
    // into the original value typed registers. This is only necessary, when
    // the value was passed in multiple registers, not indirectly.
    if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
        !IndirectParameterPassingHandled) {
      // Merge the split registers into the expected larger result vregs of
      // the original call.
      buildCopyFromRegs(B&: MIRBuilder, OrigRegs: Args[i].OrigRegs, Regs: Args[i].Regs, LLTy: OrigTy,
                        PartLLT: LocTy, Flags: Args[i].Flags[0]);
    }

    // Step over the remaining locations of this argument; the loop increment
    // accounts for the first one.
    j += NumParts - 1;
  }
  // All memory assignments are done; now emit the deferred outgoing register
  // assignments in the order they were queued.
  for (auto &Fn : DelayedOutgoingRegAssignments)
    Fn();

  return true;
}
989 | |
990 | void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, |
991 | ArrayRef<Register> VRegs, Register DemoteReg, |
992 | int FI) const { |
993 | MachineFunction &MF = MIRBuilder.getMF(); |
994 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
995 | const DataLayout &DL = MF.getDataLayout(); |
996 | |
997 | SmallVector<EVT, 4> SplitVTs; |
998 | SmallVector<uint64_t, 4> Offsets; |
999 | ComputeValueVTs(TLI: *TLI, DL, Ty: RetTy, ValueVTs&: SplitVTs, FixedOffsets: &Offsets, StartingOffset: 0); |
1000 | |
1001 | assert(VRegs.size() == SplitVTs.size()); |
1002 | |
1003 | unsigned NumValues = SplitVTs.size(); |
1004 | Align BaseAlign = DL.getPrefTypeAlign(Ty: RetTy); |
1005 | Type *RetPtrTy = |
1006 | PointerType::get(C&: RetTy->getContext(), AddressSpace: DL.getAllocaAddrSpace()); |
1007 | LLT OffsetLLTy = getLLTForType(Ty&: *DL.getIndexType(PtrTy: RetPtrTy), DL); |
1008 | |
1009 | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); |
1010 | |
1011 | for (unsigned I = 0; I < NumValues; ++I) { |
1012 | Register Addr; |
1013 | MIRBuilder.materializePtrAdd(Res&: Addr, Op0: DemoteReg, ValueTy: OffsetLLTy, Value: Offsets[I]); |
1014 | auto *MMO = MF.getMachineMemOperand(PtrInfo, f: MachineMemOperand::MOLoad, |
1015 | MemTy: MRI.getType(Reg: VRegs[I]), |
1016 | base_alignment: commonAlignment(A: BaseAlign, Offset: Offsets[I])); |
1017 | MIRBuilder.buildLoad(Res: VRegs[I], Addr, MMO&: *MMO); |
1018 | } |
1019 | } |
1020 | |
1021 | void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, |
1022 | ArrayRef<Register> VRegs, |
1023 | Register DemoteReg) const { |
1024 | MachineFunction &MF = MIRBuilder.getMF(); |
1025 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1026 | const DataLayout &DL = MF.getDataLayout(); |
1027 | |
1028 | SmallVector<EVT, 4> SplitVTs; |
1029 | SmallVector<uint64_t, 4> Offsets; |
1030 | ComputeValueVTs(TLI: *TLI, DL, Ty: RetTy, ValueVTs&: SplitVTs, FixedOffsets: &Offsets, StartingOffset: 0); |
1031 | |
1032 | assert(VRegs.size() == SplitVTs.size()); |
1033 | |
1034 | unsigned NumValues = SplitVTs.size(); |
1035 | Align BaseAlign = DL.getPrefTypeAlign(Ty: RetTy); |
1036 | unsigned AS = DL.getAllocaAddrSpace(); |
1037 | LLT OffsetLLTy = getLLTForType(Ty&: *DL.getIndexType(PtrTy: RetTy->getPointerTo(AddrSpace: AS)), DL); |
1038 | |
1039 | MachinePointerInfo PtrInfo(AS); |
1040 | |
1041 | for (unsigned I = 0; I < NumValues; ++I) { |
1042 | Register Addr; |
1043 | MIRBuilder.materializePtrAdd(Res&: Addr, Op0: DemoteReg, ValueTy: OffsetLLTy, Value: Offsets[I]); |
1044 | auto *MMO = MF.getMachineMemOperand(PtrInfo, f: MachineMemOperand::MOStore, |
1045 | MemTy: MRI.getType(Reg: VRegs[I]), |
1046 | base_alignment: commonAlignment(A: BaseAlign, Offset: Offsets[I])); |
1047 | MIRBuilder.buildStore(Val: VRegs[I], Addr, MMO&: *MMO); |
1048 | } |
1049 | } |
1050 | |
1051 | void CallLowering::insertSRetIncomingArgument( |
1052 | const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg, |
1053 | MachineRegisterInfo &MRI, const DataLayout &DL) const { |
1054 | unsigned AS = DL.getAllocaAddrSpace(); |
1055 | DemoteReg = MRI.createGenericVirtualRegister( |
1056 | Ty: LLT::pointer(AddressSpace: AS, SizeInBits: DL.getPointerSizeInBits(AS))); |
1057 | |
1058 | Type *PtrTy = PointerType::get(ElementType: F.getReturnType(), AddressSpace: AS); |
1059 | |
1060 | SmallVector<EVT, 1> ValueVTs; |
1061 | ComputeValueVTs(TLI: *TLI, DL, Ty: PtrTy, ValueVTs); |
1062 | |
1063 | // NOTE: Assume that a pointer won't get split into more than one VT. |
1064 | assert(ValueVTs.size() == 1); |
1065 | |
1066 | ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(Context&: PtrTy->getContext()), |
1067 | ArgInfo::NoArgIndex); |
1068 | setArgFlags(Arg&: DemoteArg, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: F); |
1069 | DemoteArg.Flags[0].setSRet(); |
1070 | SplitArgs.insert(I: SplitArgs.begin(), Elt: DemoteArg); |
1071 | } |
1072 | |
1073 | void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, |
1074 | const CallBase &CB, |
1075 | CallLoweringInfo &Info) const { |
1076 | const DataLayout &DL = MIRBuilder.getDataLayout(); |
1077 | Type *RetTy = CB.getType(); |
1078 | unsigned AS = DL.getAllocaAddrSpace(); |
1079 | LLT FramePtrTy = LLT::pointer(AddressSpace: AS, SizeInBits: DL.getPointerSizeInBits(AS)); |
1080 | |
1081 | int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject( |
1082 | Size: DL.getTypeAllocSize(Ty: RetTy), Alignment: DL.getPrefTypeAlign(Ty: RetTy), isSpillSlot: false); |
1083 | |
1084 | Register DemoteReg = MIRBuilder.buildFrameIndex(Res: FramePtrTy, Idx: FI).getReg(Idx: 0); |
1085 | ArgInfo DemoteArg(DemoteReg, PointerType::get(ElementType: RetTy, AddressSpace: AS), |
1086 | ArgInfo::NoArgIndex); |
1087 | setArgFlags(Arg&: DemoteArg, OpIdx: AttributeList::ReturnIndex, DL, FuncInfo: CB); |
1088 | DemoteArg.Flags[0].setSRet(); |
1089 | |
1090 | Info.OrigArgs.insert(I: Info.OrigArgs.begin(), Elt: DemoteArg); |
1091 | Info.DemoteStackIndex = FI; |
1092 | Info.DemoteRegister = DemoteReg; |
1093 | } |
1094 | |
1095 | bool CallLowering::checkReturn(CCState &CCInfo, |
1096 | SmallVectorImpl<BaseArgInfo> &Outs, |
1097 | CCAssignFn *Fn) const { |
1098 | for (unsigned I = 0, E = Outs.size(); I < E; ++I) { |
1099 | MVT VT = MVT::getVT(Ty: Outs[I].Ty); |
1100 | if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo)) |
1101 | return false; |
1102 | } |
1103 | return true; |
1104 | } |
1105 | |
1106 | void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy, |
1107 | AttributeList Attrs, |
1108 | SmallVectorImpl<BaseArgInfo> &Outs, |
1109 | const DataLayout &DL) const { |
1110 | LLVMContext &Context = RetTy->getContext(); |
1111 | ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); |
1112 | |
1113 | SmallVector<EVT, 4> SplitVTs; |
1114 | ComputeValueVTs(TLI: *TLI, DL, Ty: RetTy, ValueVTs&: SplitVTs); |
1115 | addArgFlagsFromAttributes(Flags, Attrs, OpIdx: AttributeList::ReturnIndex); |
1116 | |
1117 | for (EVT VT : SplitVTs) { |
1118 | unsigned NumParts = |
1119 | TLI->getNumRegistersForCallingConv(Context, CC: CallConv, VT); |
1120 | MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CC: CallConv, VT); |
1121 | Type *PartTy = EVT(RegVT).getTypeForEVT(Context); |
1122 | |
1123 | for (unsigned I = 0; I < NumParts; ++I) { |
1124 | Outs.emplace_back(Args&: PartTy, Args&: Flags); |
1125 | } |
1126 | } |
1127 | } |
1128 | |
1129 | bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const { |
1130 | const auto &F = MF.getFunction(); |
1131 | Type *ReturnType = F.getReturnType(); |
1132 | CallingConv::ID CallConv = F.getCallingConv(); |
1133 | |
1134 | SmallVector<BaseArgInfo, 4> SplitArgs; |
1135 | getReturnInfo(CallConv, RetTy: ReturnType, Attrs: F.getAttributes(), Outs&: SplitArgs, |
1136 | DL: MF.getDataLayout()); |
1137 | return canLowerReturn(MF, CallConv, Outs&: SplitArgs, IsVarArg: F.isVarArg()); |
1138 | } |
1139 | |
1140 | bool CallLowering::parametersInCSRMatch( |
1141 | const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, |
1142 | const SmallVectorImpl<CCValAssign> &OutLocs, |
1143 | const SmallVectorImpl<ArgInfo> &OutArgs) const { |
1144 | for (unsigned i = 0; i < OutLocs.size(); ++i) { |
1145 | const auto &ArgLoc = OutLocs[i]; |
1146 | // If it's not a register, it's fine. |
1147 | if (!ArgLoc.isRegLoc()) |
1148 | continue; |
1149 | |
1150 | MCRegister PhysReg = ArgLoc.getLocReg(); |
1151 | |
1152 | // Only look at callee-saved registers. |
1153 | if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg)) |
1154 | continue; |
1155 | |
1156 | LLVM_DEBUG( |
1157 | dbgs() |
1158 | << "... Call has an argument passed in a callee-saved register.\n" ); |
1159 | |
1160 | // Check if it was copied from. |
1161 | const ArgInfo &OutInfo = OutArgs[i]; |
1162 | |
1163 | if (OutInfo.Regs.size() > 1) { |
1164 | LLVM_DEBUG( |
1165 | dbgs() << "... Cannot handle arguments in multiple registers.\n" ); |
1166 | return false; |
1167 | } |
1168 | |
1169 | // Check if we copy the register, walking through copies from virtual |
1170 | // registers. Note that getDefIgnoringCopies does not ignore copies from |
1171 | // physical registers. |
1172 | MachineInstr *RegDef = getDefIgnoringCopies(Reg: OutInfo.Regs[0], MRI); |
1173 | if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { |
1174 | LLVM_DEBUG( |
1175 | dbgs() |
1176 | << "... Parameter was not copied into a VReg, cannot tail call.\n" ); |
1177 | return false; |
1178 | } |
1179 | |
1180 | // Got a copy. Verify that it's the same as the register we want. |
1181 | Register CopyRHS = RegDef->getOperand(i: 1).getReg(); |
1182 | if (CopyRHS != PhysReg) { |
1183 | LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " |
1184 | "VReg, cannot tail call.\n" ); |
1185 | return false; |
1186 | } |
1187 | } |
1188 | |
1189 | return true; |
1190 | } |
1191 | |
1192 | bool CallLowering::resultsCompatible(CallLoweringInfo &Info, |
1193 | MachineFunction &MF, |
1194 | SmallVectorImpl<ArgInfo> &InArgs, |
1195 | ValueAssigner &CalleeAssigner, |
1196 | ValueAssigner &CallerAssigner) const { |
1197 | const Function &F = MF.getFunction(); |
1198 | CallingConv::ID CalleeCC = Info.CallConv; |
1199 | CallingConv::ID CallerCC = F.getCallingConv(); |
1200 | |
1201 | if (CallerCC == CalleeCC) |
1202 | return true; |
1203 | |
1204 | SmallVector<CCValAssign, 16> ArgLocs1; |
1205 | CCState CCInfo1(CalleeCC, Info.IsVarArg, MF, ArgLocs1, F.getContext()); |
1206 | if (!determineAssignments(Assigner&: CalleeAssigner, Args&: InArgs, CCInfo&: CCInfo1)) |
1207 | return false; |
1208 | |
1209 | SmallVector<CCValAssign, 16> ArgLocs2; |
1210 | CCState CCInfo2(CallerCC, F.isVarArg(), MF, ArgLocs2, F.getContext()); |
1211 | if (!determineAssignments(Assigner&: CallerAssigner, Args&: InArgs, CCInfo&: CCInfo2)) |
1212 | return false; |
1213 | |
1214 | // We need the argument locations to match up exactly. If there's more in |
1215 | // one than the other, then we are done. |
1216 | if (ArgLocs1.size() != ArgLocs2.size()) |
1217 | return false; |
1218 | |
1219 | // Make sure that each location is passed in exactly the same way. |
1220 | for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) { |
1221 | const CCValAssign &Loc1 = ArgLocs1[i]; |
1222 | const CCValAssign &Loc2 = ArgLocs2[i]; |
1223 | |
1224 | // We need both of them to be the same. So if one is a register and one |
1225 | // isn't, we're done. |
1226 | if (Loc1.isRegLoc() != Loc2.isRegLoc()) |
1227 | return false; |
1228 | |
1229 | if (Loc1.isRegLoc()) { |
1230 | // If they don't have the same register location, we're done. |
1231 | if (Loc1.getLocReg() != Loc2.getLocReg()) |
1232 | return false; |
1233 | |
1234 | // They matched, so we can move to the next ArgLoc. |
1235 | continue; |
1236 | } |
1237 | |
1238 | // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match. |
1239 | if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) |
1240 | return false; |
1241 | } |
1242 | |
1243 | return true; |
1244 | } |
1245 | |
1246 | LLT CallLowering::ValueHandler::getStackValueStoreType( |
1247 | const DataLayout &DL, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { |
1248 | const MVT ValVT = VA.getValVT(); |
1249 | if (ValVT != MVT::iPTR) { |
1250 | LLT ValTy(ValVT); |
1251 | |
1252 | // We lost the pointeriness going through CCValAssign, so try to restore it |
1253 | // based on the flags. |
1254 | if (Flags.isPointer()) { |
1255 | LLT PtrTy = LLT::pointer(AddressSpace: Flags.getPointerAddrSpace(), |
1256 | SizeInBits: ValTy.getScalarSizeInBits()); |
1257 | if (ValVT.isVector()) |
1258 | return LLT::vector(EC: ValTy.getElementCount(), ScalarTy: PtrTy); |
1259 | return PtrTy; |
1260 | } |
1261 | |
1262 | return ValTy; |
1263 | } |
1264 | |
1265 | unsigned AddrSpace = Flags.getPointerAddrSpace(); |
1266 | return LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSize(AS: AddrSpace)); |
1267 | } |
1268 | |
1269 | void CallLowering::ValueHandler::copyArgumentMemory( |
1270 | const ArgInfo &Arg, Register DstPtr, Register SrcPtr, |
1271 | const MachinePointerInfo &DstPtrInfo, Align DstAlign, |
1272 | const MachinePointerInfo &SrcPtrInfo, Align SrcAlign, uint64_t MemSize, |
1273 | CCValAssign &VA) const { |
1274 | MachineFunction &MF = MIRBuilder.getMF(); |
1275 | MachineMemOperand *SrcMMO = MF.getMachineMemOperand( |
1276 | PtrInfo: SrcPtrInfo, |
1277 | F: MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable, Size: MemSize, |
1278 | BaseAlignment: SrcAlign); |
1279 | |
1280 | MachineMemOperand *DstMMO = MF.getMachineMemOperand( |
1281 | PtrInfo: DstPtrInfo, |
1282 | F: MachineMemOperand::MOStore | MachineMemOperand::MODereferenceable, |
1283 | Size: MemSize, BaseAlignment: DstAlign); |
1284 | |
1285 | const LLT PtrTy = MRI.getType(Reg: DstPtr); |
1286 | const LLT SizeTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits()); |
1287 | |
1288 | auto SizeConst = MIRBuilder.buildConstant(Res: SizeTy, Val: MemSize); |
1289 | MIRBuilder.buildMemCpy(DstPtr, SrcPtr, Size: SizeConst, DstMMO&: *DstMMO, SrcMMO&: *SrcMMO); |
1290 | } |
1291 | |
1292 | Register CallLowering::ValueHandler::extendRegister(Register ValReg, |
1293 | const CCValAssign &VA, |
1294 | unsigned MaxSizeBits) { |
1295 | LLT LocTy{VA.getLocVT()}; |
1296 | LLT ValTy{VA.getValVT()}; |
1297 | |
1298 | if (LocTy.getSizeInBits() == ValTy.getSizeInBits()) |
1299 | return ValReg; |
1300 | |
1301 | if (LocTy.isScalar() && MaxSizeBits && MaxSizeBits < LocTy.getSizeInBits()) { |
1302 | if (MaxSizeBits <= ValTy.getSizeInBits()) |
1303 | return ValReg; |
1304 | LocTy = LLT::scalar(SizeInBits: MaxSizeBits); |
1305 | } |
1306 | |
1307 | const LLT ValRegTy = MRI.getType(Reg: ValReg); |
1308 | if (ValRegTy.isPointer()) { |
1309 | // The x32 ABI wants to zero extend 32-bit pointers to 64-bit registers, so |
1310 | // we have to cast to do the extension. |
1311 | LLT IntPtrTy = LLT::scalar(SizeInBits: ValRegTy.getSizeInBits()); |
1312 | ValReg = MIRBuilder.buildPtrToInt(Dst: IntPtrTy, Src: ValReg).getReg(Idx: 0); |
1313 | } |
1314 | |
1315 | switch (VA.getLocInfo()) { |
1316 | default: |
1317 | break; |
1318 | case CCValAssign::Full: |
1319 | case CCValAssign::BCvt: |
1320 | // FIXME: bitconverting between vector types may or may not be a |
1321 | // nop in big-endian situations. |
1322 | return ValReg; |
1323 | case CCValAssign::AExt: { |
1324 | auto MIB = MIRBuilder.buildAnyExt(Res: LocTy, Op: ValReg); |
1325 | return MIB.getReg(Idx: 0); |
1326 | } |
1327 | case CCValAssign::SExt: { |
1328 | Register NewReg = MRI.createGenericVirtualRegister(Ty: LocTy); |
1329 | MIRBuilder.buildSExt(Res: NewReg, Op: ValReg); |
1330 | return NewReg; |
1331 | } |
1332 | case CCValAssign::ZExt: { |
1333 | Register NewReg = MRI.createGenericVirtualRegister(Ty: LocTy); |
1334 | MIRBuilder.buildZExt(Res: NewReg, Op: ValReg); |
1335 | return NewReg; |
1336 | } |
1337 | } |
1338 | llvm_unreachable("unable to extend register" ); |
1339 | } |
1340 | |
// Intentionally empty out-of-line definition. Presumably this follows the
// LLVM "anchor" idiom of pinning the class's vtable to this file; confirm
// against the declaration in CallLowering.h.
void CallLowering::ValueAssigner::anchor() {}
1342 | |
1343 | Register CallLowering::IncomingValueHandler::buildExtensionHint( |
1344 | const CCValAssign &VA, Register SrcReg, LLT NarrowTy) { |
1345 | switch (VA.getLocInfo()) { |
1346 | case CCValAssign::LocInfo::ZExt: { |
1347 | return MIRBuilder |
1348 | .buildAssertZExt(Res: MRI.cloneVirtualRegister(VReg: SrcReg), Op: SrcReg, |
1349 | Size: NarrowTy.getScalarSizeInBits()) |
1350 | .getReg(Idx: 0); |
1351 | } |
1352 | case CCValAssign::LocInfo::SExt: { |
1353 | return MIRBuilder |
1354 | .buildAssertSExt(Res: MRI.cloneVirtualRegister(VReg: SrcReg), Op: SrcReg, |
1355 | Size: NarrowTy.getScalarSizeInBits()) |
1356 | .getReg(Idx: 0); |
1357 | break; |
1358 | } |
1359 | default: |
1360 | return SrcReg; |
1361 | } |
1362 | } |
1363 | |
1364 | /// Check if we can use a basic COPY instruction between the two types. |
1365 | /// |
1366 | /// We're currently building on top of the infrastructure using MVT, which loses |
1367 | /// pointer information in the CCValAssign. We accept copies from physical |
1368 | /// registers that have been reported as integers if it's to an equivalent sized |
1369 | /// pointer LLT. |
1370 | static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) { |
1371 | if (SrcTy == DstTy) |
1372 | return true; |
1373 | |
1374 | if (SrcTy.getSizeInBits() != DstTy.getSizeInBits()) |
1375 | return false; |
1376 | |
1377 | SrcTy = SrcTy.getScalarType(); |
1378 | DstTy = DstTy.getScalarType(); |
1379 | |
1380 | return (SrcTy.isPointer() && DstTy.isScalar()) || |
1381 | (DstTy.isPointer() && SrcTy.isScalar()); |
1382 | } |
1383 | |
1384 | void CallLowering::IncomingValueHandler::assignValueToReg( |
1385 | Register ValVReg, Register PhysReg, const CCValAssign &VA) { |
1386 | const MVT LocVT = VA.getLocVT(); |
1387 | const LLT LocTy(LocVT); |
1388 | const LLT RegTy = MRI.getType(Reg: ValVReg); |
1389 | |
1390 | if (isCopyCompatibleType(SrcTy: RegTy, DstTy: LocTy)) { |
1391 | MIRBuilder.buildCopy(Res: ValVReg, Op: PhysReg); |
1392 | return; |
1393 | } |
1394 | |
1395 | auto Copy = MIRBuilder.buildCopy(Res: LocTy, Op: PhysReg); |
1396 | auto Hint = buildExtensionHint(VA, SrcReg: Copy.getReg(Idx: 0), NarrowTy: RegTy); |
1397 | MIRBuilder.buildTrunc(Res: ValVReg, Op: Hint); |
1398 | } |
1399 | |