1 | //===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file implements the lowering of LLVM calls to DAG nodes. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "X86.h" |
15 | #include "X86CallingConv.h" |
16 | #include "X86FrameLowering.h" |
17 | #include "X86ISelLowering.h" |
18 | #include "X86InstrBuilder.h" |
19 | #include "X86MachineFunctionInfo.h" |
20 | #include "X86TargetMachine.h" |
21 | #include "X86TargetObjectFile.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/Analysis/ObjCARCUtil.h" |
24 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
25 | #include "llvm/CodeGen/MachineModuleInfo.h" |
26 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
27 | #include "llvm/IR/DiagnosticInfo.h" |
28 | #include "llvm/IR/IRBuilder.h" |
29 | #include "llvm/IR/Module.h" |
30 | |
31 | #define DEBUG_TYPE "x86-isel" |
32 | |
33 | using namespace llvm; |
34 | |
STATISTIC(NumTailCalls, "Number of tail calls");
36 | |
37 | /// Call this when the user attempts to do something unsupported, like |
38 | /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike |
39 | /// report_fatal_error, so calling code should attempt to recover without |
40 | /// crashing. |
41 | static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, |
42 | const char *Msg) { |
43 | MachineFunction &MF = DAG.getMachineFunction(); |
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
46 | } |
47 | |
/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
51 | static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) { |
52 | switch (CC) { |
53 | default: |
54 | return false; |
55 | case CallingConv::X86_RegCall: |
56 | case CallingConv::PreserveMost: |
57 | case CallingConv::PreserveAll: |
58 | return true; |
59 | } |
60 | } |
61 | |
/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
65 | static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) { |
66 | return CC == CallingConv::X86_RegCall; |
67 | } |
68 | |
69 | static std::pair<MVT, unsigned> |
70 | handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, |
71 | const X86Subtarget &Subtarget) { |
72 | // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling |
73 | // convention is one that uses k registers. |
74 | if (NumElts == 2) |
75 | return {MVT::v2i64, 1}; |
76 | if (NumElts == 4) |
77 | return {MVT::v4i32, 1}; |
78 | if (NumElts == 8 && CC != CallingConv::X86_RegCall && |
79 | CC != CallingConv::Intel_OCL_BI) |
80 | return {MVT::v8i16, 1}; |
81 | if (NumElts == 16 && CC != CallingConv::X86_RegCall && |
82 | CC != CallingConv::Intel_OCL_BI) |
83 | return {MVT::v16i8, 1}; |
84 | // v32i1 passes in ymm unless we have BWI and the calling convention is |
85 | // regcall. |
86 | if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall)) |
87 | return {MVT::v32i8, 1}; |
88 | // Split v64i1 vectors if we don't have v64i8 available. |
89 | if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) { |
90 | if (Subtarget.useAVX512Regs()) |
91 | return {MVT::v64i8, 1}; |
92 | return {MVT::v32i8, 2}; |
93 | } |
94 | |
95 | // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};
99 | |
100 | return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0}; |
101 | } |
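
// For illustration, with the plain C calling convention (not RegCall or
// Intel OCL BI) the mapping above works out to roughly:
//   v16i1                            -> one v16i8 register
//   v32i1 without BWI                -> one v32i8 register
//   v64i1 + BWI, 512-bit regs usable -> one v64i8 register
//   v64i1 + BWI, 256-bit preferred   -> two v32i8 registers
//   v3i1 / v128i1 / v64i1 w/o BWI    -> split into per-element i8 scalars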
102 | |
103 | MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
104 | CallingConv::ID CC, |
105 | EVT VT) const { |
106 | if (VT.isVector()) { |
107 | if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) { |
108 | unsigned NumElts = VT.getVectorNumElements(); |
109 | |
110 | MVT RegisterVT; |
111 | unsigned NumRegisters; |
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
114 | if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) |
115 | return RegisterVT; |
116 | } |
117 | |
118 | if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8) |
119 | return MVT::v8f16; |
120 | } |
121 | |
122 | // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled. |
123 | if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() && |
124 | !Subtarget.hasX87()) |
125 | return MVT::i32; |
126 | |
127 | if (VT.isVector() && VT.getVectorElementType() == MVT::bf16) |
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));
130 | |
131 | if (VT == MVT::bf16) |
132 | return MVT::f16; |
133 | |
134 | return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
135 | } |
136 | |
137 | unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
138 | CallingConv::ID CC, |
139 | EVT VT) const { |
140 | if (VT.isVector()) { |
141 | if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) { |
142 | unsigned NumElts = VT.getVectorNumElements(); |
143 | |
144 | MVT RegisterVT; |
145 | unsigned NumRegisters; |
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
148 | if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) |
149 | return NumRegisters; |
150 | } |
151 | |
152 | if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8) |
153 | return 1; |
154 | } |
155 | |
156 | // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if |
157 | // x87 is disabled. |
158 | if (!Subtarget.is64Bit() && !Subtarget.hasX87()) { |
159 | if (VT == MVT::f64) |
160 | return 2; |
161 | if (VT == MVT::f80) |
162 | return 3; |
163 | } |
164 | |
165 | if (VT.isVector() && VT.getVectorElementType() == MVT::bf16) |
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));
168 | |
169 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
170 | } |
171 | |
172 | unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( |
173 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
174 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
175 | // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
176 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
177 | Subtarget.hasAVX512() && |
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
179 | (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) || |
180 | VT.getVectorNumElements() > 64)) { |
181 | RegisterVT = MVT::i8; |
182 | IntermediateVT = MVT::i1; |
183 | NumIntermediates = VT.getVectorNumElements(); |
184 | return NumIntermediates; |
185 | } |
186 | |
187 | // Split v64i1 vectors if we don't have v64i8 available. |
188 | if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && |
189 | CC != CallingConv::X86_RegCall) { |
190 | RegisterVT = MVT::v32i8; |
191 | IntermediateVT = MVT::v32i1; |
192 | NumIntermediates = 2; |
193 | return 2; |
194 | } |
195 | |
196 | // Split vNbf16 vectors according to vNf16. |
197 | if (VT.isVector() && VT.getVectorElementType() == MVT::bf16) |
    VT = VT.changeVectorElementType(MVT::f16);
199 | |
  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
202 | } |
203 | |
204 | EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, |
205 | LLVMContext& Context, |
206 | EVT VT) const { |
207 | if (!VT.isVector()) |
208 | return MVT::i8; |
209 | |
210 | if (Subtarget.hasAVX512()) { |
211 | // Figure out what this type will be legalized to. |
212 | EVT LegalVT = VT; |
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);
215 | |
216 | // If we got a 512-bit vector then we'll definitely have a vXi1 compare. |
217 | if (LegalVT.getSimpleVT().is512BitVector()) |
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
219 | |
220 | if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) { |
221 | // If we legalized to less than a 512-bit vector, then we will use a vXi1 |
222 | // compare for vXi32/vXi64 for sure. If we have BWI we will also support |
223 | // vXi16/vXi8. |
224 | MVT EltVT = LegalVT.getSimpleVT().getVectorElementType(); |
225 | if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32) |
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
227 | } |
228 | } |
229 | |
230 | return VT.changeVectorElementTypeToInteger(); |
231 | } |
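
// As a rough illustration of the rule above (types shown as LLVM IR):
//   %c = icmp slt <8 x i32> %a, %b
// yields a v8i1 setcc result when AVX-512VL is available (k-register
// compare), but the usual v8i32 all-ones/all-zeros mask on plain AVX2; once
// the operands legalize to a 512-bit vector, the vXi1 form is always used.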
232 | |
233 | /// Helper for getByValTypeAlignment to determine |
234 | /// the desired ByVal argument alignment. |
235 | static void getMaxByValAlign(Type *Ty, Align &MaxAlign) { |
236 | if (MaxAlign == 16) |
237 | return; |
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
244 | if (EltAlign > MaxAlign) |
245 | MaxAlign = EltAlign; |
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
250 | if (EltAlign > MaxAlign) |
251 | MaxAlign = EltAlign; |
252 | if (MaxAlign == 16) |
253 | break; |
254 | } |
255 | } |
256 | } |
257 | |
258 | /// Return the desired alignment for ByVal aggregate |
259 | /// function arguments in the caller parameter area. For X86, aggregates |
260 | /// that contain SSE vectors are placed at 16-byte boundaries while the rest |
261 | /// are at 4-byte boundaries. |
262 | uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty, |
263 | const DataLayout &DL) const { |
264 | if (Subtarget.is64Bit()) { |
265 | // Max of 8 and alignment of type. |
266 | Align TyAlign = DL.getABITypeAlign(Ty); |
267 | if (TyAlign > 8) |
268 | return TyAlign.value(); |
269 | return 8; |
270 | } |
271 | |
272 | Align Alignment(4); |
273 | if (Subtarget.hasSSE1()) |
    getMaxByValAlign(Ty, Alignment);
275 | return Alignment.value(); |
276 | } |
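
// For example (subject to the DataLayout in use): on x86-64 a byval struct of
// two i32 fields is placed at an 8-byte boundary, while on 32-bit x86 with
// SSE a struct containing a <4 x float> member is bumped to 16 bytes and a
// plain struct of ints stays at 4.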
277 | |
278 | /// It returns EVT::Other if the type should be determined using generic |
279 | /// target-independent logic. |
280 | /// For vector ops we check that the overall size isn't larger than our |
281 | /// preferred vector width. |
282 | EVT X86TargetLowering::getOptimalMemOpType( |
283 | const MemOp &Op, const AttributeList &FuncAttributes) const { |
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
287 | // FIXME: Check if unaligned 64-byte accesses are slow. |
288 | if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() && |
289 | (Subtarget.getPreferVectorWidth() >= 512)) { |
290 | return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; |
291 | } |
292 | // FIXME: Check if unaligned 32-byte accesses are slow. |
293 | if (Op.size() >= 32 && Subtarget.hasAVX() && |
294 | Subtarget.useLight256BitInstructions()) { |
295 | // Although this isn't a well-supported type for AVX1, we'll let |
296 | // legalization and shuffle lowering produce the optimal codegen. If we |
297 | // choose an optimal type with a vector element larger than a byte, |
298 | // getMemsetStores() may create an intermediate splat (using an integer |
299 | // multiply) before we splat as a vector. |
300 | return MVT::v32i8; |
301 | } |
302 | if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128)) |
303 | return MVT::v16i8; |
304 | // TODO: Can SSE1 handle a byte vector? |
305 | // If we have SSE1 registers we should be able to use them. |
306 | if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) && |
307 | (Subtarget.getPreferVectorWidth() >= 128)) |
308 | return MVT::v4f32; |
309 | } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) && |
310 | Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) { |
311 | // Do not use f64 to lower memcpy if source is string constant. It's |
312 | // better to use i32 to avoid the loads. |
313 | // Also, do not use f64 to lower memset unless this is a memset of zeros. |
314 | // The gymnastics of splatting a byte value into an XMM register and then |
315 | // only using 8-byte stores (because this is a CPU with slow unaligned |
316 | // 16-byte accesses) makes that a loser. |
317 | return MVT::f64; |
318 | } |
319 | } |
320 | // This is a compromise. If we reach here, unaligned accesses may be slow on |
321 | // this target. However, creating smaller, aligned accesses could be even |
322 | // slower and would certainly be a lot more code. |
323 | if (Subtarget.is64Bit() && Op.size() >= 8) |
324 | return MVT::i64; |
325 | return MVT::i32; |
326 | } |
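
// A few concrete outcomes of the selection above (assuming no NoImplicitFloat
// attribute and fast unaligned 16-byte accesses):
//   64+ byte memcpy, AVX-512BW, 512-bit vectors preferred -> v64i8
//   32+ byte op, AVX with cheap 256-bit instructions      -> v32i8
//   16+ byte op with only SSE2                            -> v16i8
//   8 byte zero-memset on 32-bit with SSE2                -> f64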
327 | |
328 | bool X86TargetLowering::isSafeMemOpType(MVT VT) const { |
329 | if (VT == MVT::f32) |
330 | return Subtarget.hasSSE1(); |
331 | if (VT == MVT::f64) |
332 | return Subtarget.hasSSE2(); |
333 | return true; |
334 | } |
335 | |
336 | static bool isBitAligned(Align Alignment, uint64_t SizeInBits) { |
337 | return (8 * Alignment.value()) % SizeInBits == 0; |
338 | } |
339 | |
340 | bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const { |
  if (isBitAligned(Alignment, VT.getSizeInBits()))
342 | return true; |
343 | switch (VT.getSizeInBits()) { |
344 | default: |
345 | // 8-byte and under are always assumed to be fast. |
346 | return true; |
347 | case 128: |
348 | return !Subtarget.isUnalignedMem16Slow(); |
349 | case 256: |
350 | return !Subtarget.isUnalignedMem32Slow(); |
351 | // TODO: What about AVX-512 (512-bit) accesses? |
352 | } |
353 | } |
354 | |
355 | bool X86TargetLowering::allowsMisalignedMemoryAccesses( |
356 | EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags, |
357 | unsigned *Fast) const { |
358 | if (Fast) |
359 | *Fast = isMemoryAccessFast(VT, Alignment); |
360 | // NonTemporal vector memory ops must be aligned. |
361 | if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { |
362 | // NT loads can only be vector aligned, so if its less aligned than the |
363 | // minimum vector size (which we can split the vector down to), we might as |
364 | // well use a regular unaligned vector load. |
365 | // We don't have any NT loads pre-SSE41. |
366 | if (!!(Flags & MachineMemOperand::MOLoad)) |
367 | return (Alignment < 16 || !Subtarget.hasSSE41()); |
368 | return false; |
369 | } |
370 | // Misaligned accesses of any size are always allowed. |
371 | return true; |
372 | } |
373 | |
374 | bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context, |
375 | const DataLayout &DL, EVT VT, |
376 | unsigned AddrSpace, Align Alignment, |
377 | MachineMemOperand::Flags Flags, |
378 | unsigned *Fast) const { |
379 | if (Fast) |
380 | *Fast = isMemoryAccessFast(VT, Alignment); |
381 | if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { |
382 | if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, |
383 | /*Fast=*/nullptr)) |
384 | return true; |
385 | // NonTemporal vector memory ops are special, and must be aligned. |
    if (!isBitAligned(Alignment, VT.getSizeInBits()))
387 | return false; |
388 | switch (VT.getSizeInBits()) { |
389 | case 128: |
390 | if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41()) |
391 | return true; |
392 | if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2()) |
393 | return true; |
394 | return false; |
395 | case 256: |
396 | if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2()) |
397 | return true; |
398 | if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX()) |
399 | return true; |
400 | return false; |
401 | case 512: |
402 | if (Subtarget.hasAVX512() && Subtarget.hasEVEX512()) |
403 | return true; |
404 | return false; |
405 | default: |
406 | return false; // Don't have NonTemporal vector memory ops of this size. |
407 | } |
408 | } |
409 | return true; |
410 | } |
411 | |
412 | /// Return the entry encoding for a jump table in the |
413 | /// current function. The returned value is a member of the |
414 | /// MachineJumpTableInfo::JTEntryKind enum. |
415 | unsigned X86TargetLowering::getJumpTableEncoding() const { |
416 | // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF |
417 | // symbol. |
418 | if (isPositionIndependent() && Subtarget.isPICStyleGOT()) |
419 | return MachineJumpTableInfo::EK_Custom32; |
420 | if (isPositionIndependent() && |
421 | getTargetMachine().getCodeModel() == CodeModel::Large && |
422 | !Subtarget.isTargetCOFF()) |
423 | return MachineJumpTableInfo::EK_LabelDifference64; |
424 | |
425 | // Otherwise, use the normal jump table encoding heuristics. |
426 | return TargetLowering::getJumpTableEncoding(); |
427 | } |
428 | |
429 | bool X86TargetLowering::useSoftFloat() const { |
430 | return Subtarget.useSoftFloat(); |
431 | } |
432 | |
433 | void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC, |
434 | ArgListTy &Args) const { |
435 | |
436 | // Only relabel X86-32 for C / Stdcall CCs. |
437 | if (Subtarget.is64Bit()) |
438 | return; |
439 | if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) |
440 | return; |
441 | unsigned ParamRegs = 0; |
442 | if (auto *M = MF->getFunction().getParent()) |
443 | ParamRegs = M->getNumberRegisterParameters(); |
444 | |
  // Mark the first N int arguments as being passed in registers.
446 | for (auto &Arg : Args) { |
447 | Type *T = Arg.Ty; |
448 | if (T->isIntOrPtrTy()) |
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
452 | numRegs = 2; |
453 | if (ParamRegs < numRegs) |
454 | return; |
455 | ParamRegs -= numRegs; |
456 | Arg.IsInReg = true; |
457 | } |
458 | } |
459 | } |
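
// For illustration (hypothetical scenario): compiling 32-bit code with
// -mregparm=3, a libcall such as __udivdi3(i64, i64) would get its first
// argument (two 32-bit register slots) marked inreg, while the second no
// longer fits in the remaining slot and stays on the stack.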
460 | |
461 | const MCExpr * |
462 | X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
463 | const MachineBasicBlock *MBB, |
464 | unsigned uid,MCContext &Ctx) const{ |
465 | assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); |
466 | // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF |
467 | // entries. |
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
470 | } |
471 | |
472 | /// Returns relocation base for the given PIC jumptable. |
473 | SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, |
474 | SelectionDAG &DAG) const { |
475 | if (!Subtarget.is64Bit()) |
476 | // This doesn't have SDLoc associated with it, but is not really the |
477 | // same as a Register. |
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
480 | return Table; |
481 | } |
482 | |
483 | /// This returns the relocation base for the given PIC jumptable, |
484 | /// the same as getPICJumpTableRelocBase, but as an MCExpr. |
485 | const MCExpr *X86TargetLowering:: |
486 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, |
487 | MCContext &Ctx) const { |
488 | // X86-64 uses RIP relative addressing based on the jump table label. |
489 | if (Subtarget.isPICStyleRIPRel() || |
490 | (Subtarget.is64Bit() && |
491 | getTargetMachine().getCodeModel() == CodeModel::Large)) |
492 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
493 | |
494 | // Otherwise, the reference is relative to the PIC base. |
  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
496 | } |
497 | |
498 | std::pair<const TargetRegisterClass *, uint8_t> |
499 | X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, |
500 | MVT VT) const { |
501 | const TargetRegisterClass *RRC = nullptr; |
502 | uint8_t Cost = 1; |
503 | switch (VT.SimpleTy) { |
504 | default: |
505 | return TargetLowering::findRepresentativeClass(TRI, VT); |
506 | case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: |
507 | RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; |
508 | break; |
509 | case MVT::x86mmx: |
510 | RRC = &X86::VR64RegClass; |
511 | break; |
512 | case MVT::f32: case MVT::f64: |
513 | case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: |
514 | case MVT::v4f32: case MVT::v2f64: |
515 | case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: |
516 | case MVT::v8f32: case MVT::v4f64: |
517 | case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: |
518 | case MVT::v16f32: case MVT::v8f64: |
519 | RRC = &X86::VR128XRegClass; |
520 | break; |
521 | } |
  return std::make_pair(RRC, Cost);
523 | } |
524 | |
525 | unsigned X86TargetLowering::getAddressSpace() const { |
526 | if (Subtarget.is64Bit()) |
527 | return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257; |
528 | return 256; |
529 | } |
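
// I.e. the segment address space used below is %fs (257) for 64-bit user
// code, %gs (256) for the 64-bit kernel code model, and %gs (256) for 32-bit
// code, matching X86AS::FS / X86AS::GS.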
530 | |
531 | static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { |
532 | return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || |
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
534 | } |
535 | |
536 | static Constant* SegmentOffset(IRBuilderBase &IRB, |
537 | int Offset, unsigned AddressSpace) { |
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      IRB.getPtrTy(AddressSpace));
541 | } |
542 | |
543 | Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { |
544 | // glibc, bionic, and Fuchsia have a special slot for the stack guard in |
545 | // tcbhead_t; use it instead of the usual global variable (see |
546 | // sysdeps/{i386,x86_64}/nptl/tls.h) |
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
548 | unsigned AddressSpace = getAddressSpace(); |
549 | |
550 | // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value. |
551 | if (Subtarget.isTargetFuchsia()) |
      return SegmentOffset(IRB, 0x10, AddressSpace);
553 | |
554 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
555 | // Specially, some users may customize the base reg and offset. |
556 | int Offset = M->getStackProtectorGuardOffset(); |
557 | // If we don't set -stack-protector-guard-offset value: |
558 | // %fs:0x28, unless we're using a Kernel code model, in which case |
559 | // it's %gs:0x28. gs:0x14 on i386. |
560 | if (Offset == INT_MAX) |
561 | Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; |
562 | |
563 | StringRef GuardReg = M->getStackProtectorGuardReg(); |
564 | if (GuardReg == "fs" ) |
565 | AddressSpace = X86AS::FS; |
566 | else if (GuardReg == "gs" ) |
567 | AddressSpace = X86AS::GS; |
568 | |
    // Use the symbol guard if the user specified one.
570 | StringRef GuardSymb = M->getStackProtectorGuardSymbol(); |
571 | if (!GuardSymb.empty()) { |
      GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
      if (!GV) {
        Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
                                       : Type::getInt32Ty(M->getContext());
576 | GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, |
577 | nullptr, GuardSymb, nullptr, |
578 | GlobalValue::NotThreadLocal, AddressSpace); |
579 | if (!Subtarget.isTargetDarwin()) |
580 | GV->setDSOLocal(M->getDirectAccessExternalData()); |
581 | } |
582 | return GV; |
583 | } |
584 | |
585 | return SegmentOffset(IRB, Offset, AddressSpace); |
586 | } |
587 | return TargetLowering::getIRStackGuard(IRB); |
588 | } |
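
// For example, with the defaults on x86-64 glibc this reads the guard from
// %fs:0x28 and on i386 from %gs:0x14; a module built with
// -mstack-protector-guard-reg=gs and -mstack-protector-guard-offset=0x40
// would read %gs:0x40 instead (flag spellings per clang; shown for
// illustration only).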
589 | |
590 | void X86TargetLowering::insertSSPDeclarations(Module &M) const { |
591 | // MSVC CRT provides functionalities for stack protection. |
592 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
593 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
594 | // MSVC CRT has a global variable holding security cookie. |
    M.getOrInsertGlobal("__security_cookie",
                        PointerType::getUnqual(M.getContext()));
597 | |
598 | // MSVC CRT has a function to validate security cookie. |
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        PointerType::getUnqual(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
605 | } |
606 | return; |
607 | } |
608 | |
609 | StringRef GuardMode = M.getStackProtectorGuard(); |
610 | |
611 | // glibc, bionic, and Fuchsia have a special slot for the stack guard. |
612 | if ((GuardMode == "tls" || GuardMode.empty()) && |
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
614 | return; |
615 | TargetLowering::insertSSPDeclarations(M); |
616 | } |
617 | |
618 | Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { |
619 | // MSVC CRT has a global variable holding security cookie. |
620 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
621 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
    return M.getGlobalVariable("__security_cookie");
623 | } |
624 | return TargetLowering::getSDagStackGuard(M); |
625 | } |
626 | |
627 | Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { |
628 | // MSVC CRT has a function to validate security cookie. |
629 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
630 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
    return M.getFunction("__security_check_cookie");
632 | } |
633 | return TargetLowering::getSSPStackGuardCheck(M); |
634 | } |
635 | |
636 | Value * |
637 | X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { |
638 | // Android provides a fixed TLS slot for the SafeStack pointer. See the |
639 | // definition of TLS_SLOT_SAFESTACK in |
640 | // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h |
641 | if (Subtarget.isTargetAndroid()) { |
    // %fs:0x48, unless we're using a Kernel code model, in which case
    // it's %gs:0x48; %gs:0x24 on i386.
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
646 | } |
647 | |
648 | // Fuchsia is similar. |
649 | if (Subtarget.isTargetFuchsia()) { |
650 | // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value. |
    return SegmentOffset(IRB, 0x18, getAddressSpace());
652 | } |
653 | |
654 | return TargetLowering::getSafeStackPointerLocation(IRB); |
655 | } |
656 | |
657 | //===----------------------------------------------------------------------===// |
658 | // Return Value Calling Convention Implementation |
659 | //===----------------------------------------------------------------------===// |
660 | |
661 | bool X86TargetLowering::CanLowerReturn( |
662 | CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, |
663 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
664 | SmallVector<CCValAssign, 16> RVLocs; |
665 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
  return CCInfo.CheckReturn(Outs, RetCC_X86);
667 | } |
668 | |
669 | const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { |
670 | static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; |
671 | return ScratchRegs; |
672 | } |
673 | |
674 | ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const { |
675 | static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR}; |
676 | return RCRegs; |
677 | } |
678 | |
/// Lowers mask values (v*i1) to the local register values
680 | /// \returns DAG node after lowering to register type |
681 | static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, |
682 | const SDLoc &DL, SelectionDAG &DAG) { |
683 | EVT ValVT = ValArg.getValueType(); |
684 | |
685 | if (ValVT == MVT::v1i1) |
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, DL));
688 | |
689 | if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) || |
690 | (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) { |
691 | // Two stage lowering might be required |
692 | // bitcast: v8i1 -> i8 / v16i1 -> i16 |
693 | // anyextend: i8 -> i32 / i16 -> i32 |
694 | EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16; |
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
698 | return ValToCopy; |
699 | } |
700 | |
701 | if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) || |
702 | (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) { |
703 | // One stage lowering is required |
704 | // bitcast: v32i1 -> i32 / v64i1 -> i64 |
    return DAG.getBitcast(ValLoc, ValArg);
706 | } |
707 | |
  return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
709 | } |
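
// E.g. a v16i1 value assigned to an i32 location is lowered here as
//   bitcast v16i1 -> i16, then any_extend i16 -> i32
// while v32i1 -> i32 needs only the single bitcast.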
710 | |
711 | /// Breaks v64i1 value into two registers and adds the new node to the DAG |
712 | static void Passv64i1ArgInRegs( |
713 | const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, |
714 | SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA, |
715 | CCValAssign &NextVA, const X86Subtarget &Subtarget) { |
716 | assert(Subtarget.hasBWI() && "Expected AVX512BW target!" ); |
717 | assert(Subtarget.is32Bit() && "Expecting 32 bit target" ); |
718 | assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value" ); |
719 | assert(VA.isRegLoc() && NextVA.isRegLoc() && |
720 | "The value should reside in two registers" ); |
721 | |
722 | // Before splitting the value we cast it to i64 |
  Arg = DAG.getBitcast(MVT::i64, Arg);
724 | |
725 | // Splitting the value into two i32 types |
726 | SDValue Lo, Hi; |
  std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
728 | |
729 | // Attach the two i32 types into corresponding registers |
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
732 | } |
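
// Sketch of the resulting nodes for a v64i1 argument on a 32-bit AVX512BW
// target, assuming the two locations picked by the CC are EAX and ECX
// (the actual registers depend on the calling convention):
//   %arg = bitcast v64i1 %x to i64
//   lo   = low 32 bits  -> copied into EAX
//   hi   = high 32 bits -> copied into ECX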
733 | |
734 | SDValue |
735 | X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
736 | bool isVarArg, |
737 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
738 | const SmallVectorImpl<SDValue> &OutVals, |
739 | const SDLoc &dl, SelectionDAG &DAG) const { |
740 | MachineFunction &MF = DAG.getMachineFunction(); |
741 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
742 | |
743 | // In some cases we need to disable registers from the default CSR list. |
744 | // For example, when they are used as return registers (preserve_* and X86's |
745 | // regcall) or for argument passing (X86's regcall). |
746 | bool ShouldDisableCalleeSavedRegister = |
      shouldDisableRetRegFromCSR(CallConv) ||
      MF.getFunction().hasFnAttribute("no_caller_saved_registers");
749 | |
750 | if (CallConv == CallingConv::X86_INTR && !Outs.empty()) |
    report_fatal_error("X86 interrupts may not return any value");
752 | |
753 | SmallVector<CCValAssign, 16> RVLocs; |
754 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); |
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
756 | |
757 | SmallVector<std::pair<Register, SDValue>, 4> RetVals; |
758 | for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E; |
759 | ++I, ++OutsIndex) { |
760 | CCValAssign &VA = RVLocs[I]; |
761 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
762 | |
763 | // Add the register to the CalleeSaveDisableRegs list. |
764 | if (ShouldDisableCalleeSavedRegister) |
      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
766 | |
767 | SDValue ValToCopy = OutVals[OutsIndex]; |
768 | EVT ValVT = ValToCopy.getValueType(); |
769 | |
770 | // Promote values to the appropriate types. |
771 | if (VA.getLocInfo() == CCValAssign::SExt) |
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt) {
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
      else
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
    }
    else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
783 | |
784 | assert(VA.getLocInfo() != CCValAssign::FPExt && |
785 | "Unexpected FP-extend for return value." ); |
786 | |
787 | // Report an error if we have attempted to return a value via an XMM |
788 | // register and SSE was disabled. |
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               ValVT == MVT::f64) {
      // When returning a double via an XMM register, report an error if SSE2
      // is not enabled.
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
799 | } |
800 | |
801 | // Returns in ST0/ST1 are handled specially: these are pushed as operands to |
802 | // the RET instruction and handled by the FP Stackifier. |
803 | if (VA.getLocReg() == X86::FP0 || |
804 | VA.getLocReg() == X86::FP1) { |
805 | // If this is a copy from an xmm register to ST(0), use an FPExtend to |
806 | // change the value to the FP stack register class. |
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
810 | // Don't emit a copytoreg. |
811 | continue; |
812 | } |
813 | |
814 | // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 |
815 | // which is returned in RAX / RDX. |
816 | if (Subtarget.is64Bit()) { |
817 | if (ValVT == MVT::x86mmx) { |
818 | if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { |
          ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                  ValToCopy);
          // If we don't have SSE2 available, convert to v4f32 so the generated
          // register is legal.
          if (!Subtarget.hasSSE2())
            ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
826 | } |
827 | } |
828 | } |
829 | |
830 | if (VA.needsCustom()) { |
831 | assert(VA.getValVT() == MVT::v64i1 && |
832 | "Currently the only custom case is when we split v64i1 to 2 regs" ); |
833 | |
      Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
                         Subtarget);
836 | |
837 | // Add the second register to the CalleeSaveDisableRegs list. |
838 | if (ShouldDisableCalleeSavedRegister) |
        MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
840 | } else { |
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
842 | } |
843 | } |
844 | |
845 | SDValue Glue; |
846 | SmallVector<SDValue, 6> RetOps; |
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
                                         MVT::i32));
851 | |
852 | // Copy the result values into the output registers. |
853 | for (auto &RetVal : RetVals) { |
854 | if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) { |
      RetOps.push_back(RetVal.second);
856 | continue; // Don't emit a copytoreg. |
857 | } |
858 | |
    Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(
        DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
863 | } |
864 | |
865 | // Swift calling convention does not require we copy the sret argument |
866 | // into %rax/%eax for the return, and SRetReturnReg is not set for Swift. |
867 | |
868 | // All x86 ABIs require that for returning structs by value we copy |
869 | // the sret argument into %rax/%eax (depending on ABI) for the return. |
870 | // We saved the argument into a virtual register in the entry block, |
871 | // so now we copy the value out and into %rax/%eax. |
872 | // |
873 | // Checking Function.hasStructRetAttr() here is insufficient because the IR |
874 | // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is |
875 | // false, then an sret argument may be implicitly inserted in the SelDAG. In |
876 | // either case FuncInfo->setSRetReturnReg() will have been called. |
877 | if (Register SRetReg = FuncInfo->getSRetReturnReg()) { |
878 | // When we have both sret and another return value, we should use the |
879 | // original Chain stored in RetOps[0], instead of the current Chain updated |
880 | // in the above loop. If we only have sret, RetOps[0] equals to Chain. |
881 | |
882 | // For the case of sret and another return value, we have |
883 | // Chain_0 at the function entry |
884 | // Chain_1 = getCopyToReg(Chain_0) in the above loop |
885 | // If we use Chain_1 in getCopyFromReg, we will have |
886 | // Val = getCopyFromReg(Chain_1) |
887 | // Chain_2 = getCopyToReg(Chain_1, Val) from below |
888 | |
889 | // getCopyToReg(Chain_0) will be glued together with |
890 | // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be |
891 | // in Unit B, and we will have cyclic dependency between Unit A and Unit B: |
892 | // Data dependency from Unit B to Unit A due to usage of Val in |
893 | // getCopyToReg(Chain_1, Val) |
894 | // Chain dependency from Unit A to Unit B |
895 | |
896 | // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg. |
    SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
                                     getPointerTy(MF.getDataLayout()));
899 | |
900 | Register RetValReg |
901 | = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? |
902 | X86::RAX : X86::EAX; |
    Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
    Glue = Chain.getValue(1);
905 | |
906 | // RAX/EAX now acts like a return value. |
    RetOps.push_back(
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
909 | |
910 | // Add the returned register to the CalleeSaveDisableRegs list. Don't do |
911 | // this however for preserve_most/preserve_all to minimize the number of |
912 | // callee-saved registers for these CCs. |
913 | if (ShouldDisableCalleeSavedRegister && |
914 | CallConv != CallingConv::PreserveAll && |
915 | CallConv != CallingConv::PreserveMost) |
      MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
917 | } |
918 | |
919 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
920 | const MCPhysReg *I = |
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
922 | if (I) { |
923 | for (; *I; ++I) { |
      if (X86::GR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
928 | } |
929 | } |
930 | |
931 | RetOps[0] = Chain; // Update chain. |
932 | |
933 | // Add the glue if we have it. |
934 | if (Glue.getNode()) |
    RetOps.push_back(Glue);
936 | |
937 | X86ISD::NodeType opcode = X86ISD::RET_GLUE; |
938 | if (CallConv == CallingConv::X86_INTR) |
939 | opcode = X86ISD::IRET; |
  return DAG.getNode(opcode, dl, MVT::Other, RetOps);
941 | } |
942 | |
943 | bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { |
  if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
945 | return false; |
946 | |
947 | SDValue TCChain = Chain; |
948 | SDNode *Copy = *N->use_begin(); |
949 | if (Copy->getOpcode() == ISD::CopyToReg) { |
950 | // If the copy has a glue operand, we conservatively assume it isn't safe to |
951 | // perform a tail call. |
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
955 | } else if (Copy->getOpcode() != ISD::FP_EXTEND) |
956 | return false; |
957 | |
958 | bool HasRet = false; |
959 | for (const SDNode *U : Copy->uses()) { |
960 | if (U->getOpcode() != X86ISD::RET_GLUE) |
961 | return false; |
    // If we are returning more than one value, we can definitely
    // not make a tail call; see PR19530.
964 | if (U->getNumOperands() > 4) |
965 | return false; |
966 | if (U->getNumOperands() == 4 && |
        U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
968 | return false; |
969 | HasRet = true; |
970 | } |
971 | |
972 | if (!HasRet) |
973 | return false; |
974 | |
975 | Chain = TCChain; |
976 | return true; |
977 | } |
978 | |
979 | EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, |
980 | ISD::NodeType ExtendKind) const { |
981 | MVT ReturnMVT = MVT::i32; |
982 | |
983 | bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); |
984 | if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { |
985 | // The ABI does not require i1, i8 or i16 to be extended. |
986 | // |
987 | // On Darwin, there is code in the wild relying on Clang's old behaviour of |
988 | // always extending i8/i16 return values, so keep doing that for now. |
989 | // (PR26665). |
990 | ReturnMVT = MVT::i8; |
991 | } |
992 | |
  EVT MinVT = getRegisterType(Context, ReturnMVT);
  return VT.bitsLT(MinVT) ? MinVT : VT;
995 | } |
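
// Concretely: an i8 return value on Linux is left unextended (MinVT stays
// i8), while on Darwin it is widened to i32 to keep compatibility with code
// relying on Clang's old always-extend behaviour (PR26665).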
996 | |
997 | /// Reads two 32 bit registers and creates a 64 bit mask value. |
/// \param VA The current 32 bit value that needs to be assigned.
/// \param NextVA The next 32 bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InGlue Represents SDValue in the parent DAG node for
///                        glue purposes. In the case the DAG is already using
///                        physical register instead of virtual, we should glue
///                        our new SDValue to InGlue SDValue.
/// \return a new SDValue of size 64 bit.
1006 | static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, |
1007 | SDValue &Root, SelectionDAG &DAG, |
1008 | const SDLoc &DL, const X86Subtarget &Subtarget, |
1009 | SDValue *InGlue = nullptr) { |
1010 | assert((Subtarget.hasBWI()) && "Expected AVX512BW target!" ); |
1011 | assert(Subtarget.is32Bit() && "Expecting 32 bit target" ); |
1012 | assert(VA.getValVT() == MVT::v64i1 && |
1013 | "Expecting first location of 64 bit width type" ); |
1014 | assert(NextVA.getValVT() == VA.getValVT() && |
1015 | "The locations should have the same type" ); |
1016 | assert(VA.isRegLoc() && NextVA.isRegLoc() && |
1017 | "The values should reside in two registers" ); |
1018 | |
1019 | SDValue Lo, Hi; |
1020 | SDValue ArgValueLo, ArgValueHi; |
1021 | |
1022 | MachineFunction &MF = DAG.getMachineFunction(); |
1023 | const TargetRegisterClass *RC = &X86::GR32RegClass; |
1024 | |
1025 | // Read a 32 bit value from the registers. |
1026 | if (nullptr == InGlue) { |
1027 | // When no physical register is present, |
1028 | // create an intermediate virtual register. |
    Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
    ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1033 | } else { |
1034 | // When a physical register is available read the value from it and glue |
1035 | // the reads together. |
    ArgValueLo =
        DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
    *InGlue = ArgValueLo.getValue(2);
    ArgValueHi =
        DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
    *InGlue = ArgValueHi.getValue(2);
1042 | } |
1043 | |
1044 | // Convert the i32 type into v32i1 type. |
  Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);

  // Convert the i32 type into v32i1 type.
  Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1049 | |
1050 | // Concatenate the two values together. |
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1052 | } |
1053 | |
1054 | /// The function will lower a register of various sizes (8/16/32/64) |
1055 | /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1) |
1056 | /// \returns a DAG node contains the operand after lowering to mask type. |
1057 | static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, |
1058 | const EVT &ValLoc, const SDLoc &DL, |
1059 | SelectionDAG &DAG) { |
1060 | SDValue ValReturned = ValArg; |
1061 | |
1062 | if (ValVT == MVT::v1i1) |
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1064 | |
1065 | if (ValVT == MVT::v64i1) { |
    // On 32 bit machines, this case is handled by getv64i1Argument.
    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
    // On 64 bit machines there is no need to truncate the value, only bitcast.
1069 | } else { |
1070 | MVT MaskLenVT; |
1071 | switch (ValVT.getSimpleVT().SimpleTy) { |
1072 | case MVT::v8i1: |
1073 | MaskLenVT = MVT::i8; |
1074 | break; |
1075 | case MVT::v16i1: |
1076 | MaskLenVT = MVT::i16; |
1077 | break; |
1078 | case MVT::v32i1: |
1079 | MaskLenVT = MVT::i32; |
1080 | break; |
1081 | default: |
1082 | llvm_unreachable("Expecting a vector of i1 types" ); |
1083 | } |
1084 | |
    ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1086 | } |
  return DAG.getBitcast(ValVT, ValReturned);
1088 | } |
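
// E.g. a v16i1 return value that arrives in a 32-bit location is recovered as
//   truncate i32 -> i16, then bitcast i16 -> v16i1
// mirroring the lowering done on the producer side in lowerMasksToReg.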
1089 | |
1090 | /// Lower the result values of a call into the |
1091 | /// appropriate copies out of appropriate physical registers. |
1092 | /// |
1093 | SDValue X86TargetLowering::LowerCallResult( |
1094 | SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, |
1095 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
1096 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
1097 | uint32_t *RegMask) const { |
1098 | |
1099 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
1100 | // Assign locations to each value returned by this call. |
1101 | SmallVector<CCValAssign, 16> RVLocs; |
1102 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
1103 | *DAG.getContext()); |
  CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1105 | |
1106 | // Copy all of the result registers out of their specified physreg. |
1107 | for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E; |
1108 | ++I, ++InsIndex) { |
1109 | CCValAssign &VA = RVLocs[I]; |
1110 | EVT CopyVT = VA.getLocVT(); |
1111 | |
1112 | // In some calling conventions we need to remove the used registers |
1113 | // from the register mask. |
1114 | if (RegMask) { |
      for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1116 | RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); |
1117 | } |
1118 | |
1119 | // Report an error if there was an attempt to return FP values via XMM |
1120 | // registers. |
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      if (VA.getLocReg() == X86::XMM1)
        VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
      else
        VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               CopyVT == MVT::f64) {
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      if (VA.getLocReg() == X86::XMM1)
        VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
      else
        VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1135 | } |
1136 | |
1137 | // If we prefer to use the value in xmm registers, copy it out as f80 and |
1138 | // use a truncate to move it from fp stack reg to xmm reg. |
1139 | bool RoundAfterCopy = false; |
1140 | if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && |
        isScalarFPTypeInSSEReg(VA.getValVT())) {
1142 | if (!Subtarget.hasX87()) |
        report_fatal_error("X87 register return with X87 disabled");
1144 | CopyVT = MVT::f80; |
1145 | RoundAfterCopy = (CopyVT != VA.getLocVT()); |
1146 | } |
1147 | |
1148 | SDValue Val; |
1149 | if (VA.needsCustom()) { |
1150 | assert(VA.getValVT() == MVT::v64i1 && |
1151 | "Currently the only custom case is when we split v64i1 to 2 regs" ); |
      Val =
          getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
                  .getValue(1);
      Val = Chain.getValue(0);
      InGlue = Chain.getValue(2);
1159 | } |
1160 | |
1161 | if (RoundAfterCopy) |
      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1165 | |
1166 | if (VA.isExtInLoc()) { |
1167 | if (VA.getValVT().isVector() && |
1168 | VA.getValVT().getScalarType() == MVT::i1 && |
1169 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
1170 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
1171 | // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 |
        Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1173 | } else |
        Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1175 | } |
1176 | |
1177 | if (VA.getLocInfo() == CCValAssign::BCvt) |
      Val = DAG.getBitcast(VA.getValVT(), Val);
1179 | |
    InVals.push_back(Val);
1181 | } |
1182 | |
1183 | return Chain; |
1184 | } |
1185 | |
1186 | //===----------------------------------------------------------------------===// |
1187 | // C & StdCall & Fast Calling Convention implementation |
1188 | //===----------------------------------------------------------------------===// |
1189 | // StdCall calling convention seems to be standard for many Windows' API |
1190 | // routines and around. It differs from C calling convention just a little: |
1191 | // callee should clean up the stack, not caller. Symbols should be also |
1192 | // decorated in some fancy way :) It doesn't support any vector arguments. |
1193 | // For info on fast calling convention see Fast Calling Convention (tail call) |
1194 | // implementation LowerX86_32FastCCCallTo. |
1195 | |
1196 | /// Determines whether Args, either a set of outgoing arguments to a call, or a |
1197 | /// set of incoming args of a call, contains an sret pointer that the callee |
/// pops.
1199 | template <typename T> |
1200 | static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args, |
1201 | const X86Subtarget &Subtarget) { |
1202 | // Not C++20 (yet), so no concepts available. |
1203 | static_assert(std::is_same_v<T, ISD::OutputArg> || |
1204 | std::is_same_v<T, ISD::InputArg>, |
1205 | "requires ISD::OutputArg or ISD::InputArg" ); |
1206 | |
1207 | // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out |
1208 | // for most compilations. |
1209 | if (!Subtarget.is32Bit()) |
1210 | return false; |
1211 | |
1212 | if (Args.empty()) |
1213 | return false; |
1214 | |
1215 | // Most calls do not have an sret argument, check the arg next. |
1216 | const ISD::ArgFlagsTy &Flags = Args[0].Flags; |
1217 | if (!Flags.isSRet() || Flags.isInReg()) |
1218 | return false; |
1219 | |
  // The MSVC ABI does not pop the sret.
1221 | if (Subtarget.getTargetTriple().isOSMSVCRT()) |
1222 | return false; |
1223 | |
1224 | // MCUs don't pop the sret |
1225 | if (Subtarget.isTargetMCU()) |
1226 | return false; |
1227 | |
1228 | // Callee pops argument |
1229 | return true; |
1230 | } |
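
// Example: for a 32-bit Linux "C" call returning a struct through an sret
// pointer, the callee pops that hidden pointer (ret $4), so the caller must
// not also adjust the stack; under the MSVC ABI or on MCU targets the caller
// keeps ownership of that slot.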
1231 | |
1232 | /// Make a copy of an aggregate at address specified by "Src" to address |
1233 | /// "Dst" with size and alignment information specified by the specific |
1234 | /// parameter attribute. The copy will be passed as a byval function parameter. |
1235 | static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, |
1236 | SDValue Chain, ISD::ArgFlagsTy Flags, |
1237 | SelectionDAG &DAG, const SDLoc &dl) { |
  SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1239 | |
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile*/ false, /*AlwaysInline=*/true,
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1244 | } |
1245 | |
1246 | /// Return true if the calling convention is one that we can guarantee TCO for. |
1247 | static bool canGuaranteeTCO(CallingConv::ID CC) { |
1248 | return (CC == CallingConv::Fast || CC == CallingConv::GHC || |
1249 | CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || |
1250 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail); |
1251 | } |
1252 | |
1253 | /// Return true if we might ever do TCO for calls with this calling convention. |
1254 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
1255 | switch (CC) { |
1256 | // C calling conventions: |
1257 | case CallingConv::C: |
1258 | case CallingConv::Win64: |
1259 | case CallingConv::X86_64_SysV: |
1260 | case CallingConv::PreserveNone: |
1261 | // Callee pop conventions: |
1262 | case CallingConv::X86_ThisCall: |
1263 | case CallingConv::X86_StdCall: |
1264 | case CallingConv::X86_VectorCall: |
1265 | case CallingConv::X86_FastCall: |
1266 | // Swift: |
1267 | case CallingConv::Swift: |
1268 | return true; |
1269 | default: |
1270 | return canGuaranteeTCO(CC); |
1271 | } |
1272 | } |
1273 | |
1274 | /// Return true if the function is being made into a tailcall target by |
1275 | /// changing its ABI. |
1276 | static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { |
1277 | return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || |
1278 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
1279 | } |
1280 | |
1281 | bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
1282 | if (!CI->isTailCall()) |
1283 | return false; |
1284 | |
1285 | CallingConv::ID CalleeCC = CI->getCallingConv(); |
  if (!mayTailCallThisCC(CalleeCC))
1287 | return false; |
1288 | |
1289 | return true; |
1290 | } |
1291 | |
1292 | SDValue |
1293 | X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, |
1294 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1295 | const SDLoc &dl, SelectionDAG &DAG, |
1296 | const CCValAssign &VA, |
1297 | MachineFrameInfo &MFI, unsigned i) const { |
1298 | // Create the nodes corresponding to a load from this parameter slot. |
1299 | ISD::ArgFlagsTy Flags = Ins[i].Flags; |
1300 | bool AlwaysUseMutable = shouldGuaranteeTCO( |
1301 | CC: CallConv, GuaranteedTailCallOpt: DAG.getTarget().Options.GuaranteedTailCallOpt); |
1302 | bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); |
1303 | EVT ValVT; |
1304 | MVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
1305 | |
// If the value is passed by pointer, we receive the address instead of the
// value itself. For i1 mask values, no in-memory extension is needed when the
// value and its location already share the same size.
1309 | bool ExtendedInMem = |
1310 | VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 && |
1311 | VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits(); |
1312 | |
1313 | if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) |
1314 | ValVT = VA.getLocVT(); |
1315 | else |
1316 | ValVT = VA.getValVT(); |
1317 | |
1318 | // FIXME: For now, all byval parameter objects are marked mutable. This can be |
1319 | // changed with more analysis. |
// In case of tail call optimization, mark all arguments mutable, since they
// could be overwritten by the lowering of arguments for a tail call.
1322 | if (Flags.isByVal()) { |
1323 | unsigned Bytes = Flags.getByValSize(); |
1324 | if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. |
1325 | |
1326 | // FIXME: For now, all byval parameter objects are marked as aliasing. This |
1327 | // can be improved with deeper analysis. |
1328 | int FI = MFI.CreateFixedObject(Size: Bytes, SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable, |
1329 | /*isAliased=*/true); |
1330 | return DAG.getFrameIndex(FI, VT: PtrVT); |
1331 | } |
1332 | |
1333 | EVT ArgVT = Ins[i].ArgVT; |
1334 | |
1335 | // If this is a vector that has been split into multiple parts, don't elide |
1336 | // the copy. The layout on the stack may not match the packed in-memory |
1337 | // layout. |
1338 | bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector(); |
1339 | |
1340 | // This is an argument in memory. We might be able to perform copy elision. |
1341 | // If the argument is passed directly in memory without any extension, then we |
1342 | // can perform copy elision. Large vector types, for example, may be passed |
1343 | // indirectly by pointer. |
1344 | if (Flags.isCopyElisionCandidate() && |
1345 | VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem && |
1346 | !ScalarizedVector) { |
1347 | SDValue PartAddr; |
1348 | if (Ins[i].PartOffset == 0) { |
1349 | // If this is a one-part value or the first part of a multi-part value, |
1350 | // create a stack object for the entire argument value type and return a |
1351 | // load from our portion of it. This assumes that if the first part of an |
1352 | // argument is in memory, the rest will also be in memory. |
1353 | int FI = MFI.CreateFixedObject(Size: ArgVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
1354 | /*IsImmutable=*/false); |
1355 | PartAddr = DAG.getFrameIndex(FI, VT: PtrVT); |
1356 | return DAG.getLoad( |
1357 | VT: ValVT, dl, Chain, Ptr: PartAddr, |
1358 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)); |
1359 | } |
1360 | |
1361 | // This is not the first piece of an argument in memory. See if there is |
1362 | // already a fixed stack object including this offset. If so, assume it |
1363 | // was created by the PartOffset == 0 branch above and create a load from |
1364 | // the appropriate offset into it. |
1365 | int64_t PartBegin = VA.getLocMemOffset(); |
1366 | int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; |
1367 | int FI = MFI.getObjectIndexBegin(); |
1368 | for (; MFI.isFixedObjectIndex(ObjectIdx: FI); ++FI) { |
1369 | int64_t ObjBegin = MFI.getObjectOffset(ObjectIdx: FI); |
1370 | int64_t ObjEnd = ObjBegin + MFI.getObjectSize(ObjectIdx: FI); |
1371 | if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) |
1372 | break; |
1373 | } |
1374 | if (MFI.isFixedObjectIndex(ObjectIdx: FI)) { |
1375 | SDValue Addr = |
1376 | DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: DAG.getFrameIndex(FI, VT: PtrVT), |
1377 | N2: DAG.getIntPtrConstant(Val: Ins[i].PartOffset, DL: dl)); |
1378 | return DAG.getLoad(VT: ValVT, dl, Chain, Ptr: Addr, |
1379 | PtrInfo: MachinePointerInfo::getFixedStack( |
1380 | MF&: DAG.getMachineFunction(), FI, Offset: Ins[i].PartOffset)); |
1381 | } |
1382 | } |
1383 | |
1384 | int FI = MFI.CreateFixedObject(Size: ValVT.getSizeInBits() / 8, |
1385 | SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable); |
1386 | |
1387 | // Set SExt or ZExt flag. |
1388 | if (VA.getLocInfo() == CCValAssign::ZExt) { |
1389 | MFI.setObjectZExt(ObjectIdx: FI, IsZExt: true); |
1390 | } else if (VA.getLocInfo() == CCValAssign::SExt) { |
1391 | MFI.setObjectSExt(ObjectIdx: FI, IsSExt: true); |
1392 | } |
1393 | |
1394 | MaybeAlign Alignment; |
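// The 32-bit MSVC ABI only guarantees 4-byte alignment for arguments passed
// on the stack, so don't claim a larger alignment for this load (f80 keeps
// its default handling).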
1395 | if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && |
1396 | ValVT != MVT::f80) |
1397 | Alignment = MaybeAlign(4); |
1398 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
1399 | SDValue Val = DAG.getLoad( |
1400 | VT: ValVT, dl, Chain, Ptr: FIN, |
1401 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), |
1402 | Alignment); |
1403 | return ExtendedInMem |
1404 | ? (VA.getValVT().isVector() |
1405 | ? DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: VA.getValVT(), Operand: Val) |
1406 | : DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val)) |
1407 | : Val; |
1408 | } |
1409 | |
1410 | // FIXME: Get this from tablegen. |
1411 | static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, |
1412 | const X86Subtarget &Subtarget) { |
1413 | assert(Subtarget.is64Bit()); |
1414 | |
1415 | if (Subtarget.isCallingConvWin64(CC: CallConv)) { |
1416 | static const MCPhysReg GPR64ArgRegsWin64[] = { |
1417 | X86::RCX, X86::RDX, X86::R8, X86::R9 |
1418 | }; |
1419 | return ArrayRef(std::begin(arr: GPR64ArgRegsWin64), std::end(arr: GPR64ArgRegsWin64)); |
1420 | } |
1421 | |
1422 | static const MCPhysReg GPR64ArgRegs64Bit[] = { |
1423 | X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 |
1424 | }; |
1425 | return ArrayRef(std::begin(arr: GPR64ArgRegs64Bit), std::end(arr: GPR64ArgRegs64Bit)); |
1426 | } |
1427 | |
1428 | // FIXME: Get this from tablegen. |
1429 | static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, |
1430 | CallingConv::ID CallConv, |
1431 | const X86Subtarget &Subtarget) { |
1432 | assert(Subtarget.is64Bit()); |
1433 | if (Subtarget.isCallingConvWin64(CC: CallConv)) { |
// The XMM registers which might contain vararg parameters are shadowed by
// their paired GPRs, so we only need to save the GPRs to their home slots.
1437 | // TODO: __vectorcall will change this. |
1438 | return std::nullopt; |
1439 | } |
1440 | |
1441 | bool isSoftFloat = Subtarget.useSoftFloat(); |
1442 | if (isSoftFloat || !Subtarget.hasSSE1()) |
1443 | // Kernel mode asks for SSE to be disabled, so there are no XMM argument |
1444 | // registers. |
1445 | return std::nullopt; |
1446 | |
1447 | static const MCPhysReg XMMArgRegs64Bit[] = { |
1448 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
1449 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
1450 | }; |
1451 | return ArrayRef(std::begin(arr: XMMArgRegs64Bit), std::end(arr: XMMArgRegs64Bit)); |
1452 | } |
1453 | |
1454 | #ifndef NDEBUG |
1455 | static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { |
1456 | return llvm::is_sorted( |
1457 | ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool { |
1458 | return A.getValNo() < B.getValNo(); |
1459 | }); |
1460 | } |
1461 | #endif |
1462 | |
1463 | namespace { |
/// Helper class for lowering the parameters of variadic functions.
1465 | class VarArgsLoweringHelper { |
1466 | public: |
1467 | VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc, |
1468 | SelectionDAG &DAG, const X86Subtarget &Subtarget, |
1469 | CallingConv::ID CallConv, CCState &CCInfo) |
1470 | : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget), |
1471 | TheMachineFunction(DAG.getMachineFunction()), |
1472 | TheFunction(TheMachineFunction.getFunction()), |
1473 | FrameInfo(TheMachineFunction.getFrameInfo()), |
1474 | FrameLowering(*Subtarget.getFrameLowering()), |
1475 | TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv), |
1476 | CCInfo(CCInfo) {} |
1477 | |
// Lower variadic parameters.
1479 | void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize); |
1480 | |
1481 | private: |
1482 | void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize); |
1483 | |
1484 | void forwardMustTailParameters(SDValue &Chain); |
1485 | |
1486 | bool is64Bit() const { return Subtarget.is64Bit(); } |
1487 | bool isWin64() const { return Subtarget.isCallingConvWin64(CC: CallConv); } |
1488 | |
1489 | X86MachineFunctionInfo *FuncInfo; |
1490 | const SDLoc &DL; |
1491 | SelectionDAG &DAG; |
1492 | const X86Subtarget &Subtarget; |
1493 | MachineFunction &TheMachineFunction; |
1494 | const Function &TheFunction; |
1495 | MachineFrameInfo &FrameInfo; |
1496 | const TargetFrameLowering &FrameLowering; |
1497 | const TargetLowering &TargLowering; |
1498 | CallingConv::ID CallConv; |
1499 | CCState &CCInfo; |
1500 | }; |
1501 | } // namespace |
1502 | |
1503 | void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters( |
1504 | SDValue &Chain, unsigned StackSize) { |
// If the function takes a variable number of arguments, make a frame index for
1506 | // the start of the first vararg value... for expansion of llvm.va_start. We |
1507 | // can skip this if there are no va_start calls. |
1508 | if (is64Bit() || (CallConv != CallingConv::X86_FastCall && |
1509 | CallConv != CallingConv::X86_ThisCall)) { |
1510 | FuncInfo->setVarArgsFrameIndex( |
1511 | FrameInfo.CreateFixedObject(Size: 1, SPOffset: StackSize, IsImmutable: true)); |
1512 | } |
1513 | |
1514 | // 64-bit calling conventions support varargs and register parameters, so we |
1515 | // have to do extra work to spill them in the prologue. |
1516 | if (is64Bit()) { |
1517 | // Find the first unallocated argument registers. |
1518 | ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); |
1519 | ArrayRef<MCPhysReg> ArgXMMs = |
1520 | get64BitArgumentXMMs(MF&: TheMachineFunction, CallConv, Subtarget); |
1521 | unsigned NumIntRegs = CCInfo.getFirstUnallocated(Regs: ArgGPRs); |
1522 | unsigned NumXMMRegs = CCInfo.getFirstUnallocated(Regs: ArgXMMs); |
1523 | |
1524 | assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && |
1525 | "SSE register cannot be used when SSE is disabled!" ); |
1526 | |
1527 | if (isWin64()) { |
1528 | // Get to the caller-allocated home save location. Add 8 to account |
1529 | // for the return address. |
1530 | int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8; |
1531 | FuncInfo->setRegSaveFrameIndex( |
1532 | FrameInfo.CreateFixedObject(Size: 1, SPOffset: NumIntRegs * 8 + HomeOffset, IsImmutable: false)); |
1533 | // Fixup to set vararg frame on shadow area (4 x i64). |
1534 | if (NumIntRegs < 4) |
1535 | FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); |
1536 | } else { |
1537 | // For X86-64, if there are vararg parameters that are passed via |
1538 | // registers, then we must store them to their spots on the stack so |
1539 | // they may be loaded by dereferencing the result of va_next. |
1540 | FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); |
1541 | FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16); |
1542 | FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject( |
1543 | Size: ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Alignment: Align(16), isSpillSlot: false)); |
1544 | } |
1545 | |
SmallVector<SDValue, 6> LiveGPRs; // Live incoming values in GPR argument registers.
SmallVector<SDValue, 8> LiveXMMRegs; // Live incoming values in XMM argument registers.
SDValue ALVal; // If applicable, the incoming value of the %al register.
1551 | |
1552 | // Gather all the live in physical registers. |
1553 | for (MCPhysReg Reg : ArgGPRs.slice(N: NumIntRegs)) { |
1554 | Register GPR = TheMachineFunction.addLiveIn(PReg: Reg, RC: &X86::GR64RegClass); |
1555 | LiveGPRs.push_back(Elt: DAG.getCopyFromReg(Chain, dl: DL, Reg: GPR, VT: MVT::i64)); |
1556 | } |
1557 | const auto &AvailableXmms = ArgXMMs.slice(N: NumXMMRegs); |
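// On SysV x86-64, %al carries an upper bound on the number of vector
// registers the caller used for varargs; make it live-in so
// VASTART_SAVE_XMM_REGS can guard the XMM spills on it.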
1558 | if (!AvailableXmms.empty()) { |
1559 | Register AL = TheMachineFunction.addLiveIn(PReg: X86::AL, RC: &X86::GR8RegClass); |
1560 | ALVal = DAG.getCopyFromReg(Chain, dl: DL, Reg: AL, VT: MVT::i8); |
1561 | for (MCPhysReg Reg : AvailableXmms) { |
// The fast register allocator spills virtual registers at basic
// block boundaries. That leads to uses of XMM registers
// outside of the check for %al. Pass physical registers to
// VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1566 | TheMachineFunction.getRegInfo().addLiveIn(Reg); |
1567 | LiveXMMRegs.push_back(Elt: DAG.getRegister(Reg, VT: MVT::v4f32)); |
1568 | } |
1569 | } |
1570 | |
1571 | // Store the integer parameter registers. |
1572 | SmallVector<SDValue, 8> MemOps; |
1573 | SDValue RSFIN = |
1574 | DAG.getFrameIndex(FI: FuncInfo->getRegSaveFrameIndex(), |
1575 | VT: TargLowering.getPointerTy(DL: DAG.getDataLayout())); |
1576 | unsigned Offset = FuncInfo->getVarArgsGPOffset(); |
1577 | for (SDValue Val : LiveGPRs) { |
1578 | SDValue FIN = DAG.getNode(Opcode: ISD::ADD, DL, |
1579 | VT: TargLowering.getPointerTy(DL: DAG.getDataLayout()), |
1580 | N1: RSFIN, N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
1581 | SDValue Store = |
1582 | DAG.getStore(Chain: Val.getValue(R: 1), dl: DL, Val, Ptr: FIN, |
1583 | PtrInfo: MachinePointerInfo::getFixedStack( |
1584 | MF&: DAG.getMachineFunction(), |
1585 | FI: FuncInfo->getRegSaveFrameIndex(), Offset)); |
1586 | MemOps.push_back(Elt: Store); |
1587 | Offset += 8; |
1588 | } |
1589 | |
1590 | // Now store the XMM (fp + vector) parameter registers. |
1591 | if (!LiveXMMRegs.empty()) { |
1592 | SmallVector<SDValue, 12> SaveXMMOps; |
1593 | SaveXMMOps.push_back(Elt: Chain); |
1594 | SaveXMMOps.push_back(Elt: ALVal); |
1595 | SaveXMMOps.push_back(Elt: RSFIN); |
1596 | SaveXMMOps.push_back( |
1597 | Elt: DAG.getTargetConstant(Val: FuncInfo->getVarArgsFPOffset(), DL, VT: MVT::i32)); |
1598 | llvm::append_range(C&: SaveXMMOps, R&: LiveXMMRegs); |
1599 | MachineMemOperand *StoreMMO = |
1600 | DAG.getMachineFunction().getMachineMemOperand( |
1601 | PtrInfo: MachinePointerInfo::getFixedStack( |
1602 | MF&: DAG.getMachineFunction(), FI: FuncInfo->getRegSaveFrameIndex(), |
1603 | Offset), |
1604 | F: MachineMemOperand::MOStore, Size: 128, BaseAlignment: Align(16)); |
1605 | MemOps.push_back(Elt: DAG.getMemIntrinsicNode(Opcode: X86ISD::VASTART_SAVE_XMM_REGS, |
1606 | dl: DL, VTList: DAG.getVTList(VT: MVT::Other), |
1607 | Ops: SaveXMMOps, MemVT: MVT::i8, MMO: StoreMMO)); |
1608 | } |
1609 | |
1610 | if (!MemOps.empty()) |
1611 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOps); |
1612 | } |
1613 | } |
1614 | |
1615 | void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { |
1616 | // Find the largest legal vector type. |
1617 | MVT VecVT = MVT::Other; |
1618 | // FIXME: Only some x86_32 calling conventions support AVX512. |
1619 | if (Subtarget.useAVX512Regs() && |
1620 | (is64Bit() || (CallConv == CallingConv::X86_VectorCall || |
1621 | CallConv == CallingConv::Intel_OCL_BI))) |
1622 | VecVT = MVT::v16f32; |
1623 | else if (Subtarget.hasAVX()) |
1624 | VecVT = MVT::v8f32; |
1625 | else if (Subtarget.hasSSE2()) |
1626 | VecVT = MVT::v4f32; |
1627 | |
1628 | // We forward some GPRs and some vector types. |
1629 | SmallVector<MVT, 2> RegParmTypes; |
1630 | MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32; |
1631 | RegParmTypes.push_back(Elt: IntVT); |
1632 | if (VecVT != MVT::Other) |
1633 | RegParmTypes.push_back(Elt: VecVT); |
1634 | |
1635 | // Compute the set of forwarded registers. The rest are scratch. |
1636 | SmallVectorImpl<ForwardedRegister> &Forwards = |
1637 | FuncInfo->getForwardedMustTailRegParms(); |
1638 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: CC_X86); |
1639 | |
1640 | // Forward AL for SysV x86_64 targets, since it is used for varargs. |
1641 | if (is64Bit() && !isWin64() && !CCInfo.isAllocated(Reg: X86::AL)) { |
1642 | Register ALVReg = TheMachineFunction.addLiveIn(PReg: X86::AL, RC: &X86::GR8RegClass); |
1643 | Forwards.push_back(Elt: ForwardedRegister(ALVReg, X86::AL, MVT::i8)); |
1644 | } |
1645 | |
1646 | // Copy all forwards from physical to virtual registers. |
1647 | for (ForwardedRegister &FR : Forwards) { |
1648 | // FIXME: Can we use a less constrained schedule? |
1649 | SDValue RegVal = DAG.getCopyFromReg(Chain, dl: DL, Reg: FR.VReg, VT: FR.VT); |
1650 | FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister( |
1651 | RegClass: TargLowering.getRegClassFor(VT: FR.VT)); |
1652 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: FR.VReg, N: RegVal); |
1653 | } |
1654 | } |
1655 | |
1656 | void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain, |
1657 | unsigned StackSize) { |
// Set FrameIndex to the 0xAAAAAAA sentinel value to mark the unset state.
// If necessary, it will be set to the correct value later.
1660 | FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); |
1661 | FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); |
1662 | |
1663 | if (FrameInfo.hasVAStart()) |
1664 | createVarArgAreaAndStoreRegisters(Chain, StackSize); |
1665 | |
1666 | if (FrameInfo.hasMustTailInVarArgFunc()) |
1667 | forwardMustTailParameters(Chain); |
1668 | } |
1669 | |
1670 | SDValue X86TargetLowering::LowerFormalArguments( |
1671 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
1672 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
1673 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
1674 | MachineFunction &MF = DAG.getMachineFunction(); |
1675 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
1676 | |
1677 | const Function &F = MF.getFunction(); |
1678 | if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && |
1679 | F.getName() == "main" ) |
1680 | FuncInfo->setForceFramePointer(true); |
1681 | |
1682 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1683 | bool Is64Bit = Subtarget.is64Bit(); |
1684 | bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv); |
1685 | |
1686 | assert( |
1687 | !(IsVarArg && canGuaranteeTCO(CallConv)) && |
1688 | "Var args not supported with calling conv' regcall, fastcc, ghc or hipe" ); |
1689 | |
1690 | // Assign locations to all of the incoming arguments. |
1691 | SmallVector<CCValAssign, 16> ArgLocs; |
1692 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
1693 | |
1694 | // Allocate shadow area for Win64. |
1695 | if (IsWin64) |
1696 | CCInfo.AllocateStack(Size: 32, Alignment: Align(8)); |
1697 | |
1698 | CCInfo.AnalyzeArguments(Ins, Fn: CC_X86); |
1699 | |
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
1702 | if (CallingConv::X86_VectorCall == CallConv) { |
1703 | CCInfo.AnalyzeArgumentsSecondPass(Args: Ins, Fn: CC_X86); |
1704 | } |
1705 | |
// The next loop assumes that the locations are in the same order as the
// input arguments.
1708 | assert(isSortedByValueNo(ArgLocs) && |
1709 | "Argument Location list must be sorted before lowering" ); |
1710 | |
1711 | SDValue ArgValue; |
1712 | for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; |
1713 | ++I, ++InsIndex) { |
1714 | assert(InsIndex < Ins.size() && "Invalid Ins index" ); |
1715 | CCValAssign &VA = ArgLocs[I]; |
1716 | |
1717 | if (VA.isRegLoc()) { |
1718 | EVT RegVT = VA.getLocVT(); |
1719 | if (VA.needsCustom()) { |
1720 | assert( |
1721 | VA.getValVT() == MVT::v64i1 && |
1722 | "Currently the only custom case is when we split v64i1 to 2 regs" ); |
1723 | |
// In the regcall calling convention on 32-bit targets, v64i1 values are
// split up into two registers.
1726 | ArgValue = |
1727 | getv64i1Argument(VA, NextVA&: ArgLocs[++I], Root&: Chain, DAG, DL: dl, Subtarget); |
1728 | } else { |
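// Pick a register class matching the location type. The extended (X)
// classes additionally contain XMM16-XMM31/YMM16-YMM31, which are only
// usable with AVX-512 / VLX.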
1729 | const TargetRegisterClass *RC; |
1730 | if (RegVT == MVT::i8) |
1731 | RC = &X86::GR8RegClass; |
1732 | else if (RegVT == MVT::i16) |
1733 | RC = &X86::GR16RegClass; |
1734 | else if (RegVT == MVT::i32) |
1735 | RC = &X86::GR32RegClass; |
1736 | else if (Is64Bit && RegVT == MVT::i64) |
1737 | RC = &X86::GR64RegClass; |
1738 | else if (RegVT == MVT::f16) |
1739 | RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; |
1740 | else if (RegVT == MVT::f32) |
1741 | RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; |
1742 | else if (RegVT == MVT::f64) |
1743 | RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; |
1744 | else if (RegVT == MVT::f80) |
1745 | RC = &X86::RFP80RegClass; |
1746 | else if (RegVT == MVT::f128) |
1747 | RC = &X86::VR128RegClass; |
1748 | else if (RegVT.is512BitVector()) |
1749 | RC = &X86::VR512RegClass; |
1750 | else if (RegVT.is256BitVector()) |
1751 | RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; |
1752 | else if (RegVT.is128BitVector()) |
1753 | RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass; |
1754 | else if (RegVT == MVT::x86mmx) |
1755 | RC = &X86::VR64RegClass; |
1756 | else if (RegVT == MVT::v1i1) |
1757 | RC = &X86::VK1RegClass; |
1758 | else if (RegVT == MVT::v8i1) |
1759 | RC = &X86::VK8RegClass; |
1760 | else if (RegVT == MVT::v16i1) |
1761 | RC = &X86::VK16RegClass; |
1762 | else if (RegVT == MVT::v32i1) |
1763 | RC = &X86::VK32RegClass; |
1764 | else if (RegVT == MVT::v64i1) |
1765 | RC = &X86::VK64RegClass; |
1766 | else |
1767 | llvm_unreachable("Unknown argument type!" ); |
1768 | |
1769 | Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC); |
1770 | ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, VT: RegVT); |
1771 | } |
1772 | |
1773 | // If this is an 8 or 16-bit value, it is really passed promoted to 32 |
1774 | // bits. Insert an assert[sz]ext to capture this, then truncate to the |
1775 | // right size. |
1776 | if (VA.getLocInfo() == CCValAssign::SExt) |
1777 | ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: RegVT, N1: ArgValue, |
1778 | N2: DAG.getValueType(VA.getValVT())); |
1779 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
1780 | ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RegVT, N1: ArgValue, |
1781 | N2: DAG.getValueType(VA.getValVT())); |
1782 | else if (VA.getLocInfo() == CCValAssign::BCvt) |
1783 | ArgValue = DAG.getBitcast(VT: VA.getValVT(), V: ArgValue); |
1784 | |
1785 | if (VA.isExtInLoc()) { |
1786 | // Handle MMX values passed in XMM regs. |
1787 | if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) |
1788 | ArgValue = DAG.getNode(Opcode: X86ISD::MOVDQ2Q, DL: dl, VT: VA.getValVT(), Operand: ArgValue); |
1789 | else if (VA.getValVT().isVector() && |
1790 | VA.getValVT().getScalarType() == MVT::i1 && |
1791 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
1792 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
1793 | // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 |
1794 | ArgValue = lowerRegToMasks(ValArg: ArgValue, ValVT: VA.getValVT(), ValLoc: RegVT, DL: dl, DAG); |
1795 | } else |
1796 | ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: ArgValue); |
1797 | } |
1798 | } else { |
1799 | assert(VA.isMemLoc()); |
1800 | ArgValue = |
1801 | LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i: InsIndex); |
1802 | } |
1803 | |
// If the value is passed via a pointer, do a load.
1805 | if (VA.getLocInfo() == CCValAssign::Indirect && |
1806 | !(Ins[I].Flags.isByVal() && VA.isRegLoc())) { |
1807 | ArgValue = |
1808 | DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: ArgValue, PtrInfo: MachinePointerInfo()); |
1809 | } |
1810 | |
1811 | InVals.push_back(Elt: ArgValue); |
1812 | } |
1813 | |
1814 | for (unsigned I = 0, E = Ins.size(); I != E; ++I) { |
1815 | if (Ins[I].Flags.isSwiftAsync()) { |
1816 | auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
1817 | if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) |
1818 | X86FI->setHasSwiftAsyncContext(true); |
1819 | else { |
1820 | int PtrSize = Subtarget.is64Bit() ? 8 : 4; |
1821 | int FI = |
1822 | MF.getFrameInfo().CreateStackObject(Size: PtrSize, Alignment: Align(PtrSize), isSpillSlot: false); |
1823 | X86FI->setSwiftAsyncContextFrameIdx(FI); |
1824 | SDValue St = DAG.getStore( |
1825 | Chain: DAG.getEntryNode(), dl, Val: InVals[I], |
1826 | Ptr: DAG.getFrameIndex(FI, VT: PtrSize == 8 ? MVT::i64 : MVT::i32), |
1827 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
1828 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: St, N2: Chain); |
1829 | } |
1830 | } |
1831 | |
// The Swift calling convention does not require us to copy the sret argument
// into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1834 | if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) |
1835 | continue; |
1836 | |
1837 | // All x86 ABIs require that for returning structs by value we copy the |
1838 | // sret argument into %rax/%eax (depending on ABI) for the return. Save |
1839 | // the argument into a virtual register so that we can access it from the |
1840 | // return points. |
1841 | if (Ins[I].Flags.isSRet()) { |
1842 | assert(!FuncInfo->getSRetReturnReg() && |
1843 | "SRet return has already been set" ); |
1844 | MVT PtrTy = getPointerTy(DL: DAG.getDataLayout()); |
1845 | Register Reg = |
1846 | MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrTy)); |
1847 | FuncInfo->setSRetReturnReg(Reg); |
1848 | SDValue Copy = DAG.getCopyToReg(Chain: DAG.getEntryNode(), dl, Reg, N: InVals[I]); |
1849 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Copy, N2: Chain); |
1850 | break; |
1851 | } |
1852 | } |
1853 | |
1854 | unsigned StackSize = CCInfo.getStackSize(); |
1855 | // Align stack specially for tail calls. |
1856 | if (shouldGuaranteeTCO(CC: CallConv, |
1857 | GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) |
1858 | StackSize = GetAlignedArgumentStackSize(StackSize, DAG); |
1859 | |
1860 | if (IsVarArg) |
1861 | VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo) |
1862 | .lowerVarArgsParameters(Chain, StackSize); |
1863 | |
1864 | // Some CCs need callee pop. |
1865 | if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg, |
1866 | GuaranteeTCO: MF.getTarget().Options.GuaranteedTailCallOpt)) { |
1867 | FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. |
1868 | } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { |
1869 | // X86 interrupts must pop the error code (and the alignment padding) if |
1870 | // present. |
1871 | FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); |
1872 | } else { |
1873 | FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. |
1874 | // If this is an sret function, the return should pop the hidden pointer. |
1875 | if (!canGuaranteeTCO(CC: CallConv) && hasCalleePopSRet(Args: Ins, Subtarget)) |
1876 | FuncInfo->setBytesToPopOnReturn(4); |
1877 | } |
1878 | |
1879 | if (!Is64Bit) { |
1880 | // RegSaveFrameIndex is X86-64 only. |
1881 | FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); |
1882 | } |
1883 | |
1884 | FuncInfo->setArgumentStackSize(StackSize); |
1885 | |
1886 | if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { |
1887 | EHPersonality Personality = classifyEHPersonality(Pers: F.getPersonalityFn()); |
1888 | if (Personality == EHPersonality::CoreCLR) { |
1889 | assert(Is64Bit); |
1890 | // TODO: Add a mechanism to frame lowering that will allow us to indicate |
1891 | // that we'd prefer this slot be allocated towards the bottom of the frame |
1892 | // (i.e. near the stack pointer after allocating the frame). Every |
1893 | // funclet needs a copy of this slot in its (mostly empty) frame, and the |
1894 | // offset from the bottom of this and each funclet's frame must be the |
1895 | // same, so the size of funclets' (mostly empty) frames is dictated by |
1896 | // how far this slot is from the bottom (since they allocate just enough |
1897 | // space to accommodate holding this slot at the correct offset). |
1898 | int PSPSymFI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), /*isSpillSlot=*/false); |
1899 | EHInfo->PSPSymFrameIdx = PSPSymFI; |
1900 | } |
1901 | } |
1902 | |
1903 | if (shouldDisableArgRegFromCSR(CC: CallConv) || |
1904 | F.hasFnAttribute(Kind: "no_caller_saved_registers" )) { |
1905 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1906 | for (std::pair<Register, Register> Pair : MRI.liveins()) |
1907 | MRI.disableCalleeSavedRegister(Reg: Pair.first); |
1908 | } |
1909 | |
1910 | if (CallingConv::PreserveNone == CallConv) |
1911 | for (unsigned I = 0, E = Ins.size(); I != E; ++I) { |
1912 | if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() || |
1913 | Ins[I].Flags.isSwiftError()) { |
1914 | errorUnsupported(DAG, dl, |
1915 | Msg: "Swift attributes can't be used with preserve_none" ); |
1916 | break; |
1917 | } |
1918 | } |
1919 | |
1920 | return Chain; |
1921 | } |
1922 | |
1923 | SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, |
1924 | SDValue Arg, const SDLoc &dl, |
1925 | SelectionDAG &DAG, |
1926 | const CCValAssign &VA, |
1927 | ISD::ArgFlagsTy Flags, |
1928 | bool isByVal) const { |
1929 | unsigned LocMemOffset = VA.getLocMemOffset(); |
1930 | SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl); |
1931 | PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()), |
1932 | N1: StackPtr, N2: PtrOff); |
1933 | if (isByVal) |
1934 | return CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff, Chain, Flags, DAG, dl); |
1935 | |
1936 | MaybeAlign Alignment; |
1937 | if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && |
1938 | Arg.getSimpleValueType() != MVT::f80) |
1939 | Alignment = MaybeAlign(4); |
1940 | return DAG.getStore( |
1941 | Chain, dl, Val: Arg, Ptr: PtrOff, |
1942 | PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: LocMemOffset), |
1943 | Alignment); |
1944 | } |
1945 | |
/// Emit a load of the return address if tail call
/// optimization is performed and it is required.
1948 | SDValue X86TargetLowering::EmitTailCallLoadRetAddr( |
1949 | SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, |
1950 | bool Is64Bit, int FPDiff, const SDLoc &dl) const { |
1951 | // Adjust the Return address stack slot. |
1952 | EVT VT = getPointerTy(DL: DAG.getDataLayout()); |
1953 | OutRetAddr = getReturnAddressFrameIndex(DAG); |
1954 | |
1955 | // Load the "old" Return address. |
1956 | OutRetAddr = DAG.getLoad(VT, dl, Chain, Ptr: OutRetAddr, PtrInfo: MachinePointerInfo()); |
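// OutRetAddr now holds the loaded address; return the load's chain (value #1)
// so callers can order later stores after this load.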
1957 | return SDValue(OutRetAddr.getNode(), 1); |
1958 | } |
1959 | |
1960 | /// Emit a store of the return address if tail call |
1961 | /// optimization is performed and it is required (FPDiff!=0). |
1962 | static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, |
1963 | SDValue Chain, SDValue RetAddrFrIdx, |
1964 | EVT PtrVT, unsigned SlotSize, |
1965 | int FPDiff, const SDLoc &dl) { |
1966 | // Store the return address to the appropriate stack slot. |
1967 | if (!FPDiff) return Chain; |
1968 | // Calculate the new stack slot for the return address. |
1969 | int NewReturnAddrFI = |
1970 | MF.getFrameInfo().CreateFixedObject(Size: SlotSize, SPOffset: (int64_t)FPDiff - SlotSize, |
1971 | IsImmutable: false); |
1972 | SDValue NewRetAddrFrIdx = DAG.getFrameIndex(FI: NewReturnAddrFI, VT: PtrVT); |
1973 | Chain = DAG.getStore(Chain, dl, Val: RetAddrFrIdx, Ptr: NewRetAddrFrIdx, |
1974 | PtrInfo: MachinePointerInfo::getFixedStack( |
1975 | MF&: DAG.getMachineFunction(), FI: NewReturnAddrFI)); |
1976 | return Chain; |
1977 | } |
1978 | |
/// Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of the specified width.
1981 | SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, |
1982 | SDValue V1, SDValue V2) const { |
1983 | unsigned NumElems = VT.getVectorNumElements(); |
1984 | SmallVector<int, 8> Mask; |
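// Lane 0 takes element 0 of V2 (shuffle index NumElems); the remaining lanes
// keep V1, matching the movss/movsd semantics.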
1985 | Mask.push_back(Elt: NumElems); |
1986 | for (unsigned i = 1; i != NumElems; ++i) |
1987 | Mask.push_back(Elt: i); |
1988 | return DAG.getVectorShuffle(VT, dl, N1: V1, N2: V2, Mask); |
1989 | } |
1990 | |
1991 | SDValue |
1992 | X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
1993 | SmallVectorImpl<SDValue> &InVals) const { |
1994 | SelectionDAG &DAG = CLI.DAG; |
1995 | SDLoc &dl = CLI.DL; |
1996 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
1997 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
1998 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
1999 | SDValue Chain = CLI.Chain; |
2000 | SDValue Callee = CLI.Callee; |
2001 | CallingConv::ID CallConv = CLI.CallConv; |
2002 | bool &isTailCall = CLI.IsTailCall; |
2003 | bool isVarArg = CLI.IsVarArg; |
2004 | const auto *CB = CLI.CB; |
2005 | |
2006 | MachineFunction &MF = DAG.getMachineFunction(); |
2007 | bool Is64Bit = Subtarget.is64Bit(); |
2008 | bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv); |
2009 | bool IsSibcall = false; |
2010 | bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || |
2011 | CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; |
2012 | bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Args: Outs, Subtarget); |
2013 | X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); |
2014 | bool HasNCSR = (CB && isa<CallInst>(Val: CB) && |
2015 | CB->hasFnAttr(Kind: "no_caller_saved_registers" )); |
2016 | bool HasNoCfCheck = (CB && CB->doesNoCfCheck()); |
2017 | bool IsIndirectCall = (CB && isa<CallInst>(Val: CB) && CB->isIndirectCall()); |
2018 | bool IsCFICall = IsIndirectCall && CLI.CFIType; |
2019 | const Module *M = MF.getFunction().getParent(); |
2020 | Metadata *IsCFProtectionSupported = M->getModuleFlag(Key: "cf-protection-branch" ); |
2021 | |
2022 | MachineFunction::CallSiteInfo CSInfo; |
2023 | if (CallConv == CallingConv::X86_INTR) |
2024 | report_fatal_error(reason: "X86 interrupts may not be called directly" ); |
2025 | |
2026 | // Analyze operands of the call, assigning locations to each operand. |
2027 | SmallVector<CCValAssign, 16> ArgLocs; |
2028 | CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); |
2029 | |
2030 | // Allocate shadow area for Win64. |
2031 | if (IsWin64) |
2032 | CCInfo.AllocateStack(Size: 32, Alignment: Align(8)); |
2033 | |
2034 | CCInfo.AnalyzeArguments(Outs, Fn: CC_X86); |
2035 | |
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
2038 | if (CallingConv::X86_VectorCall == CallConv) { |
2039 | CCInfo.AnalyzeArgumentsSecondPass(Args: Outs, Fn: CC_X86); |
2040 | } |
2041 | |
2042 | bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); |
2043 | if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) { |
2044 | // If we are using a GOT, disable tail calls to external symbols with |
2045 | // default visibility. Tail calling such a symbol requires using a GOT |
2046 | // relocation, which forces early binding of the symbol. This breaks code |
// that requires lazy function symbol resolution. Using musttail or
2048 | // GuaranteedTailCallOpt will override this. |
2049 | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee); |
2050 | if (!G || (!G->getGlobal()->hasLocalLinkage() && |
2051 | G->getGlobal()->hasDefaultVisibility())) |
2052 | isTailCall = false; |
2053 | } |
2054 | |
2055 | if (isTailCall && !IsMustTail) { |
2056 | // Check if it's really possible to do a tail call. |
2057 | isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, |
2058 | IsCalleePopSRet); |
2059 | |
2060 | // Sibcalls are automatically detected tailcalls which do not require |
2061 | // ABI changes. |
2062 | if (!IsGuaranteeTCO && isTailCall) |
2063 | IsSibcall = true; |
2064 | |
2065 | if (isTailCall) |
2066 | ++NumTailCalls; |
2067 | } |
2068 | |
2069 | if (IsMustTail && !isTailCall) |
2070 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
2071 | "site marked musttail" ); |
2072 | |
2073 | assert(!(isVarArg && canGuaranteeTCO(CallConv)) && |
2074 | "Var args not supported with calling convention fastcc, ghc or hipe" ); |
2075 | |
2076 | // Get a count of how many bytes are to be pushed on the stack. |
2077 | unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); |
2078 | if (IsSibcall) |
// This is a sibcall. The memory operands are already available in the
// caller's own caller's stack.
2081 | NumBytes = 0; |
2082 | else if (IsGuaranteeTCO && canGuaranteeTCO(CC: CallConv)) |
2083 | NumBytes = GetAlignedArgumentStackSize(StackSize: NumBytes, DAG); |
2084 | |
2085 | int FPDiff = 0; |
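// For guaranteed tail calls, FPDiff is the difference between the argument
// stack space the caller received and what this call needs; it shifts the
// outgoing argument slots and the return address slot.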
2086 | if (isTailCall && |
2087 | shouldGuaranteeTCO(CC: CallConv, |
2088 | GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) { |
2089 | // Lower arguments at fp - stackoffset + fpdiff. |
2090 | unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); |
2091 | |
2092 | FPDiff = NumBytesCallerPushed - NumBytes; |
2093 | |
// Set the delta of movement of the return address stack slot, but only
// update it when the new delta moves the slot further (i.e. is more
// negative) than the previous delta.
2096 | if (FPDiff < X86Info->getTCReturnAddrDelta()) |
2097 | X86Info->setTCReturnAddrDelta(FPDiff); |
2098 | } |
2099 | |
2100 | unsigned NumBytesToPush = NumBytes; |
2101 | unsigned NumBytesToPop = NumBytes; |
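// NumBytesToPush is what CALLSEQ_START reserves below; for
// inalloca/preallocated calls it is forced to zero because the argument
// memory has already been set up by the caller.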
2102 | |
2103 | // If we have an inalloca argument, all stack space has already been allocated |
// for us and is right at the top of the stack. We don't support multiple
2105 | // arguments passed in memory when using inalloca. |
2106 | if (!Outs.empty() && Outs.back().Flags.isInAlloca()) { |
2107 | NumBytesToPush = 0; |
2108 | if (!ArgLocs.back().isMemLoc()) |
2109 | report_fatal_error(reason: "cannot use inalloca attribute on a register " |
2110 | "parameter" ); |
2111 | if (ArgLocs.back().getLocMemOffset() != 0) |
2112 | report_fatal_error(reason: "any parameter with the inalloca attribute must be " |
2113 | "the only memory argument" ); |
2114 | } else if (CLI.IsPreallocated) { |
2115 | assert(ArgLocs.back().isMemLoc() && |
2116 | "cannot use preallocated attribute on a register " |
2117 | "parameter" ); |
2118 | SmallVector<size_t, 4> PreallocatedOffsets; |
2119 | for (size_t i = 0; i < CLI.OutVals.size(); ++i) { |
2120 | if (CLI.CB->paramHasAttr(ArgNo: i, Kind: Attribute::Preallocated)) { |
2121 | PreallocatedOffsets.push_back(Elt: ArgLocs[i].getLocMemOffset()); |
2122 | } |
2123 | } |
2124 | auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>(); |
2125 | size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CS: CLI.CB); |
2126 | MFI->setPreallocatedStackSize(Id: PreallocatedId, StackSize: NumBytes); |
2127 | MFI->setPreallocatedArgOffsets(Id: PreallocatedId, AO: PreallocatedOffsets); |
2128 | NumBytesToPush = 0; |
2129 | } |
2130 | |
2131 | if (!IsSibcall && !IsMustTail) |
2132 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytesToPush, |
2133 | OutSize: NumBytes - NumBytesToPush, DL: dl); |
2134 | |
2135 | SDValue RetAddrFrIdx; |
2136 | // Load return address for tail calls. |
2137 | if (isTailCall && FPDiff) |
2138 | Chain = EmitTailCallLoadRetAddr(DAG, OutRetAddr&: RetAddrFrIdx, Chain, IsTailCall: isTailCall, |
2139 | Is64Bit, FPDiff, dl); |
2140 | |
2141 | SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; |
2142 | SmallVector<SDValue, 8> MemOpChains; |
2143 | SDValue StackPtr; |
2144 | |
// The next loop assumes that the locations are in the same order as the
// input arguments.
2147 | assert(isSortedByValueNo(ArgLocs) && |
2148 | "Argument Location list must be sorted before lowering" ); |
2149 | |
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
2152 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
2153 | for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E; |
2154 | ++I, ++OutIndex) { |
2155 | assert(OutIndex < Outs.size() && "Invalid Out index" ); |
2156 | // Skip inalloca/preallocated arguments, they have already been written. |
2157 | ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags; |
2158 | if (Flags.isInAlloca() || Flags.isPreallocated()) |
2159 | continue; |
2160 | |
2161 | CCValAssign &VA = ArgLocs[I]; |
2162 | EVT RegVT = VA.getLocVT(); |
2163 | SDValue Arg = OutVals[OutIndex]; |
2164 | bool isByVal = Flags.isByVal(); |
2165 | |
2166 | // Promote the value if needed. |
2167 | switch (VA.getLocInfo()) { |
2168 | default: llvm_unreachable("Unknown loc info!" ); |
2169 | case CCValAssign::Full: break; |
2170 | case CCValAssign::SExt: |
2171 | Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: RegVT, Operand: Arg); |
2172 | break; |
2173 | case CCValAssign::ZExt: |
2174 | Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: RegVT, Operand: Arg); |
2175 | break; |
2176 | case CCValAssign::AExt: |
2177 | if (Arg.getValueType().isVector() && |
2178 | Arg.getValueType().getVectorElementType() == MVT::i1) |
2179 | Arg = lowerMasksToReg(ValArg: Arg, ValLoc: RegVT, DL: dl, DAG); |
2180 | else if (RegVT.is128BitVector()) { |
2181 | // Special case: passing MMX values in XMM registers. |
2182 | Arg = DAG.getBitcast(VT: MVT::i64, V: Arg); |
2183 | Arg = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: MVT::v2i64, Operand: Arg); |
2184 | Arg = getMOVL(DAG, dl, VT: MVT::v2i64, V1: DAG.getUNDEF(VT: MVT::v2i64), V2: Arg); |
2185 | } else |
2186 | Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: RegVT, Operand: Arg); |
2187 | break; |
2188 | case CCValAssign::BCvt: |
2189 | Arg = DAG.getBitcast(VT: RegVT, V: Arg); |
2190 | break; |
2191 | case CCValAssign::Indirect: { |
2192 | if (isByVal) { |
2193 | // Memcpy the argument to a temporary stack slot to prevent |
2194 | // the caller from seeing any modifications the callee may make |
2195 | // as guaranteed by the `byval` attribute. |
2196 | int FrameIdx = MF.getFrameInfo().CreateStackObject( |
2197 | Size: Flags.getByValSize(), |
2198 | Alignment: std::max(a: Align(16), b: Flags.getNonZeroByValAlign()), isSpillSlot: false); |
2199 | SDValue StackSlot = |
2200 | DAG.getFrameIndex(FI: FrameIdx, VT: getPointerTy(DL: DAG.getDataLayout())); |
2201 | Chain = |
2202 | CreateCopyOfByValArgument(Src: Arg, Dst: StackSlot, Chain, Flags, DAG, dl); |
2203 | // From now on treat this as a regular pointer |
2204 | Arg = StackSlot; |
2205 | isByVal = false; |
2206 | } else { |
2207 | // Store the argument. |
2208 | SDValue SpillSlot = DAG.CreateStackTemporary(VT: VA.getValVT()); |
2209 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
2210 | Chain = DAG.getStore( |
2211 | Chain, dl, Val: Arg, Ptr: SpillSlot, |
2212 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)); |
2213 | Arg = SpillSlot; |
2214 | } |
2215 | break; |
2216 | } |
2217 | } |
2218 | |
2219 | if (VA.needsCustom()) { |
2220 | assert(VA.getValVT() == MVT::v64i1 && |
2221 | "Currently the only custom case is when we split v64i1 to 2 regs" ); |
2222 | // Split v64i1 value into two registers |
2223 | Passv64i1ArgInRegs(DL: dl, DAG, Arg, RegsToPass, VA, NextVA&: ArgLocs[++I], Subtarget); |
2224 | } else if (VA.isRegLoc()) { |
2225 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg)); |
2226 | const TargetOptions &Options = DAG.getTarget().Options; |
2227 | if (Options.EmitCallSiteInfo) |
2228 | CSInfo.ArgRegPairs.emplace_back(Args: VA.getLocReg(), Args&: I); |
2229 | if (isVarArg && IsWin64) { |
// The Win64 ABI requires an argument XMM register to be copied to the
// corresponding shadow register if the callee is a varargs function.
2232 | Register ShadowReg; |
2233 | switch (VA.getLocReg()) { |
2234 | case X86::XMM0: ShadowReg = X86::RCX; break; |
2235 | case X86::XMM1: ShadowReg = X86::RDX; break; |
2236 | case X86::XMM2: ShadowReg = X86::R8; break; |
2237 | case X86::XMM3: ShadowReg = X86::R9; break; |
2238 | } |
2239 | if (ShadowReg) |
2240 | RegsToPass.push_back(Elt: std::make_pair(x&: ShadowReg, y&: Arg)); |
2241 | } |
2242 | } else if (!IsSibcall && (!isTailCall || isByVal)) { |
2243 | assert(VA.isMemLoc()); |
2244 | if (!StackPtr.getNode()) |
2245 | StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(), |
2246 | VT: getPointerTy(DL: DAG.getDataLayout())); |
2247 | MemOpChains.push_back(Elt: LowerMemOpCallTo(Chain, StackPtr, Arg, |
2248 | dl, DAG, VA, Flags, isByVal)); |
2249 | } |
2250 | } |
2251 | |
2252 | if (!MemOpChains.empty()) |
2253 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains); |
2254 | |
2255 | if (Subtarget.isPICStyleGOT()) { |
// ELF/PIC requires the GOT pointer to be in the EBX register before function
// calls made via the PLT (except for regcall).
2258 | if (!isTailCall) { |
// An indirect call with the RegCall calling convention may use up all the
// general registers, so it is not suitable to reserve the EBX register for
// the GOT address; just let the register allocator handle it.
2262 | if (CallConv != CallingConv::X86_RegCall) |
2263 | RegsToPass.push_back(Elt: std::make_pair( |
2264 | x: Register(X86::EBX), y: DAG.getNode(Opcode: X86ISD::GlobalBaseReg, DL: SDLoc(), |
2265 | VT: getPointerTy(DL: DAG.getDataLayout())))); |
2266 | } else { |
2267 | // If we are tail calling and generating PIC/GOT style code load the |
2268 | // address of the callee into ECX. The value in ecx is used as target of |
2269 | // the tail jump. This is done to circumvent the ebx/callee-saved problem |
2270 | // for tail calls on PIC/GOT architectures. Normally we would just put the |
2271 | // address of GOT into ebx and then call target@PLT. But for tail calls |
2272 | // ebx would be restored (since ebx is callee saved) before jumping to the |
2273 | // target@PLT. |
2274 | |
2275 | // Note: The actual moving to ECX is done further down. |
2276 | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee); |
2277 | if (G && !G->getGlobal()->hasLocalLinkage() && |
2278 | G->getGlobal()->hasDefaultVisibility()) |
2279 | Callee = LowerGlobalAddress(Op: Callee, DAG); |
2280 | else if (isa<ExternalSymbolSDNode>(Val: Callee)) |
2281 | Callee = LowerExternalSymbol(Op: Callee, DAG); |
2282 | } |
2283 | } |
2284 | |
2285 | if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail && |
2286 | (Subtarget.hasSSE1() || !M->getModuleFlag(Key: "SkipRaxSetup" ))) { |
2287 | // From AMD64 ABI document: |
2288 | // For calls that may call functions that use varargs or stdargs |
2289 | // (prototype-less calls or calls to functions containing ellipsis (...) in |
2290 | // the declaration) %al is used as hidden argument to specify the number |
2291 | // of SSE registers used. The contents of %al do not need to match exactly |
// the number of registers, but must be an upper bound on the number of SSE
2293 | // registers used and is in the range 0 - 8 inclusive. |
2294 | |
2295 | // Count the number of XMM registers allocated. |
2296 | static const MCPhysReg XMMArgRegs[] = { |
2297 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
2298 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
2299 | }; |
2300 | unsigned NumXMMRegs = CCInfo.getFirstUnallocated(Regs: XMMArgRegs); |
2301 | assert((Subtarget.hasSSE1() || !NumXMMRegs) |
2302 | && "SSE registers cannot be used when SSE is disabled" ); |
2303 | RegsToPass.push_back(Elt: std::make_pair(x: Register(X86::AL), |
2304 | y: DAG.getConstant(Val: NumXMMRegs, DL: dl, |
2305 | VT: MVT::i8))); |
2306 | } |
2307 | |
2308 | if (isVarArg && IsMustTail) { |
2309 | const auto &Forwards = X86Info->getForwardedMustTailRegParms(); |
2310 | for (const auto &F : Forwards) { |
2311 | SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: F.VReg, VT: F.VT); |
2312 | RegsToPass.push_back(Elt: std::make_pair(x: F.PReg, y&: Val)); |
2313 | } |
2314 | } |
2315 | |
2316 | // For tail calls lower the arguments to the 'real' stack slots. Sibcalls |
2317 | // don't need this because the eligibility check rejects calls that require |
2318 | // shuffling arguments passed in memory. |
2319 | if (!IsSibcall && isTailCall) { |
2320 | // Force all the incoming stack arguments to be loaded from the stack |
2321 | // before any new outgoing arguments are stored to the stack, because the |
2322 | // outgoing stack slots may alias the incoming argument stack slots, and |
2323 | // the alias isn't otherwise explicit. This is slightly more conservative |
2324 | // than necessary, because it means that each store effectively depends |
2325 | // on every argument instead of just those arguments it would clobber. |
2326 | SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); |
2327 | |
2328 | SmallVector<SDValue, 8> MemOpChains2; |
2329 | SDValue FIN; |
2330 | int FI = 0; |
2331 | for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E; |
2332 | ++I, ++OutsIndex) { |
2333 | CCValAssign &VA = ArgLocs[I]; |
2334 | |
2335 | if (VA.isRegLoc()) { |
2336 | if (VA.needsCustom()) { |
2337 | assert((CallConv == CallingConv::X86_RegCall) && |
2338 | "Expecting custom case only in regcall calling convention" ); |
// This means that we are in the special case where one argument was
// passed through two register locations; skip the next location.
2341 | ++I; |
2342 | } |
2343 | |
2344 | continue; |
2345 | } |
2346 | |
2347 | assert(VA.isMemLoc()); |
2348 | SDValue Arg = OutVals[OutsIndex]; |
2349 | ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags; |
2350 | // Skip inalloca/preallocated arguments. They don't require any work. |
2351 | if (Flags.isInAlloca() || Flags.isPreallocated()) |
2352 | continue; |
2353 | // Create frame index. |
2354 | int32_t Offset = VA.getLocMemOffset()+FPDiff; |
2355 | uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; |
2356 | FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true); |
2357 | FIN = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
2358 | |
2359 | if (Flags.isByVal()) { |
// Copy relative to the frame pointer.
2361 | SDValue Source = DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL: dl); |
2362 | if (!StackPtr.getNode()) |
2363 | StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(), |
2364 | VT: getPointerTy(DL: DAG.getDataLayout())); |
2365 | Source = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()), |
2366 | N1: StackPtr, N2: Source); |
2367 | |
2368 | MemOpChains2.push_back(Elt: CreateCopyOfByValArgument(Src: Source, Dst: FIN, |
2369 | Chain: ArgChain, |
2370 | Flags, DAG, dl)); |
2371 | } else { |
// Store relative to the frame pointer.
2373 | MemOpChains2.push_back(Elt: DAG.getStore( |
2374 | Chain: ArgChain, dl, Val: Arg, Ptr: FIN, |
2375 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI))); |
2376 | } |
2377 | } |
2378 | |
2379 | if (!MemOpChains2.empty()) |
2380 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains2); |
2381 | |
2382 | // Store the return address to the appropriate stack slot. |
2383 | Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, |
2384 | PtrVT: getPointerTy(DL: DAG.getDataLayout()), |
2385 | SlotSize: RegInfo->getSlotSize(), FPDiff, dl); |
2386 | } |
2387 | |
2388 | // Build a sequence of copy-to-reg nodes chained together with token chain |
2389 | // and glue operands which copy the outgoing args into registers. |
2390 | SDValue InGlue; |
2391 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
2392 | Chain = DAG.getCopyToReg(Chain, dl, Reg: RegsToPass[i].first, |
2393 | N: RegsToPass[i].second, Glue: InGlue); |
2394 | InGlue = Chain.getValue(R: 1); |
2395 | } |
2396 | |
2397 | if (DAG.getTarget().getCodeModel() == CodeModel::Large) { |
2398 | assert(Is64Bit && "Large code model is only legal in 64-bit mode." ); |
2399 | // In the 64-bit large code model, we have to make all calls |
2400 | // through a register, since the call instruction's 32-bit |
2401 | // pc-relative offset may not be large enough to hold the whole |
2402 | // address. |
2403 | } else if (Callee->getOpcode() == ISD::GlobalAddress || |
2404 | Callee->getOpcode() == ISD::ExternalSymbol) { |
2405 | // Lower direct calls to global addresses and external symbols. Setting |
2406 | // ForCall to true here has the effect of removing WrapperRIP when possible |
2407 | // to allow direct calls to be selected without first materializing the |
2408 | // address into a register. |
2409 | Callee = LowerGlobalOrExternal(Op: Callee, DAG, /*ForCall=*/true); |
2410 | } else if (Subtarget.isTarget64BitILP32() && |
2411 | Callee.getValueType() == MVT::i32) { |
// Zero-extend the 32-bit Callee address into a 64-bit one, per the x32 ABI.
2413 | Callee = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i64, Operand: Callee); |
2414 | } |
2415 | |
2416 | // Returns a chain & a glue for retval copy to use. |
2417 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
2418 | SmallVector<SDValue, 8> Ops; |
2419 | |
2420 | if (!IsSibcall && isTailCall && !IsMustTail) { |
2421 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: 0, Glue: InGlue, DL: dl); |
2422 | InGlue = Chain.getValue(R: 1); |
2423 | } |
2424 | |
2425 | Ops.push_back(Elt: Chain); |
2426 | Ops.push_back(Elt: Callee); |
2427 | |
2428 | if (isTailCall) |
2429 | Ops.push_back(Elt: DAG.getTargetConstant(Val: FPDiff, DL: dl, VT: MVT::i32)); |
2430 | |
2431 | // Add argument registers to the end of the list so that they are known live |
2432 | // into the call. |
2433 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
2434 | Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass[i].first, |
2435 | VT: RegsToPass[i].second.getValueType())); |
2436 | |
2437 | // Add a register mask operand representing the call-preserved registers. |
2438 | const uint32_t *Mask = [&]() { |
2439 | auto AdaptedCC = CallConv; |
2440 | // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists), |
2441 | // use X86_INTR calling convention because it has the same CSR mask |
2442 | // (same preserved registers). |
2443 | if (HasNCSR) |
2444 | AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR; |
// If NoCalleeSavedRegisters is requested, then use GHC since it happens
2446 | // to use the CSR_NoRegs_RegMask. |
2447 | if (CB && CB->hasFnAttr(Kind: "no_callee_saved_registers" )) |
2448 | AdaptedCC = (CallingConv::ID)CallingConv::GHC; |
2449 | return RegInfo->getCallPreservedMask(MF, AdaptedCC); |
2450 | }(); |
2451 | assert(Mask && "Missing call preserved mask for calling convention" ); |
2452 | |
2453 | // If this is an invoke in a 32-bit function using a funclet-based |
2454 | // personality, assume the function clobbers all registers. If an exception |
2455 | // is thrown, the runtime will not restore CSRs. |
2456 | // FIXME: Model this more precisely so that we can register allocate across |
2457 | // the normal edge and spill and fill across the exceptional edge. |
2458 | if (!Is64Bit && CLI.CB && isa<InvokeInst>(Val: CLI.CB)) { |
2459 | const Function &CallerFn = MF.getFunction(); |
2460 | EHPersonality Pers = |
2461 | CallerFn.hasPersonalityFn() |
2462 | ? classifyEHPersonality(Pers: CallerFn.getPersonalityFn()) |
2463 | : EHPersonality::Unknown; |
2464 | if (isFuncletEHPersonality(Pers)) |
2465 | Mask = RegInfo->getNoPreservedMask(); |
2466 | } |
2467 | |
2468 | // Define a new register mask from the existing mask. |
2469 | uint32_t *RegMask = nullptr; |
2470 | |
2471 | // In some calling conventions we need to remove the used physical registers |
2472 | // from the reg mask. Create a new RegMask for such calling conventions. |
2473 | // RegMask for calling conventions that disable only return registers (e.g. |
2474 | // preserve_most) will be modified later in LowerCallResult. |
2475 | bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CC: CallConv) || HasNCSR; |
2476 | if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CC: CallConv)) { |
2477 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2478 | |
2479 | // Allocate a new Reg Mask and copy Mask. |
2480 | RegMask = MF.allocateRegMask(); |
2481 | unsigned RegMaskSize = MachineOperand::getRegMaskSize(NumRegs: TRI->getNumRegs()); |
2482 | memcpy(dest: RegMask, src: Mask, n: sizeof(RegMask[0]) * RegMaskSize); |
2483 | |
2484 | // Make sure all sub registers of the argument registers are reset |
2485 | // in the RegMask. |
2486 | if (ShouldDisableArgRegs) { |
2487 | for (auto const &RegPair : RegsToPass) |
2488 | for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg: RegPair.first)) |
2489 | RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); |
2490 | } |
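// Worked example of the bit arithmetic above: a sub-register whose number is
// 37 lives in word 37 / 32 == 1 of the mask, and clearing bit 37 % 32 == 5 of
// that word marks that register as clobbered by the call.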
2491 | |
2492 | // Create the RegMask Operand according to our updated mask. |
2493 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask)); |
2494 | } else { |
2495 | // Create the RegMask Operand according to the static mask. |
2496 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
2497 | } |
2498 | |
2499 | if (InGlue.getNode()) |
2500 | Ops.push_back(Elt: InGlue); |
2501 | |
2502 | if (isTailCall) { |
2503 | // We used to do: |
2504 | //// If this is the first return lowered for this function, add the regs |
2505 | //// to the liveout set for the function. |
2506 | // This isn't right, although it's probably harmless on x86; liveouts
2507 | // should be computed from returns, not tail calls. Consider a void
2508 | // function making a tail call to a function returning int. |
2509 | MF.getFrameInfo().setHasTailCall(); |
2510 | SDValue Ret = DAG.getNode(Opcode: X86ISD::TC_RETURN, DL: dl, VTList: NodeTys, Ops); |
2511 | |
2512 | if (IsCFICall) |
2513 | Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
2514 | |
2515 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
2516 | DAG.addCallSiteInfo(Node: Ret.getNode(), CallInfo: std::move(CSInfo)); |
2517 | return Ret; |
2518 | } |
2519 | |
2520 | if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) { |
2521 | Chain = DAG.getNode(Opcode: X86ISD::NT_CALL, DL: dl, VTList: NodeTys, Ops); |
2522 | } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CB: CLI.CB)) { |
2523 | // Calls with a "clang.arc.attachedcall" bundle are special. They should be |
2524 | // expanded to the call, directly followed by a special marker sequence and |
2525 | // a call to an ObjC library function. Use the CALL_RVMARKER to do that.
2526 | assert(!isTailCall && |
2527 | "tail calls cannot be marked with clang.arc.attachedcall" ); |
2528 | assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode" ); |
2529 | |
2530 | // Add a target global address for the retainRV/claimRV runtime function |
2531 | // just before the call target. |
2532 | Function *ARCFn = *objcarc::getAttachedARCFunction(CB: CLI.CB); |
2533 | auto PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
2534 | auto GA = DAG.getTargetGlobalAddress(GV: ARCFn, DL: dl, VT: PtrVT); |
2535 | Ops.insert(I: Ops.begin() + 1, Elt: GA); |
2536 | Chain = DAG.getNode(Opcode: X86ISD::CALL_RVMARKER, DL: dl, VTList: NodeTys, Ops); |
2537 | } else { |
2538 | Chain = DAG.getNode(Opcode: X86ISD::CALL, DL: dl, VTList: NodeTys, Ops); |
2539 | } |
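// Hedged IR sketch of the CALL_RVMARKER case above (illustrative only):
//   %call = call ptr @foo() [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
// The retainRV/claimRV function named in the bundle is inserted just before
// the callee operand, so the marker sequence and the runtime call can be
// emitted immediately after the call instruction.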
2540 | |
2541 | if (IsCFICall) |
2542 | Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
2543 | |
2544 | InGlue = Chain.getValue(R: 1); |
2545 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
2546 | DAG.addCallSiteInfo(Node: Chain.getNode(), CallInfo: std::move(CSInfo)); |
2547 | |
2548 | // Save heapallocsite metadata. |
2549 | if (CLI.CB) |
2550 | if (MDNode *HeapAlloc = CLI.CB->getMetadata(Kind: "heapallocsite" )) |
2551 | DAG.addHeapAllocSite(Node: Chain.getNode(), MD: HeapAlloc); |
2552 | |
2553 | // Create the CALLSEQ_END node. |
2554 | unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing. |
2555 | if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg: isVarArg, |
2556 | GuaranteeTCO: DAG.getTarget().Options.GuaranteedTailCallOpt)) |
2557 | NumBytesForCalleeToPop = NumBytes; // Callee pops everything |
2558 | else if (!canGuaranteeTCO(CC: CallConv) && IsCalleePopSRet) |
2559 | // If this call passes a struct-return pointer, the callee |
2560 | // pops that struct pointer. |
2561 | NumBytesForCalleeToPop = 4; |
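// Worked example (assuming the 32-bit psABI): for an stdcall callee taking
// two i32 stack arguments, NumBytes == 8 and the callee pops everything (it
// returns with "ret 8"), so NumBytesForCalleeToPop == 8 and the caller does
// not readjust the stack. For a cdecl call where IsCalleePopSRet is set, the
// callee pops only the 4-byte struct-return pointer.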
2562 | |
2563 | // Returns a glue for retval copy to use. |
2564 | if (!IsSibcall) { |
2565 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: NumBytesForCalleeToPop, |
2566 | Glue: InGlue, DL: dl); |
2567 | InGlue = Chain.getValue(R: 1); |
2568 | } |
2569 | |
2570 | if (CallingConv::PreserveNone == CallConv) |
2571 | for (unsigned I = 0, E = Outs.size(); I != E; ++I) { |
2572 | if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() || |
2573 | Outs[I].Flags.isSwiftError()) { |
2574 | errorUnsupported(DAG, dl, |
2575 | Msg: "Swift attributes can't be used with preserve_none" ); |
2576 | break; |
2577 | } |
2578 | } |
2579 | |
2580 | // Handle result values, copying them out of physregs into vregs that we |
2581 | // return. |
2582 | return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG, |
2583 | InVals, RegMask); |
2584 | } |
2585 | |
2586 | //===----------------------------------------------------------------------===// |
2587 | // Fast Calling Convention (tail call) implementation |
2588 | //===----------------------------------------------------------------------===// |
2589 | |
2590 | // Like stdcall, the callee cleans up the arguments, except that ECX is
2591 | // reserved for storing the address of the tail-called function, so only two
2592 | // registers are free for argument passing (inreg). Tail call optimization is
2593 | // performed provided:
2594 | // * tailcallopt is enabled
2595 | // * caller and callee both use fastcc
2596 | // On x86-64 with GOT-style position-independent code, only local (within
2597 | // module) calls are supported at the moment.
2598 | // To keep the stack aligned according to the platform ABI, the function
2599 | // GetAlignedArgumentStackSize ensures that the argument delta is always a
2600 | // multiple of the stack alignment (Darwin's dyld, for example, needs this).
2601 | // If the callee of a tail call has more arguments than the caller, the caller
2602 | // needs to make sure that there is room to move the RETADDR to. This is
2603 | // achieved by reserving an area the size of the argument delta right after the
2604 | // original RETADDR, but before the saved frame pointer or the spilled
2605 | // registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4):
2606 | // stack layout: |
2607 | // arg1 |
2608 | // arg2 |
2609 | // RETADDR |
2610 | // [ new RETADDR |
2611 | // move area ] |
2612 | // (possible EBP) |
2613 | // ESI |
2614 | // EDI |
2615 | // local1 .. |
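// A minimal IR sketch of the situation described above (illustrative names,
// 32-bit x86, compiled with -tailcallopt):
//   declare fastcc void @callee(i32, i32, i32, i32)
//   define fastcc void @caller(i32 %a, i32 %b) {
//     tail call fastcc void @callee(i32 %a, i32 %b, i32 1, i32 2)
//     ret void
//   }
// The callee needs more argument stack than the caller received, so the
// caller reserves the "move area" above to relocate the RETADDR.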
2616 | |
2617 | /// Align the argument stack size so that it plus the return-address slot is a
2618 | /// multiple of the stack alignment, e.g. 16n + 12 for 16-byte alignment with
2618 | /// 4-byte slots.
2619 | unsigned |
2620 | X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize, |
2621 | SelectionDAG &DAG) const { |
2622 | const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign(); |
2623 | const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize(); |
2624 | assert(StackSize % SlotSize == 0 && |
2625 | "StackSize must be a multiple of SlotSize" ); |
2626 | return alignTo(Size: StackSize + SlotSize, A: StackAlignment) - SlotSize; |
2627 | } |
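// Worked example (assuming x86-64: SlotSize == 8, StackAlignment == 16):
//   StackSize == 40  ->  alignTo(48, 16) - 8 == 40   (already in aligned form)
//   StackSize == 48  ->  alignTo(56, 16) - 8 == 56   (rounded up)
// i.e. the returned size plus the pushed return address is 16-byte aligned.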
2628 | |
2629 | /// Return true if the given stack call argument is already available at the
2630 | /// same relative position in the caller's incoming argument stack.
2631 | static |
2632 | bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, |
2633 | MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, |
2634 | const X86InstrInfo *TII, const CCValAssign &VA) { |
2635 | unsigned Bytes = Arg.getValueSizeInBits() / 8; |
2636 | |
2637 | for (;;) { |
2638 | // Look through nodes that don't alter the bits of the incoming value. |
2639 | unsigned Op = Arg.getOpcode(); |
2640 | if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST || |
2641 | Op == ISD::AssertZext) { |
2642 | Arg = Arg.getOperand(i: 0); |
2643 | continue; |
2644 | } |
2645 | if (Op == ISD::TRUNCATE) { |
2646 | const SDValue &TruncInput = Arg.getOperand(i: 0); |
2647 | if (TruncInput.getOpcode() == ISD::AssertZext && |
2648 | cast<VTSDNode>(Val: TruncInput.getOperand(i: 1))->getVT() == |
2649 | Arg.getValueType()) { |
2650 | Arg = TruncInput.getOperand(i: 0); |
2651 | continue; |
2652 | } |
2653 | } |
2654 | break; |
2655 | } |
2656 | |
2657 | int FI = INT_MAX; |
2658 | if (Arg.getOpcode() == ISD::CopyFromReg) { |
2659 | Register VR = cast<RegisterSDNode>(Val: Arg.getOperand(i: 1))->getReg(); |
2660 | if (!VR.isVirtual()) |
2661 | return false; |
2662 | MachineInstr *Def = MRI->getVRegDef(Reg: VR); |
2663 | if (!Def) |
2664 | return false; |
2665 | if (!Flags.isByVal()) { |
2666 | if (!TII->isLoadFromStackSlot(MI: *Def, FrameIndex&: FI)) |
2667 | return false; |
2668 | } else { |
2669 | unsigned Opcode = Def->getOpcode(); |
2670 | if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || |
2671 | Opcode == X86::LEA64_32r) && |
2672 | Def->getOperand(i: 1).isFI()) { |
2673 | FI = Def->getOperand(i: 1).getIndex(); |
2674 | Bytes = Flags.getByValSize(); |
2675 | } else |
2676 | return false; |
2677 | } |
2678 | } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val&: Arg)) { |
2679 | if (Flags.isByVal()) |
2680 | // ByVal argument is passed in as a pointer but it's now being |
2681 | // dereferenced. e.g. |
2682 | // define @foo(%struct.X* %A) { |
2683 | // tail call @bar(%struct.X* byval %A) |
2684 | // } |
2685 | return false; |
2686 | SDValue Ptr = Ld->getBasePtr(); |
2687 | FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Val&: Ptr); |
2688 | if (!FINode) |
2689 | return false; |
2690 | FI = FINode->getIndex(); |
2691 | } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { |
2692 | FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Val&: Arg); |
2693 | FI = FINode->getIndex(); |
2694 | Bytes = Flags.getByValSize(); |
2695 | } else |
2696 | return false; |
2697 | |
2698 | assert(FI != INT_MAX); |
2699 | if (!MFI.isFixedObjectIndex(ObjectIdx: FI)) |
2700 | return false; |
2701 | |
2702 | if (Offset != MFI.getObjectOffset(ObjectIdx: FI)) |
2703 | return false; |
2704 | |
2705 | // If this is not byval, check that the argument stack object is immutable. |
2706 | // inalloca and argument copy elision can create mutable argument stack |
2707 | // objects. Byval objects can be mutated, but a byval call intends to pass the |
2708 | // mutated memory. |
2709 | if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(ObjectIdx: FI)) |
2710 | return false; |
2711 | |
2712 | if (VA.getLocVT().getFixedSizeInBits() > |
2713 | Arg.getValueSizeInBits().getFixedValue()) { |
2714 | // If the argument location is wider than the argument type, check that any |
2715 | // extension flags match. |
2716 | if (Flags.isZExt() != MFI.isObjectZExt(ObjectIdx: FI) || |
2717 | Flags.isSExt() != MFI.isObjectSExt(ObjectIdx: FI)) { |
2718 | return false; |
2719 | } |
2720 | } |
2721 | |
2722 | return Bytes == MFI.getObjectSize(ObjectIdx: FI); |
2723 | } |
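// Hedged illustration (made-up 32-bit IR):
//   declare void @callee(i32, i32)
//   define void @caller(i32 %x, i32 %y) {
//     tail call void @callee(i32 %x, i32 %y)
//     ret void
//   }
// Both outgoing stack arguments already sit at the same fixed offsets in the
// caller's incoming argument area, so this returns true for each of them and
// the sibcall needs no argument stores.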
2724 | |
2725 | /// Check whether the call is eligible for tail call optimization. Targets |
2726 | /// that want to do tail call optimization should implement this function. |
2727 | /// Note that the x86 backend does not check musttail calls for eligibility! The |
2728 | /// rest of x86 tail call lowering must be prepared to forward arguments of any |
2729 | /// type. |
2730 | bool X86TargetLowering::IsEligibleForTailCallOptimization( |
2731 | TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo, |
2732 | SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const { |
2733 | SelectionDAG &DAG = CLI.DAG; |
2734 | const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
2735 | const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
2736 | const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
2737 | SDValue Callee = CLI.Callee; |
2738 | CallingConv::ID CalleeCC = CLI.CallConv; |
2739 | bool isVarArg = CLI.IsVarArg; |
2740 | |
2741 | if (!mayTailCallThisCC(CC: CalleeCC)) |
2742 | return false; |
2743 | |
2744 | // If -tailcallopt is specified, make fastcc functions tail-callable. |
2745 | MachineFunction &MF = DAG.getMachineFunction(); |
2746 | const Function &CallerF = MF.getFunction(); |
2747 | |
2748 | // If the function return type is x86_fp80 and the callee return type is not, |
2749 | // then the FP_EXTEND of the call result is not a nop. It's not safe to |
2750 | // perform a tailcall optimization here. |
2751 | if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty()) |
2752 | return false; |
2753 | |
2754 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
2755 | bool CCMatch = CallerCC == CalleeCC; |
2756 | bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CC: CalleeCC); |
2757 | bool IsCallerWin64 = Subtarget.isCallingConvWin64(CC: CallerCC); |
2758 | bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || |
2759 | CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail; |
2760 | |
2761 | // Win64 functions have extra shadow space for argument homing. Don't do the |
2762 | // sibcall if the caller and callee have mismatched expectations for this |
2763 | // space. |
2764 | if (IsCalleeWin64 != IsCallerWin64) |
2765 | return false; |
2766 | |
2767 | if (IsGuaranteeTCO) { |
2768 | if (canGuaranteeTCO(CC: CalleeCC) && CCMatch) |
2769 | return true; |
2770 | return false; |
2771 | } |
2772 | |
2773 | // Look for obvious safe cases to perform tail call optimization that do not |
2774 | // require ABI changes. This is what gcc calls sibcall. |
2775 | |
2776 | // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to |
2777 | // emit a special epilogue. |
2778 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
2779 | if (RegInfo->hasStackRealignment(MF)) |
2780 | return false; |
2781 | |
2782 | // Also avoid sibcall optimization if we're an sret return fn and the callee |
2783 | // is incompatible. See comment in LowerReturn about why hasStructRetAttr is |
2784 | // insufficient. |
2785 | if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) { |
2786 | // For a compatible tail call the callee must return our sret pointer. So it |
2787 | // needs to be (a) an sret function itself and (b) we pass our sret as its |
2788 | // sret. Condition #b is harder to determine. |
2789 | return false; |
2790 | } else if (IsCalleePopSRet) |
2791 | // The callee pops an sret, so we cannot tail-call, as our caller doesn't |
2792 | // expect that. |
2793 | return false; |
2794 | |
2795 | // Do not sibcall optimize vararg calls unless all arguments are passed via |
2796 | // registers. |
2797 | LLVMContext &C = *DAG.getContext(); |
2798 | if (isVarArg && !Outs.empty()) { |
2799 | // Optimizing for varargs on Win64 is unlikely to be safe without |
2800 | // additional testing. |
2801 | if (IsCalleeWin64 || IsCallerWin64) |
2802 | return false; |
2803 | |
2804 | for (const auto &VA : ArgLocs) |
2805 | if (!VA.isRegLoc()) |
2806 | return false; |
2807 | } |
2808 | |
2809 | // If the call result is in ST0 / ST1, it needs to be popped off the x87 |
2810 | // stack. Therefore, if it's not used by the call it is not safe to optimize |
2811 | // this into a sibcall. |
2812 | bool Unused = false; |
2813 | for (const auto &In : Ins) { |
2814 | if (!In.Used) { |
2815 | Unused = true; |
2816 | break; |
2817 | } |
2818 | } |
2819 | if (Unused) { |
2820 | SmallVector<CCValAssign, 16> RVLocs; |
2821 | CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C); |
2822 | RVCCInfo.AnalyzeCallResult(Ins, Fn: RetCC_X86); |
2823 | for (const auto &VA : RVLocs) { |
2824 | if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) |
2825 | return false; |
2826 | } |
2827 | } |
2828 | |
2829 | // Check that the call results are passed in the same way. |
2830 | if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, |
2831 | CalleeFn: RetCC_X86, CallerFn: RetCC_X86)) |
2832 | return false; |
2833 | // The callee has to preserve all registers the caller needs to preserve. |
2834 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2835 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
2836 | if (!CCMatch) { |
2837 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
2838 | if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved)) |
2839 | return false; |
2840 | } |
2841 | |
2842 | unsigned StackArgsSize = CCInfo.getStackSize(); |
2843 | |
2844 | // If the callee takes no arguments then go on to check the results of the |
2845 | // call. |
2846 | if (!Outs.empty()) { |
2847 | if (StackArgsSize > 0) { |
2848 | // Check if the arguments are already laid out in the right way as |
2849 | // the caller's fixed stack objects. |
2850 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2851 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
2852 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
2853 | for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { |
2854 | const CCValAssign &VA = ArgLocs[I]; |
2855 | SDValue Arg = OutVals[I]; |
2856 | ISD::ArgFlagsTy Flags = Outs[I].Flags; |
2857 | if (VA.getLocInfo() == CCValAssign::Indirect) |
2858 | return false; |
2859 | if (!VA.isRegLoc()) { |
2860 | if (!MatchingStackOffset(Arg, Offset: VA.getLocMemOffset(), Flags, MFI, MRI, |
2861 | TII, VA)) |
2862 | return false; |
2863 | } |
2864 | } |
2865 | } |
2866 | |
2867 | bool PositionIndependent = isPositionIndependent(); |
2868 | // If the tailcall address may be in a register, then make sure it's |
2869 | // possible to register allocate for it. In 32-bit, the call address can |
2870 | // only target EAX, EDX, or ECX since the tail call must be scheduled after |
2871 | // callee-saved registers are restored. These happen to be the same |
2872 | // registers used to pass 'inreg' arguments so watch out for those. |
2873 | if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Val: Callee) && |
2874 | !isa<ExternalSymbolSDNode>(Val: Callee)) || |
2875 | PositionIndependent)) { |
2876 | unsigned NumInRegs = 0; |
2877 | // In PIC we need an extra register to formulate the address computation |
2878 | // for the callee. |
2879 | unsigned MaxInRegs = PositionIndependent ? 2 : 3; |
2880 | |
2881 | for (const auto &VA : ArgLocs) { |
2882 | if (!VA.isRegLoc()) |
2883 | continue; |
2884 | Register Reg = VA.getLocReg(); |
2885 | switch (Reg) { |
2886 | default: break; |
2887 | case X86::EAX: case X86::EDX: case X86::ECX: |
2888 | if (++NumInRegs == MaxInRegs) |
2889 | return false; |
2890 | break; |
2891 | } |
2892 | } |
2893 | } |
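// Worked example of the accounting above (hypothetical 32-bit PIC indirect
// call): with MaxInRegs == 2, two 'inreg' arguments assigned to ECX and EDX
// exhaust the budget, so the sibcall is rejected because no register from
// {EAX, ECX, EDX} would remain to hold the call target.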
2894 | |
2895 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
2896 | if (!parametersInCSRMatch(MRI, CallerPreservedMask: CallerPreserved, ArgLocs, OutVals)) |
2897 | return false; |
2898 | } |
2899 | |
2900 | bool CalleeWillPop = |
2901 | X86::isCalleePop(CallingConv: CalleeCC, is64Bit: Subtarget.is64Bit(), IsVarArg: isVarArg, |
2902 | GuaranteeTCO: MF.getTarget().Options.GuaranteedTailCallOpt); |
2903 | |
2904 | if (unsigned BytesToPop = |
2905 | MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) { |
2906 | // If we have bytes to pop, the callee must pop them. |
2907 | bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; |
2908 | if (!CalleePopMatches) |
2909 | return false; |
2910 | } else if (CalleeWillPop && StackArgsSize > 0) { |
2911 | // If we don't have bytes to pop, make sure the callee doesn't pop any. |
2912 | return false; |
2913 | } |
2914 | |
2915 | return true; |
2916 | } |
2917 | |
2918 | /// Determines whether the callee is required to pop its own arguments. |
2919 | /// Callee pop is necessary to support tail calls. |
2920 | bool X86::isCalleePop(CallingConv::ID CallingConv, |
2921 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { |
2922 | // If GuaranteeTCO is true, we force some calls to be callee pop so that we |
2923 | // can guarantee TCO. |
2924 | if (!IsVarArg && shouldGuaranteeTCO(CC: CallingConv, GuaranteedTailCallOpt: GuaranteeTCO)) |
2925 | return true; |
2926 | |
2927 | switch (CallingConv) { |
2928 | default: |
2929 | return false; |
2930 | case CallingConv::X86_StdCall: |
2931 | case CallingConv::X86_FastCall: |
2932 | case CallingConv::X86_ThisCall: |
2933 | case CallingConv::X86_VectorCall: |
2934 | return !is64Bit; |
2935 | } |
2936 | } |
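// Example (standard 32-bit behaviour): an stdcall function taking 8 bytes of
// stack arguments returns with "ret 8", so isCalleePop is true; on x86-64 the
// same conventions return false and the caller cleans up the stack.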
2937 | |