1//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "MCTargetDesc/X86MCAsmInfo.h"
15#include "X86.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86ISelLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86MachineFunctionInfo.h"
21#include "X86TargetMachine.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/ObjCARCUtil.h"
24#include "llvm/CodeGen/MachineJumpTableInfo.h"
25#include "llvm/CodeGen/MachineModuleInfo.h"
26#include "llvm/CodeGen/WinEHFuncInfo.h"
27#include "llvm/IR/DiagnosticInfo.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/Module.h"
30
31#define DEBUG_TYPE "x86-isel"
32
33using namespace llvm;
34
35STATISTIC(NumTailCalls, "Number of tail calls");
36
37/// Call this when the user attempts to do something unsupported, like
38/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
39/// report_fatal_error, so calling code should attempt to recover without
40/// crashing.
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
                             const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
}
47
/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
51static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
52 switch (CC) {
53 default:
54 return false;
55 case CallingConv::X86_RegCall:
56 case CallingConv::PreserveMost:
57 case CallingConv::PreserveAll:
58 return true;
59 }
60}
61
/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
65static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
66 return CC == CallingConv::X86_RegCall;
67}
68
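/// Map a vXi1 mask argument or return value to the register type and number
/// of registers used to pass it for the given calling convention on an
/// AVX-512 target. For example, v16i1 is passed as a single v16i8 (XMM)
/// value under the C calling convention. Returns
/// {MVT::INVALID_SIMPLE_VALUE_TYPE, 0} when the generic calling-convention
/// lowering should decide instead (e.g. v16i1 under regcall, which stays in a
/// mask register).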
69static std::pair<MVT, unsigned>
70handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
71 const X86Subtarget &Subtarget) {
72 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
73 // convention is one that uses k registers.
74 if (NumElts == 2)
75 return {MVT::v2i64, 1};
76 if (NumElts == 4)
77 return {MVT::v4i32, 1};
78 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
79 CC != CallingConv::Intel_OCL_BI)
80 return {MVT::v8i16, 1};
81 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
82 CC != CallingConv::Intel_OCL_BI)
83 return {MVT::v16i8, 1};
84 // v32i1 passes in ymm unless we have BWI and the calling convention is
85 // regcall.
86 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
87 return {MVT::v32i8, 1};
88 // Split v64i1 vectors if we don't have v64i8 available.
89 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
90 if (Subtarget.useAVX512Regs())
91 return {MVT::v64i8, 1};
92 return {MVT::v32i8, 2};
93 }
94
95 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
96 if (!isPowerOf2_32(Value: NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
97 NumElts > 64)
98 return {MVT::i8, NumElts};
99
100 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
101}
102
103MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
104 CallingConv::ID CC,
105 EVT VT) const {
106 if (VT.isVector()) {
107 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
108 unsigned NumElts = VT.getVectorNumElements();
109
110 MVT RegisterVT;
111 unsigned NumRegisters;
112 std::tie(args&: RegisterVT, args&: NumRegisters) =
113 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
114 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
115 return RegisterVT;
116 }
117
118 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
119 return MVT::v8f16;
120 }
121
  // We will use more GPRs for f64 and f80 on 32-bit targets when x87 is
  // disabled.
123 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
124 !Subtarget.hasX87())
125 return MVT::i32;
126
127 if (isTypeLegal(VT: MVT::f16)) {
128 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
129 return getRegisterTypeForCallingConv(
130 Context, CC, VT: VT.changeVectorElementType(EltVT: MVT::f16));
131
132 if (VT == MVT::bf16)
133 return MVT::f16;
134 }
135
136 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
137}
138
139unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
140 CallingConv::ID CC,
141 EVT VT) const {
142 if (VT.isVector()) {
143 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
144 unsigned NumElts = VT.getVectorNumElements();
145
146 MVT RegisterVT;
147 unsigned NumRegisters;
148 std::tie(args&: RegisterVT, args&: NumRegisters) =
149 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
150 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
151 return NumRegisters;
152 }
153
154 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
155 return 1;
156 }
157
  // We have to split f64 into 2 registers and f80 into 3 registers on 32-bit
  // targets if x87 is disabled.
160 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
161 if (VT == MVT::f64)
162 return 2;
163 if (VT == MVT::f80)
164 return 3;
165 }
166
167 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
168 isTypeLegal(VT: MVT::f16))
169 return getNumRegistersForCallingConv(Context, CC,
170 VT: VT.changeVectorElementType(EltVT: MVT::f16));
171
172 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
173}
174
175unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
176 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
177 unsigned &NumIntermediates, MVT &RegisterVT) const {
178 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
179 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
180 Subtarget.hasAVX512() &&
181 (!isPowerOf2_32(Value: VT.getVectorNumElements()) ||
182 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
183 VT.getVectorNumElements() > 64)) {
184 RegisterVT = MVT::i8;
185 IntermediateVT = MVT::i1;
186 NumIntermediates = VT.getVectorNumElements();
187 return NumIntermediates;
188 }
189
190 // Split v64i1 vectors if we don't have v64i8 available.
191 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
192 CC != CallingConv::X86_RegCall) {
193 RegisterVT = MVT::v32i8;
194 IntermediateVT = MVT::v32i1;
195 NumIntermediates = 2;
196 return 2;
197 }
198
199 // Split vNbf16 vectors according to vNf16.
200 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
201 isTypeLegal(VT: MVT::f16))
202 VT = VT.changeVectorElementType(EltVT: MVT::f16);
203
204 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
205 NumIntermediates, RegisterVT);
206}
207
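/// Return the value type to use for the result of a SETCC: i8 for scalar
/// compares. With AVX-512, vector compares produce a vXi1 mask whenever the
/// operand type legalizes to a 512-bit vector, or (with VLX) to a narrower
/// legal vector with i32/i64 elements (i8/i16 elements too when BWI is
/// available); otherwise the result is a vector of integers matching the
/// operand's element width.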
208EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
209 LLVMContext& Context,
210 EVT VT) const {
211 if (!VT.isVector())
212 return MVT::i8;
213
214 if (Subtarget.hasAVX512()) {
215 // Figure out what this type will be legalized to.
216 EVT LegalVT = VT;
217 while (getTypeAction(Context, VT: LegalVT) != TypeLegal)
218 LegalVT = getTypeToTransformTo(Context, VT: LegalVT);
219
220 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
221 if (LegalVT.getSimpleVT().is512BitVector())
222 return EVT::getVectorVT(Context, VT: MVT::i1, EC: VT.getVectorElementCount());
223
224 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
225 // If we legalized to less than a 512-bit vector, then we will use a vXi1
226 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
227 // vXi16/vXi8.
228 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
229 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
230 return EVT::getVectorVT(Context, VT: MVT::i1, EC: VT.getVectorElementCount());
231 }
232 }
233
234 return VT.changeVectorElementTypeToInteger();
235}
236
237bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
238 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
239 const DataLayout &DL) const {
240 // i128 split into i64 needs to be allocated to two consecutive registers,
241 // or spilled to the stack as a whole.
242 return Ty->isIntegerTy(Bitwidth: 128);
243}
244
245/// Helper for getByValTypeAlignment to determine
246/// the desired ByVal argument alignment.
247static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
248 if (MaxAlign == 16)
249 return;
250 if (VectorType *VTy = dyn_cast<VectorType>(Val: Ty)) {
251 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
252 MaxAlign = Align(16);
253 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Val: Ty)) {
254 Align EltAlign;
255 getMaxByValAlign(Ty: ATy->getElementType(), MaxAlign&: EltAlign);
256 if (EltAlign > MaxAlign)
257 MaxAlign = EltAlign;
258 } else if (StructType *STy = dyn_cast<StructType>(Val: Ty)) {
259 for (auto *EltTy : STy->elements()) {
260 Align EltAlign;
261 getMaxByValAlign(Ty: EltTy, MaxAlign&: EltAlign);
262 if (EltAlign > MaxAlign)
263 MaxAlign = EltAlign;
264 if (MaxAlign == 16)
265 break;
266 }
267 }
268}
269
270/// Return the desired alignment for ByVal aggregate
271/// function arguments in the caller parameter area. For X86, aggregates
272/// that contain SSE vectors are placed at 16-byte boundaries while the rest
273/// are at 4-byte boundaries.
274Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
275 const DataLayout &DL) const {
276 if (Subtarget.is64Bit())
277 return std::max(a: DL.getABITypeAlign(Ty), b: Align::Constant<8>());
278
279 Align Alignment(4);
280 if (Subtarget.hasSSE1())
281 getMaxByValAlign(Ty, MaxAlign&: Alignment);
282 return Alignment;
283}
284
/// Returns EVT::Other if the type should be determined using generic
/// target-independent logic.
287/// For vector ops we check that the overall size isn't larger than our
288/// preferred vector width.
289EVT X86TargetLowering::getOptimalMemOpType(
290 const MemOp &Op, const AttributeList &FuncAttributes) const {
291 if (!FuncAttributes.hasFnAttr(Kind: Attribute::NoImplicitFloat)) {
292 if (Op.size() >= 16 &&
293 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(AlignCheck: Align(16)))) {
294 // FIXME: Check if unaligned 64-byte accesses are slow.
295 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
296 (Subtarget.getPreferVectorWidth() >= 512)) {
297 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
298 }
299 // FIXME: Check if unaligned 32-byte accesses are slow.
300 if (Op.size() >= 32 && Subtarget.hasAVX() &&
301 Subtarget.useLight256BitInstructions()) {
302 // Although this isn't a well-supported type for AVX1, we'll let
303 // legalization and shuffle lowering produce the optimal codegen. If we
304 // choose an optimal type with a vector element larger than a byte,
305 // getMemsetStores() may create an intermediate splat (using an integer
306 // multiply) before we splat as a vector.
307 return MVT::v32i8;
308 }
309 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
310 return MVT::v16i8;
311 // TODO: Can SSE1 handle a byte vector?
312 // If we have SSE1 registers we should be able to use them.
313 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
314 (Subtarget.getPreferVectorWidth() >= 128))
315 return MVT::v4f32;
316 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
317 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
318 // Do not use f64 to lower memcpy if source is string constant. It's
319 // better to use i32 to avoid the loads.
320 // Also, do not use f64 to lower memset unless this is a memset of zeros.
321 // The gymnastics of splatting a byte value into an XMM register and then
322 // only using 8-byte stores (because this is a CPU with slow unaligned
323 // 16-byte accesses) makes that a loser.
324 return MVT::f64;
325 }
326 }
327 // This is a compromise. If we reach here, unaligned accesses may be slow on
328 // this target. However, creating smaller, aligned accesses could be even
329 // slower and would certainly be a lot more code.
330 if (Subtarget.is64Bit() && Op.size() >= 8)
331 return MVT::i64;
332 return MVT::i32;
333}
334
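/// Returns true if it is safe to use loads/stores of the specified type when
/// expanding a memcpy/memset inline: f32 requires SSE1 and f64 requires SSE2;
/// all other types are assumed safe.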
335bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
336 if (VT == MVT::f32)
337 return Subtarget.hasSSE1();
338 if (VT == MVT::f64)
339 return Subtarget.hasSSE2();
340 return true;
341}
342
343static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
344 return (8 * Alignment.value()) % SizeInBits == 0;
345}
346
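/// Return true if a memory access of type VT with the given alignment is
/// expected to be fast on this subtarget. Sufficiently aligned accesses are
/// always fast; for unaligned accesses, 16-byte and 32-byte operations are
/// fast unless the subtarget reports them as slow, and all other sizes are
/// assumed fast.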
347bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
348 if (isBitAligned(Alignment, SizeInBits: VT.getSizeInBits()))
349 return true;
350 switch (VT.getSizeInBits()) {
351 default:
352 // 8-byte and under are always assumed to be fast.
353 return true;
354 case 128:
355 return !Subtarget.isUnalignedMem16Slow();
356 case 256:
357 return !Subtarget.isUnalignedMem32Slow();
358 // TODO: What about AVX-512 (512-bit) accesses?
359 }
360}
361
362bool X86TargetLowering::allowsMisalignedMemoryAccesses(
363 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
364 unsigned *Fast) const {
365 if (Fast)
366 *Fast = isMemoryAccessFast(VT, Alignment);
367 // NonTemporal vector memory ops must be aligned.
368 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads can only be vector aligned, so if it's less aligned than the
    // minimum vector size (which we can split the vector down to), we might as
    // well use a regular unaligned vector load.
    // We don't have any NT loads pre-SSE41.
373 if (!!(Flags & MachineMemOperand::MOLoad))
374 return (Alignment < 16 || !Subtarget.hasSSE41());
375 return false;
376 }
377 // Misaligned accesses of any size are always allowed.
378 return true;
379}
380
381bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
382 const DataLayout &DL, EVT VT,
383 unsigned AddrSpace, Align Alignment,
384 MachineMemOperand::Flags Flags,
385 unsigned *Fast) const {
386 if (Fast)
387 *Fast = isMemoryAccessFast(VT, Alignment);
388 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
389 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
390 /*Fast=*/nullptr))
391 return true;
392 // NonTemporal vector memory ops are special, and must be aligned.
393 if (!isBitAligned(Alignment, SizeInBits: VT.getSizeInBits()))
394 return false;
395 switch (VT.getSizeInBits()) {
396 case 128:
397 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
398 return true;
399 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
400 return true;
401 return false;
402 case 256:
403 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
404 return true;
405 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
406 return true;
407 return false;
408 case 512:
409 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
410 return true;
411 return false;
412 default:
413 return false; // Don't have NonTemporal vector memory ops of this size.
414 }
415 }
416 return true;
417}
418
419/// Return the entry encoding for a jump table in the
420/// current function. The returned value is a member of the
421/// MachineJumpTableInfo::JTEntryKind enum.
422unsigned X86TargetLowering::getJumpTableEncoding() const {
423 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
424 // symbol.
425 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
426 return MachineJumpTableInfo::EK_Custom32;
427 if (isPositionIndependent() &&
428 getTargetMachine().getCodeModel() == CodeModel::Large &&
429 !Subtarget.isTargetCOFF())
430 return MachineJumpTableInfo::EK_LabelDifference64;
431
432 // Otherwise, use the normal jump table encoding heuristics.
433 return TargetLowering::getJumpTableEncoding();
434}
435
436bool X86TargetLowering::useSoftFloat() const {
437 return Subtarget.useSoftFloat();
438}
439
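/// On 32-bit targets using the C or stdcall calling conventions, mark the
/// leading integer/pointer arguments of a libcall as 'inreg', consuming the
/// module's register-parameter count (M->getNumberRegisterParameters());
/// arguments wider than 4 bytes consume two of the available parameter
/// registers.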
440void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
441 ArgListTy &Args) const {
442
443 // Only relabel X86-32 for C / Stdcall CCs.
444 if (Subtarget.is64Bit())
445 return;
446 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
447 return;
448 unsigned ParamRegs = 0;
449 if (auto *M = MF->getFunction().getParent())
450 ParamRegs = M->getNumberRegisterParameters();
451
  // Mark the first N integer arguments as being passed in registers.
453 for (auto &Arg : Args) {
454 Type *T = Arg.Ty;
455 if (T->isIntOrPtrTy())
456 if (MF->getDataLayout().getTypeAllocSize(Ty: T) <= 8) {
457 unsigned numRegs = 1;
458 if (MF->getDataLayout().getTypeAllocSize(Ty: T) > 4)
459 numRegs = 2;
460 if (ParamRegs < numRegs)
461 return;
462 ParamRegs -= numRegs;
463 Arg.IsInReg = true;
464 }
465 }
466}
467
468const MCExpr *
469X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
470 const MachineBasicBlock *MBB,
471 unsigned uid,MCContext &Ctx) const{
472 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
473 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
474 // entries.
475 return MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), specifier: X86::S_GOTOFF, Ctx);
476}
477
478/// Returns relocation base for the given PIC jumptable.
479SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
480 SelectionDAG &DAG) const {
481 if (!Subtarget.is64Bit())
482 // This doesn't have SDLoc associated with it, but is not really the
483 // same as a Register.
484 return DAG.getNode(Opcode: X86ISD::GlobalBaseReg, DL: SDLoc(),
485 VT: getPointerTy(DL: DAG.getDataLayout()));
486 return Table;
487}
488
489/// This returns the relocation base for the given PIC jumptable,
490/// the same as getPICJumpTableRelocBase, but as an MCExpr.
491const MCExpr *X86TargetLowering::
492getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
493 MCContext &Ctx) const {
494 // X86-64 uses RIP relative addressing based on the jump table label.
495 if (Subtarget.isPICStyleRIPRel() ||
496 (Subtarget.is64Bit() &&
497 getTargetMachine().getCodeModel() == CodeModel::Large))
498 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
499
500 // Otherwise, the reference is relative to the PIC base.
501 return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx);
502}
503
504std::pair<const TargetRegisterClass *, uint8_t>
505X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
506 MVT VT) const {
507 const TargetRegisterClass *RRC = nullptr;
508 uint8_t Cost = 1;
509 switch (VT.SimpleTy) {
510 default:
511 return TargetLowering::findRepresentativeClass(TRI, VT);
512 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
513 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
514 break;
515 case MVT::x86mmx:
516 RRC = &X86::VR64RegClass;
517 break;
518 case MVT::f32: case MVT::f64:
519 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
520 case MVT::v4f32: case MVT::v2f64:
521 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
522 case MVT::v8f32: case MVT::v4f64:
523 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
524 case MVT::v16f32: case MVT::v8f64:
525 RRC = &X86::VR128XRegClass;
526 break;
527 }
528 return std::make_pair(x&: RRC, y&: Cost);
529}
530
531unsigned X86TargetLowering::getAddressSpace() const {
532 if (Subtarget.is64Bit())
533 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? X86AS::GS
534 : X86AS::FS;
535 return X86AS::GS;
536}
537
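/// Return true if the target's C library reserves a fixed TLS slot for the
/// stack guard: glibc, Fuchsia, and Android (bionic) from API level 17 on.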
static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
  return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
}
542
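/// Build a constant pointer to the given offset in the FS/GS-based address
/// space (an inttoptr of Offset into AddressSpace), used to address fixed TLS
/// slots such as the stack guard.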
static Constant* SegmentOffset(IRBuilderBase &IRB,
                               int Offset, unsigned AddressSpace) {
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      IRB.getPtrTy(AddressSpace));
}
549
550Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
551 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
552 // tcbhead_t; use it instead of the usual global variable (see
553 // sysdeps/{i386,x86_64}/nptl/tls.h)
554 if (hasStackGuardSlotTLS(TargetTriple: Subtarget.getTargetTriple())) {
555 unsigned AddressSpace = getAddressSpace();
556
557 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
558 if (Subtarget.isTargetFuchsia())
559 return SegmentOffset(IRB, Offset: 0x10, AddressSpace);
560
561 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    // Some users may customize the base register and offset.
    int Offset = M->getStackProtectorGuardOffset();
    // If -stack-protector-guard-offset is not set, the default is %fs:0x28
    // (%gs:0x28 with the Kernel code model) on x86-64, and %gs:0x14 on i386.
567 if (Offset == INT_MAX)
568 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
569
570 StringRef GuardReg = M->getStackProtectorGuardReg();
571 if (GuardReg == "fs")
572 AddressSpace = X86AS::FS;
573 else if (GuardReg == "gs")
574 AddressSpace = X86AS::GS;
575
    // Use the guard symbol if the user specified one.
577 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
578 if (!GuardSymb.empty()) {
579 GlobalVariable *GV = M->getGlobalVariable(Name: GuardSymb);
580 if (!GV) {
581 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(C&: M->getContext())
582 : Type::getInt32Ty(C&: M->getContext());
583 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
584 nullptr, GuardSymb, nullptr,
585 GlobalValue::NotThreadLocal, AddressSpace);
586 if (!Subtarget.isTargetDarwin())
587 GV->setDSOLocal(M->getDirectAccessExternalData());
588 }
589 return GV;
590 }
591
592 return SegmentOffset(IRB, Offset, AddressSpace);
593 }
594 return TargetLowering::getIRStackGuard(IRB);
595}
596
597void X86TargetLowering::insertSSPDeclarations(Module &M) const {
598 // MSVC CRT provides functionalities for stack protection.
599 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
600 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
601 // MSVC CRT has a global variable holding security cookie.
602 M.getOrInsertGlobal(Name: "__security_cookie",
603 Ty: PointerType::getUnqual(C&: M.getContext()));
604
605 // MSVC CRT has a function to validate security cookie.
606 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
607 Name: "__security_check_cookie", RetTy: Type::getVoidTy(C&: M.getContext()),
608 Args: PointerType::getUnqual(C&: M.getContext()));
609 if (Function *F = dyn_cast<Function>(Val: SecurityCheckCookie.getCallee())) {
610 F->setCallingConv(CallingConv::X86_FastCall);
611 F->addParamAttr(ArgNo: 0, Kind: Attribute::AttrKind::InReg);
612 }
613 return;
614 }
615
616 StringRef GuardMode = M.getStackProtectorGuard();
617
618 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
619 if ((GuardMode == "tls" || GuardMode.empty()) &&
620 hasStackGuardSlotTLS(TargetTriple: Subtarget.getTargetTriple()))
621 return;
622 TargetLowering::insertSSPDeclarations(M);
623}
624
625Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
626 // MSVC CRT has a global variable holding security cookie.
627 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
628 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
629 return M.getGlobalVariable(Name: "__security_cookie");
630 }
631 return TargetLowering::getSDagStackGuard(M);
632}
633
634Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
635 // MSVC CRT has a function to validate security cookie.
636 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
637 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
638 return M.getFunction(Name: "__security_check_cookie");
639 }
640 return TargetLowering::getSSPStackGuardCheck(M);
641}
642
643Value *
644X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
645 // Android provides a fixed TLS slot for the SafeStack pointer. See the
646 // definition of TLS_SLOT_SAFESTACK in
647 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
648 if (Subtarget.isTargetAndroid()) {
    // %fs:0x48 (%gs:0x48 with the Kernel code model) on x86-64;
    // %gs:0x24 on i386.
651 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
652 return SegmentOffset(IRB, Offset, AddressSpace: getAddressSpace());
653 }
654
655 // Fuchsia is similar.
656 if (Subtarget.isTargetFuchsia()) {
657 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
658 return SegmentOffset(IRB, Offset: 0x18, AddressSpace: getAddressSpace());
659 }
660
661 return TargetLowering::getSafeStackPointerLocation(IRB);
662}
663
664//===----------------------------------------------------------------------===//
665// Return Value Calling Convention Implementation
666//===----------------------------------------------------------------------===//
667
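/// Return true if every value in Outs can be assigned to a return register by
/// RetCC_X86 for this calling convention. If not, the common lowering code
/// demotes the return value to an implicit sret parameter (see the comment in
/// LowerReturn about FuncInfo.CanLowerReturn).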
668bool X86TargetLowering::CanLowerReturn(
669 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
670 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
671 const Type *RetTy) const {
672 SmallVector<CCValAssign, 16> RVLocs;
673 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
674 return CCInfo.CheckReturn(Outs, Fn: RetCC_X86);
675}
676
677const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
678 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
679 return ScratchRegs;
680}
681
682ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
683 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
684 return RCRegs;
685}
686
/// Lowers mask values (v*i1) to the local register values.
/// \returns DAG node after lowering to register type
689static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
690 const SDLoc &DL, SelectionDAG &DAG) {
691 EVT ValVT = ValArg.getValueType();
692
693 if (ValVT == MVT::v1i1)
694 return DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: ValLoc, N1: ValArg,
695 N2: DAG.getIntPtrConstant(Val: 0, DL));
696
697 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
698 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
699 // Two stage lowering might be required
700 // bitcast: v8i1 -> i8 / v16i1 -> i16
701 // anyextend: i8 -> i32 / i16 -> i32
702 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
703 SDValue ValToCopy = DAG.getBitcast(VT: TempValLoc, V: ValArg);
704 if (ValLoc == MVT::i32)
705 ValToCopy = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValLoc, Operand: ValToCopy);
706 return ValToCopy;
707 }
708
709 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
710 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
711 // One stage lowering is required
712 // bitcast: v32i1 -> i32 / v64i1 -> i64
713 return DAG.getBitcast(VT: ValLoc, V: ValArg);
714 }
715
716 return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: ValLoc, Operand: ValArg);
717}
718
719/// Breaks v64i1 value into two registers and adds the new node to the DAG
720static void Passv64i1ArgInRegs(
721 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
722 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
723 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
724 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
725 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
726 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
727 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
728 "The value should reside in two registers");
729
730 // Before splitting the value we cast it to i64
731 Arg = DAG.getBitcast(VT: MVT::i64, V: Arg);
732
733 // Splitting the value into two i32 types
734 SDValue Lo, Hi;
735 std::tie(args&: Lo, args&: Hi) = DAG.SplitScalar(N: Arg, DL, LoVT: MVT::i32, HiVT: MVT::i32);
736
737 // Attach the two i32 types into corresponding registers
738 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Lo));
739 RegsToPass.push_back(Elt: std::make_pair(x: NextVA.getLocReg(), y&: Hi));
740}
741
742SDValue
743X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
744 bool isVarArg,
745 const SmallVectorImpl<ISD::OutputArg> &Outs,
746 const SmallVectorImpl<SDValue> &OutVals,
747 const SDLoc &dl, SelectionDAG &DAG) const {
748 MachineFunction &MF = DAG.getMachineFunction();
749 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
750
751 // In some cases we need to disable registers from the default CSR list.
752 // For example, when they are used as return registers (preserve_* and X86's
753 // regcall) or for argument passing (X86's regcall).
754 bool ShouldDisableCalleeSavedRegister =
755 shouldDisableRetRegFromCSR(CC: CallConv) ||
756 MF.getFunction().hasFnAttribute(Kind: "no_caller_saved_registers");
757
758 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
759 report_fatal_error(reason: "X86 interrupts may not return any value");
760
761 SmallVector<CCValAssign, 16> RVLocs;
762 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
763 CCInfo.AnalyzeReturn(Outs, Fn: RetCC_X86);
764
765 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
766 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
767 ++I, ++OutsIndex) {
768 CCValAssign &VA = RVLocs[I];
769 assert(VA.isRegLoc() && "Can only return in registers!");
770
771 // Add the register to the CalleeSaveDisableRegs list.
772 if (ShouldDisableCalleeSavedRegister)
773 MF.getRegInfo().disableCalleeSavedRegister(Reg: VA.getLocReg());
774
775 SDValue ValToCopy = OutVals[OutsIndex];
776 EVT ValVT = ValToCopy.getValueType();
777
778 // Promote values to the appropriate types.
779 if (VA.getLocInfo() == CCValAssign::SExt)
780 ValToCopy = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
781 else if (VA.getLocInfo() == CCValAssign::ZExt)
782 ValToCopy = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
783 else if (VA.getLocInfo() == CCValAssign::AExt) {
784 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
785 ValToCopy = lowerMasksToReg(ValArg: ValToCopy, ValLoc: VA.getLocVT(), DL: dl, DAG);
786 else
787 ValToCopy = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: ValToCopy);
788 }
789 else if (VA.getLocInfo() == CCValAssign::BCvt)
790 ValToCopy = DAG.getBitcast(VT: VA.getLocVT(), V: ValToCopy);
791
792 assert(VA.getLocInfo() != CCValAssign::FPExt &&
793 "Unexpected FP-extend for return value.");
794
795 // Report an error if we have attempted to return a value via an XMM
796 // register and SSE was disabled.
797 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(Reg: VA.getLocReg())) {
798 errorUnsupported(DAG, dl, Msg: "SSE register return with SSE disabled");
799 VA.convertToReg(Reg: X86::FP0); // Set reg to FP0, avoid hitting asserts.
800 } else if (!Subtarget.hasSSE2() &&
801 X86::FR64XRegClass.contains(Reg: VA.getLocReg()) &&
802 ValVT == MVT::f64) {
803 // When returning a double via an XMM register, report an error if SSE2 is
804 // not enabled.
805 errorUnsupported(DAG, dl, Msg: "SSE2 register return with SSE2 disabled");
806 VA.convertToReg(Reg: X86::FP0); // Set reg to FP0, avoid hitting asserts.
807 }
808
809 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
810 // the RET instruction and handled by the FP Stackifier.
811 if (VA.getLocReg() == X86::FP0 ||
812 VA.getLocReg() == X86::FP1) {
813 // If this is a copy from an xmm register to ST(0), use an FPExtend to
814 // change the value to the FP stack register class.
815 if (isScalarFPTypeInSSEReg(VT: VA.getValVT()))
816 ValToCopy = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f80, Operand: ValToCopy);
817 RetVals.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ValToCopy));
818 // Don't emit a copytoreg.
819 continue;
820 }
821
822 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
823 // which is returned in RAX / RDX.
824 if (Subtarget.is64Bit()) {
825 if (ValVT == MVT::x86mmx) {
826 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
827 ValToCopy = DAG.getBitcast(VT: MVT::i64, V: ValToCopy);
828 ValToCopy = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: MVT::v2i64,
829 Operand: ValToCopy);
830 // If we don't have SSE2 available, convert to v4f32 so the generated
831 // register is legal.
832 if (!Subtarget.hasSSE2())
833 ValToCopy = DAG.getBitcast(VT: MVT::v4f32, V: ValToCopy);
834 }
835 }
836 }
837
838 if (VA.needsCustom()) {
839 assert(VA.getValVT() == MVT::v64i1 &&
840 "Currently the only custom case is when we split v64i1 to 2 regs");
841
842 Passv64i1ArgInRegs(DL: dl, DAG, Arg&: ValToCopy, RegsToPass&: RetVals, VA, NextVA&: RVLocs[++I],
843 Subtarget);
844
845 // Add the second register to the CalleeSaveDisableRegs list.
846 if (ShouldDisableCalleeSavedRegister)
847 MF.getRegInfo().disableCalleeSavedRegister(Reg: RVLocs[I].getLocReg());
848 } else {
849 RetVals.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ValToCopy));
850 }
851 }
852
853 SDValue Glue;
854 SmallVector<SDValue, 6> RetOps;
855 RetOps.push_back(Elt: Chain); // Operand #0 = Chain (updated below)
856 // Operand #1 = Bytes To Pop
857 RetOps.push_back(Elt: DAG.getTargetConstant(Val: FuncInfo->getBytesToPopOnReturn(), DL: dl,
858 VT: MVT::i32));
859
860 // Copy the result values into the output registers.
861 for (auto &RetVal : RetVals) {
862 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
863 RetOps.push_back(Elt: RetVal.second);
864 continue; // Don't emit a copytoreg.
865 }
866
867 Chain = DAG.getCopyToReg(Chain, dl, Reg: RetVal.first, N: RetVal.second, Glue);
868 Glue = Chain.getValue(R: 1);
869 RetOps.push_back(
870 Elt: DAG.getRegister(Reg: RetVal.first, VT: RetVal.second.getValueType()));
871 }
872
873 // Swift calling convention does not require we copy the sret argument
874 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
875
876 // All x86 ABIs require that for returning structs by value we copy
877 // the sret argument into %rax/%eax (depending on ABI) for the return.
878 // We saved the argument into a virtual register in the entry block,
879 // so now we copy the value out and into %rax/%eax.
880 //
881 // Checking Function.hasStructRetAttr() here is insufficient because the IR
882 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
883 // false, then an sret argument may be implicitly inserted in the SelDAG. In
884 // either case FuncInfo->setSRetReturnReg() will have been called.
885 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
886 // When we have both sret and another return value, we should use the
887 // original Chain stored in RetOps[0], instead of the current Chain updated
    // in the above loop. If we only have sret, RetOps[0] is equal to Chain.
889
890 // For the case of sret and another return value, we have
891 // Chain_0 at the function entry
892 // Chain_1 = getCopyToReg(Chain_0) in the above loop
893 // If we use Chain_1 in getCopyFromReg, we will have
894 // Val = getCopyFromReg(Chain_1)
895 // Chain_2 = getCopyToReg(Chain_1, Val) from below
896
897 // getCopyToReg(Chain_0) will be glued together with
898 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
899 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
900 // Data dependency from Unit B to Unit A due to usage of Val in
901 // getCopyToReg(Chain_1, Val)
902 // Chain dependency from Unit A to Unit B
903
904 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
905 SDValue Val = DAG.getCopyFromReg(Chain: RetOps[0], dl, Reg: SRetReg,
906 VT: getPointerTy(DL: MF.getDataLayout()));
907
908 Register RetValReg
909 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
910 X86::RAX : X86::EAX;
911 Chain = DAG.getCopyToReg(Chain, dl, Reg: RetValReg, N: Val, Glue);
912 Glue = Chain.getValue(R: 1);
913
914 // RAX/EAX now acts like a return value.
915 RetOps.push_back(
916 Elt: DAG.getRegister(Reg: RetValReg, VT: getPointerTy(DL: DAG.getDataLayout())));
917
918 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
919 // this however for preserve_most/preserve_all to minimize the number of
920 // callee-saved registers for these CCs.
921 if (ShouldDisableCalleeSavedRegister &&
922 CallConv != CallingConv::PreserveAll &&
923 CallConv != CallingConv::PreserveMost)
924 MF.getRegInfo().disableCalleeSavedRegister(Reg: RetValReg);
925 }
926
927 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
928 const MCPhysReg *I =
929 TRI->getCalleeSavedRegsViaCopy(MF: &DAG.getMachineFunction());
930 if (I) {
931 for (; *I; ++I) {
932 if (X86::GR64RegClass.contains(Reg: *I))
933 RetOps.push_back(Elt: DAG.getRegister(Reg: *I, VT: MVT::i64));
934 else
935 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
936 }
937 }
938
939 RetOps[0] = Chain; // Update chain.
940
941 // Add the glue if we have it.
942 if (Glue.getNode())
943 RetOps.push_back(Elt: Glue);
944
945 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
946 if (CallConv == CallingConv::X86_INTR)
947 opcode = X86ISD::IRET;
948 return DAG.getNode(Opcode: opcode, DL: dl, VT: MVT::Other, Ops: RetOps);
949}
950
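/// Return true if the single use of N is a CopyToReg (without glue) or
/// FP_EXTEND whose users are all X86ISD::RET_GLUE nodes returning at most one
/// value. On success, Chain is updated to the chain entering that copy so the
/// call producing N can be turned into a tail call.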
951bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
952 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(NUses: 1, Value: 0))
953 return false;
954
955 SDValue TCChain = Chain;
956 SDNode *Copy = *N->user_begin();
957 if (Copy->getOpcode() == ISD::CopyToReg) {
958 // If the copy has a glue operand, we conservatively assume it isn't safe to
959 // perform a tail call.
960 if (Copy->getOperand(Num: Copy->getNumOperands()-1).getValueType() == MVT::Glue)
961 return false;
962 TCChain = Copy->getOperand(Num: 0);
963 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
964 return false;
965
966 bool HasRet = false;
967 for (const SDNode *U : Copy->users()) {
968 if (U->getOpcode() != X86ISD::RET_GLUE)
969 return false;
    // If we are returning more than one value, we can definitely
    // not make a tail call; see PR19530.
972 if (U->getNumOperands() > 4)
973 return false;
974 if (U->getNumOperands() == 4 &&
975 U->getOperand(Num: U->getNumOperands() - 1).getValueType() != MVT::Glue)
976 return false;
977 HasRet = true;
978 }
979
980 if (!HasRet)
981 return false;
982
983 Chain = TCChain;
984 return true;
985}
986
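/// Return the minimum type to which a small integer return value of type VT
/// must be extended. i1/i8/i16 returns only need to be extended to i8, except
/// on Darwin, where i8/i16 are still widened to i32 because code in the wild
/// relies on Clang's old behaviour of always extending them (PR26665).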
987EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
988 ISD::NodeType ExtendKind) const {
989 MVT ReturnMVT = MVT::i32;
990
991 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
992 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
993 // The ABI does not require i1, i8 or i16 to be extended.
994 //
995 // On Darwin, there is code in the wild relying on Clang's old behaviour of
996 // always extending i8/i16 return values, so keep doing that for now.
997 // (PR26665).
998 ReturnMVT = MVT::i8;
999 }
1000
1001 EVT MinVT = getRegisterType(Context, VT: ReturnMVT);
1002 return VT.bitsLT(VT: MinVT) ? MinVT : VT;
1003}
1004
/// Reads two 32-bit registers and creates a 64-bit mask value.
/// \param VA The current 32-bit value that needs to be assigned.
/// \param NextVA The next 32-bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InGlue Represents the SDValue in the parent DAG node used
///                        for glue purposes. If the DAG is already using a
///                        physical register instead of a virtual one, the new
///                        SDValue is glued to the InGlue SDValue.
/// \return a new 64-bit wide SDValue.
1014static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1015 SDValue &Root, SelectionDAG &DAG,
1016 const SDLoc &DL, const X86Subtarget &Subtarget,
1017 SDValue *InGlue = nullptr) {
1018 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1019 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1020 assert(VA.getValVT() == MVT::v64i1 &&
1021 "Expecting first location of 64 bit width type");
1022 assert(NextVA.getValVT() == VA.getValVT() &&
1023 "The locations should have the same type");
1024 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1025 "The values should reside in two registers");
1026
1027 SDValue Lo, Hi;
1028 SDValue ArgValueLo, ArgValueHi;
1029
1030 MachineFunction &MF = DAG.getMachineFunction();
1031 const TargetRegisterClass *RC = &X86::GR32RegClass;
1032
1033 // Read a 32 bit value from the registers.
1034 if (nullptr == InGlue) {
1035 // When no physical register is present,
1036 // create an intermediate virtual register.
1037 Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
1038 ArgValueLo = DAG.getCopyFromReg(Chain: Root, dl: DL, Reg, VT: MVT::i32);
1039 Reg = MF.addLiveIn(PReg: NextVA.getLocReg(), RC);
1040 ArgValueHi = DAG.getCopyFromReg(Chain: Root, dl: DL, Reg, VT: MVT::i32);
1041 } else {
1042 // When a physical register is available read the value from it and glue
1043 // the reads together.
1044 ArgValueLo =
1045 DAG.getCopyFromReg(Chain: Root, dl: DL, Reg: VA.getLocReg(), VT: MVT::i32, Glue: *InGlue);
1046 *InGlue = ArgValueLo.getValue(R: 2);
1047 ArgValueHi =
1048 DAG.getCopyFromReg(Chain: Root, dl: DL, Reg: NextVA.getLocReg(), VT: MVT::i32, Glue: *InGlue);
1049 *InGlue = ArgValueHi.getValue(R: 2);
1050 }
1051
1052 // Convert the i32 type into v32i1 type.
1053 Lo = DAG.getBitcast(VT: MVT::v32i1, V: ArgValueLo);
1054
1055 // Convert the i32 type into v32i1 type.
1056 Hi = DAG.getBitcast(VT: MVT::v32i1, V: ArgValueHi);
1057
1058 // Concatenate the two values together.
1059 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: MVT::v64i1, N1: Lo, N2: Hi);
1060}
1061
/// Lowers a register of various sizes (8/16/32/64 bits)
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
/// \returns a DAG node containing the operand after lowering to mask type.
1065static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1066 const EVT &ValLoc, const SDLoc &DL,
1067 SelectionDAG &DAG) {
1068 SDValue ValReturned = ValArg;
1069
1070 if (ValVT == MVT::v1i1)
1071 return DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: MVT::v1i1, Operand: ValReturned);
1072
1073 if (ValVT == MVT::v64i1) {
    // On 32-bit targets this case is handled by getv64i1Argument.
    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
    // On 64-bit targets there is no need to truncate the value, only bitcast it.
1077 } else {
1078 MVT MaskLenVT;
1079 switch (ValVT.getSimpleVT().SimpleTy) {
1080 case MVT::v8i1:
1081 MaskLenVT = MVT::i8;
1082 break;
1083 case MVT::v16i1:
1084 MaskLenVT = MVT::i16;
1085 break;
1086 case MVT::v32i1:
1087 MaskLenVT = MVT::i32;
1088 break;
1089 default:
1090 llvm_unreachable("Expecting a vector of i1 types");
1091 }
1092
1093 ValReturned = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MaskLenVT, Operand: ValReturned);
1094 }
1095 return DAG.getBitcast(VT: ValVT, V: ValReturned);
1096}
1097
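/// Emit an X86ISD::POP_FROM_X87_REG node that pops the call result out of the
/// given x87 stack register (FP0/FP1), producing the value together with a
/// chain and glue result.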
static SDValue getPopFromX87Reg(SelectionDAG &DAG, SDValue Chain,
                                const SDLoc &dl, Register Reg, EVT VT,
                                SDValue Glue) {
  SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue);
  SDValue Ops[] = {Chain, DAG.getRegister(Reg, VT), Glue};
  return DAG.getNode(X86ISD::POP_FROM_X87_REG, dl, VTs,
                     ArrayRef(Ops, Glue.getNode() ? 3 : 2));
}
1106
1107/// Lower the result values of a call into the
1108/// appropriate copies out of appropriate physical registers.
1109///
1110SDValue X86TargetLowering::LowerCallResult(
1111 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1112 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1113 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1114 uint32_t *RegMask) const {
1115
1116 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1117 // Assign locations to each value returned by this call.
1118 SmallVector<CCValAssign, 16> RVLocs;
1119 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1120 *DAG.getContext());
1121 CCInfo.AnalyzeCallResult(Ins, Fn: RetCC_X86);
1122
1123 // Copy all of the result registers out of their specified physreg.
1124 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1125 ++I, ++InsIndex) {
1126 CCValAssign &VA = RVLocs[I];
1127 EVT CopyVT = VA.getLocVT();
1128
1129 // In some calling conventions we need to remove the used registers
1130 // from the register mask.
1131 if (RegMask) {
1132 for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg: VA.getLocReg()))
1133 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1134 }
1135
1136 // Report an error if there was an attempt to return FP values via XMM
1137 // registers.
1138 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(Reg: VA.getLocReg())) {
1139 errorUnsupported(DAG, dl, Msg: "SSE register return with SSE disabled");
1140 if (VA.getLocReg() == X86::XMM1)
1141 VA.convertToReg(Reg: X86::FP1); // Set reg to FP1, avoid hitting asserts.
1142 else
1143 VA.convertToReg(Reg: X86::FP0); // Set reg to FP0, avoid hitting asserts.
1144 } else if (!Subtarget.hasSSE2() &&
1145 X86::FR64XRegClass.contains(Reg: VA.getLocReg()) &&
1146 CopyVT == MVT::f64) {
1147 errorUnsupported(DAG, dl, Msg: "SSE2 register return with SSE2 disabled");
1148 if (VA.getLocReg() == X86::XMM1)
1149 VA.convertToReg(Reg: X86::FP1); // Set reg to FP1, avoid hitting asserts.
1150 else
1151 VA.convertToReg(Reg: X86::FP0); // Set reg to FP0, avoid hitting asserts.
1152 }
1153
1154 // If we prefer to use the value in xmm registers, copy it out as f80 and
1155 // use a truncate to move it from fp stack reg to xmm reg.
1156 bool RoundAfterCopy = false;
1157 bool X87Result = VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1;
1158 if (X87Result && isScalarFPTypeInSSEReg(VT: VA.getValVT())) {
1159 if (!Subtarget.hasX87())
1160 report_fatal_error(reason: "X87 register return with X87 disabled");
1161 CopyVT = MVT::f80;
1162 RoundAfterCopy = (CopyVT != VA.getLocVT());
1163 }
1164
1165 SDValue Val;
1166 if (VA.needsCustom()) {
1167 assert(VA.getValVT() == MVT::v64i1 &&
1168 "Currently the only custom case is when we split v64i1 to 2 regs");
1169 Val =
1170 getv64i1Argument(VA, NextVA&: RVLocs[++I], Root&: Chain, DAG, DL: dl, Subtarget, InGlue: &InGlue);
1171 } else {
1172 Chain =
1173 X87Result
1174 ? getPopFromX87Reg(DAG, Chain, dl, Reg: VA.getLocReg(), VT: CopyVT, Glue: InGlue)
1175 .getValue(R: 1)
1176 : DAG.getCopyFromReg(Chain, dl, Reg: VA.getLocReg(), VT: CopyVT, Glue: InGlue)
1177 .getValue(R: 1);
1178 Val = Chain.getValue(R: 0);
1179 InGlue = Chain.getValue(R: 2);
1180 }
1181
1182 if (RoundAfterCopy)
1183 Val = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: VA.getValVT(), N1: Val,
1184 // This truncation won't change the value.
1185 N2: DAG.getIntPtrConstant(Val: 1, DL: dl, /*isTarget=*/true));
1186
1187 if (VA.isExtInLoc()) {
1188 if (VA.getValVT().isVector() &&
1189 VA.getValVT().getScalarType() == MVT::i1 &&
1190 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1191 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1192 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1193 Val = lowerRegToMasks(ValArg: Val, ValVT: VA.getValVT(), ValLoc: VA.getLocVT(), DL: dl, DAG);
1194 } else
1195 Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val);
1196 }
1197
1198 if (VA.getLocInfo() == CCValAssign::BCvt)
1199 Val = DAG.getBitcast(VT: VA.getValVT(), V: Val);
1200
1201 InVals.push_back(Elt: Val);
1202 }
1203
1204 return Chain;
1205}
1206
1207//===----------------------------------------------------------------------===//
1208// C & StdCall & Fast Calling Convention implementation
1209//===----------------------------------------------------------------------===//
// The StdCall calling convention is the standard for many Windows API
// routines. It differs from the C calling convention only slightly: the
// callee cleans up the stack rather than the caller, and symbol names are
// decorated. It does not support vector arguments.
// For info on the fast calling convention, see the Fast Calling Convention
// (tail call) implementation in LowerX86_32FastCCCallTo.
1216
/// Determines whether Args, either a set of outgoing arguments to a call or a
/// set of incoming args of a call, contains an sret pointer that the callee
/// pops.
1220template <typename T>
1221static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1222 const X86Subtarget &Subtarget) {
1223 // Not C++20 (yet), so no concepts available.
1224 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1225 std::is_same_v<T, ISD::InputArg>,
1226 "requires ISD::OutputArg or ISD::InputArg");
1227
1228 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1229 // for most compilations.
1230 if (!Subtarget.is32Bit())
1231 return false;
1232
1233 if (Args.empty())
1234 return false;
1235
1236 // Most calls do not have an sret argument, check the arg next.
1237 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1238 if (!Flags.isSRet() || Flags.isInReg())
1239 return false;
1240
  // The MSVC ABI does not pop the sret.
  if (Subtarget.getTargetTriple().isOSMSVCRT())
    return false;

  // MCUs don't pop the sret.
  if (Subtarget.isTargetMCU())
    return false;
1248
1249 // Callee pops argument
1250 return true;
1251}
1252
1253/// Make a copy of an aggregate at address specified by "Src" to address
1254/// "Dst" with size and alignment information specified by the specific
1255/// parameter attribute. The copy will be passed as a byval function parameter.
1256static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1257 SDValue Chain, ISD::ArgFlagsTy Flags,
1258 SelectionDAG &DAG, const SDLoc &dl) {
1259 SDValue SizeNode = DAG.getIntPtrConstant(Val: Flags.getByValSize(), DL: dl);
1260
1261 return DAG.getMemcpy(
1262 Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(),
1263 /*isVolatile*/ isVol: false, /*AlwaysInline=*/true,
1264 /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo());
1265}
1266
1267/// Return true if the calling convention is one that we can guarantee TCO for.
1268static bool canGuaranteeTCO(CallingConv::ID CC) {
1269 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1270 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1271 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1272}
1273
1274/// Return true if we might ever do TCO for calls with this calling convention.
1275static bool mayTailCallThisCC(CallingConv::ID CC) {
1276 switch (CC) {
1277 // C calling conventions:
1278 case CallingConv::C:
1279 case CallingConv::Win64:
1280 case CallingConv::X86_64_SysV:
1281 case CallingConv::PreserveNone:
1282 // Callee pop conventions:
1283 case CallingConv::X86_ThisCall:
1284 case CallingConv::X86_StdCall:
1285 case CallingConv::X86_VectorCall:
1286 case CallingConv::X86_FastCall:
1287 // Swift:
1288 case CallingConv::Swift:
1289 return true;
1290 default:
1291 return canGuaranteeTCO(CC);
1292 }
1293}
1294
1295/// Return true if the function is being made into a tailcall target by
1296/// changing its ABI.
1297static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1298 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1299 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1300}
1301
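/// Return true if CI is a candidate for tail-call emission: it must be marked
/// 'tail' in the IR and use a calling convention for which we may perform
/// tail-call optimization.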
1302bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1303 if (!CI->isTailCall())
1304 return false;
1305
1306 CallingConv::ID CalleeCC = CI->getCallingConv();
1307 if (!mayTailCallThisCC(CC: CalleeCC))
1308 return false;
1309
1310 return true;
1311}
1312
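/// Lower a single stack-passed incoming argument: create (or, for later parts
/// of a split argument, reuse) a fixed stack object for it and return either
/// the frame index itself (for byval arguments) or a load from the slot,
/// performing copy elision when the in-memory layout allows it.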
1313SDValue
1314X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1315 const SmallVectorImpl<ISD::InputArg> &Ins,
1316 const SDLoc &dl, SelectionDAG &DAG,
1317 const CCValAssign &VA,
1318 MachineFrameInfo &MFI, unsigned i) const {
1319 // Create the nodes corresponding to a load from this parameter slot.
1320 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1321 bool AlwaysUseMutable = shouldGuaranteeTCO(
1322 CC: CallConv, GuaranteedTailCallOpt: DAG.getTarget().Options.GuaranteedTailCallOpt);
1323 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1324 EVT ValVT;
1325 MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1326
  // If the value is passed by pointer, we receive the address instead of the
  // value itself. No need to extend if the mask value and its location share
  // the same absolute size.
1330 bool ExtendedInMem =
1331 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1332 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1333
1334 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1335 ValVT = VA.getLocVT();
1336 else
1337 ValVT = VA.getValVT();
1338
  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by the lowering of arguments in case of a tail call.
1343 if (Flags.isByVal()) {
1344 unsigned Bytes = Flags.getByValSize();
1345 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1346
1347 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1348 // can be improved with deeper analysis.
1349 int FI = MFI.CreateFixedObject(Size: Bytes, SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable,
1350 /*isAliased=*/true);
1351 return DAG.getFrameIndex(FI, VT: PtrVT);
1352 }
1353
1354 EVT ArgVT = Ins[i].ArgVT;
1355
1356 // If this is a vector that has been split into multiple parts, don't elide
1357 // the copy. The layout on the stack may not match the packed in-memory
1358 // layout.
1359 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1360
1361 // This is an argument in memory. We might be able to perform copy elision.
1362 // If the argument is passed directly in memory without any extension, then we
1363 // can perform copy elision. Large vector types, for example, may be passed
1364 // indirectly by pointer.
1365 if (Flags.isCopyElisionCandidate() &&
1366 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1367 !ScalarizedVector) {
1368 SDValue PartAddr;
1369 if (Ins[i].PartOffset == 0) {
1370 // If this is a one-part value or the first part of a multi-part value,
1371 // create a stack object for the entire argument value type and return a
1372 // load from our portion of it. This assumes that if the first part of an
1373 // argument is in memory, the rest will also be in memory.
1374 int FI = MFI.CreateFixedObject(Size: ArgVT.getStoreSize(), SPOffset: VA.getLocMemOffset(),
1375 /*IsImmutable=*/false);
1376 PartAddr = DAG.getFrameIndex(FI, VT: PtrVT);
1377 return DAG.getLoad(
1378 VT: ValVT, dl, Chain, Ptr: PartAddr,
1379 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI));
1380 }
1381
1382 // This is not the first piece of an argument in memory. See if there is
1383 // already a fixed stack object including this offset. If so, assume it
1384 // was created by the PartOffset == 0 branch above and create a load from
1385 // the appropriate offset into it.
1386 int64_t PartBegin = VA.getLocMemOffset();
1387 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1388 int FI = MFI.getObjectIndexBegin();
1389 for (; MFI.isFixedObjectIndex(ObjectIdx: FI); ++FI) {
1390 int64_t ObjBegin = MFI.getObjectOffset(ObjectIdx: FI);
1391 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(ObjectIdx: FI);
1392 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1393 break;
1394 }
1395 if (MFI.isFixedObjectIndex(ObjectIdx: FI)) {
1396 SDValue Addr =
1397 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: DAG.getFrameIndex(FI, VT: PtrVT),
1398 N2: DAG.getIntPtrConstant(Val: Ins[i].PartOffset, DL: dl));
1399 return DAG.getLoad(VT: ValVT, dl, Chain, Ptr: Addr,
1400 PtrInfo: MachinePointerInfo::getFixedStack(
1401 MF&: DAG.getMachineFunction(), FI, Offset: Ins[i].PartOffset));
1402 }
1403 }
1404
1405 int FI = MFI.CreateFixedObject(Size: ValVT.getSizeInBits() / 8,
1406 SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable);
1407
1408 // Set SExt or ZExt flag.
1409 if (VA.getLocInfo() == CCValAssign::ZExt) {
1410 MFI.setObjectZExt(ObjectIdx: FI, IsZExt: true);
1411 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1412 MFI.setObjectSExt(ObjectIdx: FI, IsSExt: true);
1413 }
1414
1415 MaybeAlign Alignment;
1416 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1417 ValVT != MVT::f80)
1418 Alignment = MaybeAlign(4);
1419 SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
1420 SDValue Val = DAG.getLoad(
1421 VT: ValVT, dl, Chain, Ptr: FIN,
1422 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI),
1423 Alignment);
1424 return ExtendedInMem
1425 ? (VA.getValVT().isVector()
1426 ? DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: VA.getValVT(), Operand: Val)
1427 : DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val))
1428 : Val;
1429}
1430
1431// FIXME: Get this from tablegen.
1432static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1433 const X86Subtarget &Subtarget) {
1434 assert(Subtarget.is64Bit());
1435
1436 if (Subtarget.isCallingConvWin64(CC: CallConv)) {
1437 static const MCPhysReg GPR64ArgRegsWin64[] = {
1438 X86::RCX, X86::RDX, X86::R8, X86::R9
1439 };
1440 return GPR64ArgRegsWin64;
1441 }
1442
1443 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1444 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1445 };
1446 return GPR64ArgRegs64Bit;
1447}
1448
1449// FIXME: Get this from tablegen.
1450static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1451 CallingConv::ID CallConv,
1452 const X86Subtarget &Subtarget) {
1453 assert(Subtarget.is64Bit());
1454 if (Subtarget.isCallingConvWin64(CC: CallConv)) {
1455 // The XMM registers which might contain vararg parameters are shadowed
1456 // in their paired GPRs, so we only need to save the GPRs to their home
1457 // slots.
1458 // TODO: __vectorcall will change this.
1459 return {};
1460 }
1461
1462 bool isSoftFloat = Subtarget.useSoftFloat();
1463 if (isSoftFloat || !Subtarget.hasSSE1())
1464 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1465 // registers.
1466 return {};
1467
1468 static const MCPhysReg XMMArgRegs64Bit[] = {
1469 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1470 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1471 };
1472 return XMMArgRegs64Bit;
1473}
1474
1475#ifndef NDEBUG
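/// Return true if the argument locations are sorted in ascending order of
/// their original argument index (ValNo); the argument-lowering loops below
/// rely on this ordering.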
1476static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1477 return llvm::is_sorted(
1478 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1479 return A.getValNo() < B.getValNo();
1480 });
1481}
1482#endif
1483
1484namespace {
1485 /// This is a helper class for lowering variable argument (vararg) parameters.
1486class VarArgsLoweringHelper {
1487public:
1488 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1489 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1490 CallingConv::ID CallConv, CCState &CCInfo)
1491 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1492 TheMachineFunction(DAG.getMachineFunction()),
1493 TheFunction(TheMachineFunction.getFunction()),
1494 FrameInfo(TheMachineFunction.getFrameInfo()),
1495 FrameLowering(*Subtarget.getFrameLowering()),
1496 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1497 CCInfo(CCInfo) {}
1498
1499 // Lower variable argument (vararg) parameters.
1500 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1501
1502private:
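// Create the frame index for the vararg area (and, on 64-bit targets, the
// register save area) and spill the unallocated integer/XMM argument
// registers into it so that llvm.va_start can find them.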
1503 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1504
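// Compute the set of argument registers that must be forwarded to a musttail
// call and copy them from physical to virtual registers so they stay live
// until the tail call site.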
1505 void forwardMustTailParameters(SDValue &Chain);
1506
1507 bool is64Bit() const { return Subtarget.is64Bit(); }
1508 bool isWin64() const { return Subtarget.isCallingConvWin64(CC: CallConv); }
1509
1510 X86MachineFunctionInfo *FuncInfo;
1511 const SDLoc &DL;
1512 SelectionDAG &DAG;
1513 const X86Subtarget &Subtarget;
1514 MachineFunction &TheMachineFunction;
1515 const Function &TheFunction;
1516 MachineFrameInfo &FrameInfo;
1517 const TargetFrameLowering &FrameLowering;
1518 const TargetLowering &TargLowering;
1519 CallingConv::ID CallConv;
1520 CCState &CCInfo;
1521};
1522} // namespace
1523
1524void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1525 SDValue &Chain, unsigned StackSize) {
1526 // If the function takes a variable number of arguments, make a frame index for
1527 // the start of the first vararg value... for expansion of llvm.va_start. We
1528 // can skip this if there are no va_start calls.
1529 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1530 CallConv != CallingConv::X86_ThisCall)) {
1531 FuncInfo->setVarArgsFrameIndex(
1532 FrameInfo.CreateFixedObject(Size: 1, SPOffset: StackSize, IsImmutable: true));
1533 }
1534
1535 // 64-bit calling conventions support varargs and register parameters, so we
1536 // have to do extra work to spill them in the prologue.
1537 if (is64Bit()) {
1538 // Find the index of the first unallocated integer and XMM argument register.
1539 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1540 ArrayRef<MCPhysReg> ArgXMMs =
1541 get64BitArgumentXMMs(MF&: TheMachineFunction, CallConv, Subtarget);
1542 unsigned NumIntRegs = CCInfo.getFirstUnallocated(Regs: ArgGPRs);
1543 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(Regs: ArgXMMs);
1544
1545 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1546 "SSE register cannot be used when SSE is disabled!");
1547
1548 if (isWin64()) {
1549 // Get to the caller-allocated home save location. Add 8 to account
1550 // for the return address.
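// (On Win64 the caller always reserves a 32-byte home/shadow area for
// RCX/RDX/R8/R9 just above the return address; the vararg GPRs are saved
// into their unused home slots rather than into a separate save area.)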
1551 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1552 FuncInfo->setRegSaveFrameIndex(
1553 FrameInfo.CreateFixedObject(Size: 1, SPOffset: NumIntRegs * 8 + HomeOffset, IsImmutable: false));
1554 // Fix up the vararg frame index to point into the shadow area (4 x i64).
1555 if (NumIntRegs < 4)
1556 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1557 } else {
1558 // For X86-64, if there are vararg parameters that are passed via
1559 // registers, then we must store them to their spots on the stack so
1560 // they may be loaded via the va_arg expansion.
1561 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1562 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1563 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1564 Size: ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Alignment: Align(16), isSpillSlot: false));
1565 }
1566
1567 SmallVector<SDValue, 6>
1568 LiveGPRs; // SDValues for the live-in GPR argument registers.
1569 SmallVector<SDValue, 8> LiveXMMRegs; // SDValues for the live-in XMM
1570 // argument registers.
1571 SDValue ALVal; // SDValue for the %al register, if applicable.
1572
1573 // Gather all the live in physical registers.
1574 for (MCPhysReg Reg : ArgGPRs.slice(N: NumIntRegs)) {
1575 Register GPR = TheMachineFunction.addLiveIn(PReg: Reg, RC: &X86::GR64RegClass);
1576 LiveGPRs.push_back(Elt: DAG.getCopyFromReg(Chain, dl: DL, Reg: GPR, VT: MVT::i64));
1577 }
1578 const auto &AvailableXmms = ArgXMMs.slice(N: NumXMMRegs);
1579 if (!AvailableXmms.empty()) {
1580 Register AL = TheMachineFunction.addLiveIn(PReg: X86::AL, RC: &X86::GR8RegClass);
1581 ALVal = DAG.getCopyFromReg(Chain, dl: DL, Reg: AL, VT: MVT::i8);
1582 for (MCPhysReg Reg : AvailableXmms) {
1583 // FastRegisterAllocator spills virtual registers at basic
1584 // block boundaries. That leads to uses of XMM registers
1585 // outside of the check for %al. Pass physical registers to
1586 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1587 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1588 LiveXMMRegs.push_back(Elt: DAG.getRegister(Reg, VT: MVT::v4f32));
1589 }
1590 }
1591
1592 // Store the integer parameter registers.
1593 SmallVector<SDValue, 8> MemOps;
1594 SDValue RSFIN =
1595 DAG.getFrameIndex(FI: FuncInfo->getRegSaveFrameIndex(),
1596 VT: TargLowering.getPointerTy(DL: DAG.getDataLayout()));
1597 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1598 for (SDValue Val : LiveGPRs) {
1599 SDValue FIN = DAG.getNode(Opcode: ISD::ADD, DL,
1600 VT: TargLowering.getPointerTy(DL: DAG.getDataLayout()),
1601 N1: RSFIN, N2: DAG.getIntPtrConstant(Val: Offset, DL));
1602 SDValue Store =
1603 DAG.getStore(Chain: Val.getValue(R: 1), dl: DL, Val, Ptr: FIN,
1604 PtrInfo: MachinePointerInfo::getFixedStack(
1605 MF&: DAG.getMachineFunction(),
1606 FI: FuncInfo->getRegSaveFrameIndex(), Offset));
1607 MemOps.push_back(Elt: Store);
1608 Offset += 8;
1609 }
1610
1611 // Now store the XMM (fp + vector) parameter registers.
1612 if (!LiveXMMRegs.empty()) {
1613 SmallVector<SDValue, 12> SaveXMMOps;
1614 SaveXMMOps.push_back(Elt: Chain);
1615 SaveXMMOps.push_back(Elt: ALVal);
1616 SaveXMMOps.push_back(Elt: RSFIN);
1617 SaveXMMOps.push_back(
1618 Elt: DAG.getTargetConstant(Val: FuncInfo->getVarArgsFPOffset(), DL, VT: MVT::i32));
1619 llvm::append_range(C&: SaveXMMOps, R&: LiveXMMRegs);
1620 MachineMemOperand *StoreMMO =
1621 DAG.getMachineFunction().getMachineMemOperand(
1622 PtrInfo: MachinePointerInfo::getFixedStack(
1623 MF&: DAG.getMachineFunction(), FI: FuncInfo->getRegSaveFrameIndex(),
1624 Offset),
1625 F: MachineMemOperand::MOStore, Size: 128, BaseAlignment: Align(16));
1626 MemOps.push_back(Elt: DAG.getMemIntrinsicNode(Opcode: X86ISD::VASTART_SAVE_XMM_REGS,
1627 dl: DL, VTList: DAG.getVTList(VT: MVT::Other),
1628 Ops: SaveXMMOps, MemVT: MVT::i8, MMO: StoreMMO));
1629 }
1630
1631 if (!MemOps.empty())
1632 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOps);
1633 }
1634}
1635
1636void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1637 // Find the largest legal vector type.
1638 MVT VecVT = MVT::Other;
1639 // FIXME: Only some x86_32 calling conventions support AVX512.
1640 if (Subtarget.useAVX512Regs() &&
1641 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1642 CallConv == CallingConv::Intel_OCL_BI)))
1643 VecVT = MVT::v16f32;
1644 else if (Subtarget.hasAVX())
1645 VecVT = MVT::v8f32;
1646 else if (Subtarget.hasSSE2())
1647 VecVT = MVT::v4f32;
1648
1649 // We forward some GPRs and some vector types.
1650 SmallVector<MVT, 2> RegParmTypes;
1651 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1652 RegParmTypes.push_back(Elt: IntVT);
1653 if (VecVT != MVT::Other)
1654 RegParmTypes.push_back(Elt: VecVT);
1655
1656 // Compute the set of forwarded registers. The rest are scratch.
1657 SmallVectorImpl<ForwardedRegister> &Forwards =
1658 FuncInfo->getForwardedMustTailRegParms();
1659 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: CC_X86);
1660
1661 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1662 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(Reg: X86::AL)) {
1663 Register ALVReg = TheMachineFunction.addLiveIn(PReg: X86::AL, RC: &X86::GR8RegClass);
1664 Forwards.push_back(Elt: ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1665 }
1666
1667 // Copy all forwards from physical to virtual registers.
1668 for (ForwardedRegister &FR : Forwards) {
1669 // FIXME: Can we use a less constrained schedule?
1670 SDValue RegVal = DAG.getCopyFromReg(Chain, dl: DL, Reg: FR.VReg, VT: FR.VT);
1671 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1672 RegClass: TargLowering.getRegClassFor(VT: FR.VT));
1673 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: FR.VReg, N: RegVal);
1674 }
1675}
1676
1677void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1678 unsigned StackSize) {
1679 // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
1680 // If necessary, it will be set to the correct value later.
1681 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1682 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1683
1684 if (FrameInfo.hasVAStart())
1685 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1686
1687 if (FrameInfo.hasMustTailInVarArgFunc())
1688 forwardMustTailParameters(Chain);
1689}
1690
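/// Lower the incoming (formal) arguments of a function: assign each argument
/// to a register or stack slot, produce the corresponding SDValues in InVals,
/// and handle varargs, sret, and swift-async bookkeeping.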
1691SDValue X86TargetLowering::LowerFormalArguments(
1692 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1693 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1694 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1695 MachineFunction &MF = DAG.getMachineFunction();
1696 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1697
1698 const Function &F = MF.getFunction();
1699 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1700 F.getName() == "main")
1701 FuncInfo->setForceFramePointer(true);
1702
1703 MachineFrameInfo &MFI = MF.getFrameInfo();
1704 bool Is64Bit = Subtarget.is64Bit();
1705 bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv);
1706
1707 assert(
1708 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1709 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1710
1711 // Assign locations to all of the incoming arguments.
1712 SmallVector<CCValAssign, 16> ArgLocs;
1713 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1714
1715 // Allocate shadow area for Win64.
1716 if (IsWin64)
1717 CCInfo.AllocateStack(Size: 32, Alignment: Align(8));
1718
1719 CCInfo.AnalyzeArguments(Ins, Fn: CC_X86);
1720
1721 // In the vectorcall calling convention, a second pass is required for the
1722 // HVA types.
1723 if (CallingConv::X86_VectorCall == CallConv) {
1724 CCInfo.AnalyzeArgumentsSecondPass(Args: Ins, Fn: CC_X86);
1725 }
1726
1727 // The next loop assumes that the locations are in the same order as the
1728 // input arguments.
1729 assert(isSortedByValueNo(ArgLocs) &&
1730 "Argument Location list must be sorted before lowering");
1731
1732 SDValue ArgValue;
1733 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1734 ++I, ++InsIndex) {
1735 assert(InsIndex < Ins.size() && "Invalid Ins index");
1736 CCValAssign &VA = ArgLocs[I];
1737
1738 if (VA.isRegLoc()) {
1739 EVT RegVT = VA.getLocVT();
1740 if (VA.needsCustom()) {
1741 assert(
1742 VA.getValVT() == MVT::v64i1 &&
1743 "Currently the only custom case is when we split v64i1 to 2 regs");
1744
1745 // In the regcall calling convention on 32-bit targets, v64i1
1746 // values are split up into two registers.
1747 ArgValue =
1748 getv64i1Argument(VA, NextVA&: ArgLocs[++I], Root&: Chain, DAG, DL: dl, Subtarget);
1749 } else {
1750 const TargetRegisterClass *RC;
1751 if (RegVT == MVT::i8)
1752 RC = &X86::GR8RegClass;
1753 else if (RegVT == MVT::i16)
1754 RC = &X86::GR16RegClass;
1755 else if (RegVT == MVT::i32)
1756 RC = &X86::GR32RegClass;
1757 else if (Is64Bit && RegVT == MVT::i64)
1758 RC = &X86::GR64RegClass;
1759 else if (RegVT == MVT::f16)
1760 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1761 else if (RegVT == MVT::f32)
1762 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1763 else if (RegVT == MVT::f64)
1764 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1765 else if (RegVT == MVT::f80)
1766 RC = &X86::RFP80RegClass;
1767 else if (RegVT == MVT::f128)
1768 RC = &X86::VR128RegClass;
1769 else if (RegVT.is512BitVector())
1770 RC = &X86::VR512RegClass;
1771 else if (RegVT.is256BitVector())
1772 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1773 else if (RegVT.is128BitVector())
1774 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1775 else if (RegVT == MVT::x86mmx)
1776 RC = &X86::VR64RegClass;
1777 else if (RegVT == MVT::v1i1)
1778 RC = &X86::VK1RegClass;
1779 else if (RegVT == MVT::v8i1)
1780 RC = &X86::VK8RegClass;
1781 else if (RegVT == MVT::v16i1)
1782 RC = &X86::VK16RegClass;
1783 else if (RegVT == MVT::v32i1)
1784 RC = &X86::VK32RegClass;
1785 else if (RegVT == MVT::v64i1)
1786 RC = &X86::VK64RegClass;
1787 else
1788 llvm_unreachable("Unknown argument type!");
1789
1790 Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC);
1791 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, VT: RegVT);
1792 }
1793
1794 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1795 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1796 // right size.
1797 if (VA.getLocInfo() == CCValAssign::SExt)
1798 ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: RegVT, N1: ArgValue,
1799 N2: DAG.getValueType(VA.getValVT()));
1800 else if (VA.getLocInfo() == CCValAssign::ZExt)
1801 ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RegVT, N1: ArgValue,
1802 N2: DAG.getValueType(VA.getValVT()));
1803 else if (VA.getLocInfo() == CCValAssign::BCvt)
1804 ArgValue = DAG.getBitcast(VT: VA.getValVT(), V: ArgValue);
1805
1806 if (VA.isExtInLoc()) {
1807 // Handle MMX values passed in XMM regs.
1808 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1809 ArgValue = DAG.getNode(Opcode: X86ISD::MOVDQ2Q, DL: dl, VT: VA.getValVT(), Operand: ArgValue);
1810 else if (VA.getValVT().isVector() &&
1811 VA.getValVT().getScalarType() == MVT::i1 &&
1812 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1813 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1814 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1815 ArgValue = lowerRegToMasks(ValArg: ArgValue, ValVT: VA.getValVT(), ValLoc: RegVT, DL: dl, DAG);
1816 } else
1817 ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: ArgValue);
1818 }
1819 } else {
1820 assert(VA.isMemLoc());
1821 ArgValue =
1822 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i: InsIndex);
1823 }
1824
1825 // If the value is passed via a pointer, do a load.
1826 if (VA.getLocInfo() == CCValAssign::Indirect &&
1827 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1828 ArgValue =
1829 DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: ArgValue, PtrInfo: MachinePointerInfo());
1830 }
1831
1832 InVals.push_back(Elt: ArgValue);
1833 }
1834
1835 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1836 if (Ins[I].Flags.isSwiftAsync()) {
1837 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1838 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1839 X86FI->setHasSwiftAsyncContext(true);
1840 else {
1841 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1842 int FI =
1843 MF.getFrameInfo().CreateStackObject(Size: PtrSize, Alignment: Align(PtrSize), isSpillSlot: false);
1844 X86FI->setSwiftAsyncContextFrameIdx(FI);
1845 SDValue St = DAG.getStore(
1846 Chain: DAG.getEntryNode(), dl, Val: InVals[I],
1847 Ptr: DAG.getFrameIndex(FI, VT: PtrSize == 8 ? MVT::i64 : MVT::i32),
1848 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
1849 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: St, N2: Chain);
1850 }
1851 }
1852
1853 // The Swift calling convention does not require us to copy the sret argument
1854 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1855 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1856 continue;
1857
1858 // All x86 ABIs require that for returning structs by value we copy the
1859 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1860 // the argument into a virtual register so that we can access it from the
1861 // return points.
1862 if (Ins[I].Flags.isSRet()) {
1863 assert(!FuncInfo->getSRetReturnReg() &&
1864 "SRet return has already been set");
1865 MVT PtrTy = getPointerTy(DL: DAG.getDataLayout());
1866 Register Reg =
1867 MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrTy));
1868 FuncInfo->setSRetReturnReg(Reg);
1869 SDValue Copy = DAG.getCopyToReg(Chain: DAG.getEntryNode(), dl, Reg, N: InVals[I]);
1870 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Copy, N2: Chain);
1871 break;
1872 }
1873 }
1874
1875 unsigned StackSize = CCInfo.getStackSize();
1876 // Align stack specially for tail calls.
1877 if (shouldGuaranteeTCO(CC: CallConv,
1878 GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt))
1879 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1880
1881 if (IsVarArg)
1882 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1883 .lowerVarArgsParameters(Chain, StackSize);
1884
1885 // Some CCs need callee pop.
1886 if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg,
1887 GuaranteeTCO: MF.getTarget().Options.GuaranteedTailCallOpt)) {
1888 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1889 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1890 // X86 interrupts must pop the error code (and the alignment padding) if
1891 // present.
1892 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1893 } else {
1894 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1895 // If this is an sret function, the return should pop the hidden pointer.
1896 if (!canGuaranteeTCO(CC: CallConv) && hasCalleePopSRet(Args: Ins, Subtarget))
1897 FuncInfo->setBytesToPopOnReturn(4);
1898 }
1899
1900 if (!Is64Bit) {
1901 // RegSaveFrameIndex is X86-64 only.
1902 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1903 }
1904
1905 FuncInfo->setArgumentStackSize(StackSize);
1906
1907 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1908 EHPersonality Personality = classifyEHPersonality(Pers: F.getPersonalityFn());
1909 if (Personality == EHPersonality::CoreCLR) {
1910 assert(Is64Bit);
1911 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1912 // that we'd prefer this slot be allocated towards the bottom of the frame
1913 // (i.e. near the stack pointer after allocating the frame). Every
1914 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1915 // offset from the bottom of this and each funclet's frame must be the
1916 // same, so the size of funclets' (mostly empty) frames is dictated by
1917 // how far this slot is from the bottom (since they allocate just enough
1918 // space to accommodate holding this slot at the correct offset).
1919 int PSPSymFI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), /*isSpillSlot=*/false);
1920 EHInfo->PSPSymFrameIdx = PSPSymFI;
1921 }
1922 }
1923
1924 if (shouldDisableArgRegFromCSR(CC: CallConv) ||
1925 F.hasFnAttribute(Kind: "no_caller_saved_registers")) {
1926 MachineRegisterInfo &MRI = MF.getRegInfo();
1927 for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1928 MRI.disableCalleeSavedRegister(Reg: Pair.first);
1929 }
1930
1931 if (CallingConv::PreserveNone == CallConv)
1932 for (const ISD::InputArg &In : Ins) {
1933 if (In.Flags.isSwiftSelf() || In.Flags.isSwiftAsync() ||
1934 In.Flags.isSwiftError()) {
1935 errorUnsupported(DAG, dl,
1936 Msg: "Swift attributes can't be used with preserve_none");
1937 break;
1938 }
1939 }
1940
1941 return Chain;
1942}
1943
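/// Emit the store of an outgoing call argument to its stack slot at
/// LocMemOffset from StackPtr, or a byval copy when the argument is byval.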
1944SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1945 SDValue Arg, const SDLoc &dl,
1946 SelectionDAG &DAG,
1947 const CCValAssign &VA,
1948 ISD::ArgFlagsTy Flags,
1949 bool isByVal) const {
1950 unsigned LocMemOffset = VA.getLocMemOffset();
1951 SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl);
1952 PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
1953 N1: StackPtr, N2: PtrOff);
1954 if (isByVal)
1955 return CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff, Chain, Flags, DAG, dl);
1956
1957 MaybeAlign Alignment;
1958 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1959 Arg.getSimpleValueType() != MVT::f80)
1960 Alignment = MaybeAlign(4);
1961 return DAG.getStore(
1962 Chain, dl, Val: Arg, Ptr: PtrOff,
1963 PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: LocMemOffset),
1964 Alignment);
1965}
1966
1967 /// Emit a load of the return address if tail call
1968 /// optimization is performed and it is required.
1969SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1970 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1971 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1972 // Adjust the Return address stack slot.
1973 EVT VT = getPointerTy(DL: DAG.getDataLayout());
1974 OutRetAddr = getReturnAddressFrameIndex(DAG);
1975
1976 // Load the "old" Return address.
1977 OutRetAddr = DAG.getLoad(VT, dl, Chain, Ptr: OutRetAddr, PtrInfo: MachinePointerInfo());
1978 return SDValue(OutRetAddr.getNode(), 1);
1979}
1980
1981/// Emit a store of the return address if tail call
1982/// optimization is performed and it is required (FPDiff!=0).
1983static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1984 SDValue Chain, SDValue RetAddrFrIdx,
1985 EVT PtrVT, unsigned SlotSize,
1986 int FPDiff, const SDLoc &dl) {
1987 // Store the return address to the appropriate stack slot.
1988 if (!FPDiff) return Chain;
1989 // Calculate the new stack slot for the return address.
1990 int NewReturnAddrFI =
1991 MF.getFrameInfo().CreateFixedObject(Size: SlotSize, SPOffset: (int64_t)FPDiff - SlotSize,
1992 IsImmutable: false);
1993 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(FI: NewReturnAddrFI, VT: PtrVT);
1994 Chain = DAG.getStore(Chain, dl, Val: RetAddrFrIdx, Ptr: NewRetAddrFrIdx,
1995 PtrInfo: MachinePointerInfo::getFixedStack(
1996 MF&: DAG.getMachineFunction(), FI: NewReturnAddrFI));
1997 return Chain;
1998}
1999
2000 /// Returns a vector_shuffle node for a movs{s|d} or movd
2001 /// operation of the specified width.
2002SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
2003 SDValue V1, SDValue V2) const {
2004 unsigned NumElems = VT.getVectorNumElements();
2005 SmallVector<int, 8> Mask;
2006 Mask.push_back(Elt: NumElems);
2007 for (unsigned i = 1; i != NumElems; ++i)
2008 Mask.push_back(Elt: i);
2009 return DAG.getVectorShuffle(VT, dl, N1: V1, N2: V2, Mask);
2010}
2011
2012SDValue
2013X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2014 SmallVectorImpl<SDValue> &InVals) const {
2015 SelectionDAG &DAG = CLI.DAG;
2016 SDLoc &dl = CLI.DL;
2017 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2018 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2019 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2020 SDValue Chain = CLI.Chain;
2021 SDValue Callee = CLI.Callee;
2022 CallingConv::ID CallConv = CLI.CallConv;
2023 bool &isTailCall = CLI.IsTailCall;
2024 bool isVarArg = CLI.IsVarArg;
2025 const auto *CB = CLI.CB;
2026
2027 MachineFunction &MF = DAG.getMachineFunction();
2028 bool Is64Bit = Subtarget.is64Bit();
2029 bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv);
2030 bool IsSibcall = false;
2031 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2032 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2033 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Args: Outs, Subtarget);
2034 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2035 bool HasNCSR = (CB && isa<CallInst>(Val: CB) &&
2036 CB->hasFnAttr(Kind: "no_caller_saved_registers"));
2037 bool IsIndirectCall = (CB && isa<CallInst>(Val: CB) && CB->isIndirectCall());
2038 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2039 const Module *M = MF.getFunction().getParent();
2040
2041 // If the indirect call target has the nocf_check attribute, the call needs
2042 // the NOTRACK prefix. For simplicity just disable tail calls as there are
2043 // so many variants.
2044 bool IsNoTrackIndirectCall = IsIndirectCall && CB->doesNoCfCheck() &&
2045 M->getModuleFlag(Key: "cf-protection-branch");
2046 if (IsNoTrackIndirectCall)
2047 isTailCall = false;
2048
2049 MachineFunction::CallSiteInfo CSInfo;
2050 if (CallConv == CallingConv::X86_INTR)
2051 report_fatal_error(reason: "X86 interrupts may not be called directly");
2052
2053 if (IsIndirectCall && !IsWin64 &&
2054 M->getModuleFlag(Key: "import-call-optimization"))
2055 errorUnsupported(DAG, dl,
2056 Msg: "Indirect calls must have a normal calling convention if "
2057 "Import Call Optimization is enabled");
2058
2059 // Analyze operands of the call, assigning locations to each operand.
2060 SmallVector<CCValAssign, 16> ArgLocs;
2061 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2062
2063 // Allocate shadow area for Win64.
2064 if (IsWin64)
2065 CCInfo.AllocateStack(Size: 32, Alignment: Align(8));
2066
2067 CCInfo.AnalyzeArguments(Outs, Fn: CC_X86);
2068
2069 // In the vectorcall calling convention, a second pass is required for the
2070 // HVA types.
2071 if (CallingConv::X86_VectorCall == CallConv) {
2072 CCInfo.AnalyzeArgumentsSecondPass(Args: Outs, Fn: CC_X86);
2073 }
2074
2075 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2076 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2077 // If we are using a GOT, disable tail calls to external symbols with
2078 // default visibility. Tail calling such a symbol requires using a GOT
2079 // relocation, which forces early binding of the symbol. This breaks code
2080 // that requires lazy function symbol resolution. Using musttail or
2081 // GuaranteedTailCallOpt will override this.
2082 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
2083 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2084 G->getGlobal()->hasDefaultVisibility()))
2085 isTailCall = false;
2086 }
2087
2088 if (isTailCall && !IsMustTail) {
2089 // Check if it's really possible to do a tail call.
2090 isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2091 IsCalleePopSRet);
2092
2093 // Sibcalls are automatically detected tailcalls which do not require
2094 // ABI changes.
2095 if (!IsGuaranteeTCO && isTailCall)
2096 IsSibcall = true;
2097
2098 if (isTailCall)
2099 ++NumTailCalls;
2100 }
2101
2102 if (IsMustTail && !isTailCall)
2103 report_fatal_error(reason: "failed to perform tail call elimination on a call "
2104 "site marked musttail");
2105
2106 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2107 "Var args not supported with calling convention fastcc, ghc or hipe");
2108
2109 // Get a count of how many bytes are to be pushed on the stack.
2110 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2111 if (IsSibcall)
2112 // This is a sibcall. The memory operands are available in the caller's
2113 // own caller's stack frame.
2114 NumBytes = 0;
2115 else if (IsGuaranteeTCO && canGuaranteeTCO(CC: CallConv))
2116 NumBytes = GetAlignedArgumentStackSize(StackSize: NumBytes, DAG);
2117
2118 int FPDiff = 0;
2119 if (isTailCall &&
2120 shouldGuaranteeTCO(CC: CallConv,
2121 GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) {
2122 // Lower arguments at fp - stackoffset + fpdiff.
2123 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2124
2125 FPDiff = NumBytesCallerPushed - NumBytes;
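// A negative FPDiff means this call needs more argument stack space than the
// caller's incoming arguments provide, so the return address slot must move
// down by -FPDiff bytes.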
2126
2127 // Record the delta by which the return address stack slot moves, keeping
2128 // the most negative delta (i.e. the largest movement) seen so far.
2129 if (FPDiff < X86Info->getTCReturnAddrDelta())
2130 X86Info->setTCReturnAddrDelta(FPDiff);
2131 }
2132
2133 unsigned NumBytesToPush = NumBytes;
2134 unsigned NumBytesToPop = NumBytes;
2135
2136 // If we have an inalloca argument, all stack space has already been allocated
2137 // for us and is right at the top of the stack. We don't support multiple
2138 // arguments passed in memory when using inalloca.
2139 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2140 NumBytesToPush = 0;
2141 if (!ArgLocs.back().isMemLoc())
2142 report_fatal_error(reason: "cannot use inalloca attribute on a register "
2143 "parameter");
2144 if (ArgLocs.back().getLocMemOffset() != 0)
2145 report_fatal_error(reason: "any parameter with the inalloca attribute must be "
2146 "the only memory argument");
2147 } else if (CLI.IsPreallocated) {
2148 assert(ArgLocs.back().isMemLoc() &&
2149 "cannot use preallocated attribute on a register "
2150 "parameter");
2151 SmallVector<size_t, 4> PreallocatedOffsets;
2152 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2153 if (CLI.CB->paramHasAttr(ArgNo: i, Kind: Attribute::Preallocated)) {
2154 PreallocatedOffsets.push_back(Elt: ArgLocs[i].getLocMemOffset());
2155 }
2156 }
2157 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2158 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CS: CLI.CB);
2159 MFI->setPreallocatedStackSize(Id: PreallocatedId, StackSize: NumBytes);
2160 MFI->setPreallocatedArgOffsets(Id: PreallocatedId, AO: PreallocatedOffsets);
2161 NumBytesToPush = 0;
2162 }
2163
2164 if (!IsSibcall && !IsMustTail)
2165 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytesToPush,
2166 OutSize: NumBytes - NumBytesToPush, DL: dl);
2167
2168 SDValue RetAddrFrIdx;
2169 // Load return address for tail calls.
2170 if (isTailCall && FPDiff)
2171 Chain = EmitTailCallLoadRetAddr(DAG, OutRetAddr&: RetAddrFrIdx, Chain, IsTailCall: isTailCall,
2172 Is64Bit, FPDiff, dl);
2173
2174 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2175 SmallVector<SDValue, 8> MemOpChains;
2176 SDValue StackPtr;
2177
2178 // The next loop assumes that the locations are in the same order as the
2179 // input arguments.
2180 assert(isSortedByValueNo(ArgLocs) &&
2181 "Argument Location list must be sorted before lowering");
2182
2183 // Walk the register/memloc assignments, inserting copies/loads. In the case
2184 // of tail call optimization, arguments are handled later.
2185 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2186 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2187 ++I, ++OutIndex) {
2188 assert(OutIndex < Outs.size() && "Invalid Out index");
2189 // Skip inalloca/preallocated arguments, they have already been written.
2190 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2191 if (Flags.isInAlloca() || Flags.isPreallocated())
2192 continue;
2193
2194 CCValAssign &VA = ArgLocs[I];
2195 EVT RegVT = VA.getLocVT();
2196 SDValue Arg = OutVals[OutIndex];
2197 bool isByVal = Flags.isByVal();
2198
2199 // Promote the value if needed.
2200 switch (VA.getLocInfo()) {
2201 default: llvm_unreachable("Unknown loc info!");
2202 case CCValAssign::Full: break;
2203 case CCValAssign::SExt:
2204 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2205 break;
2206 case CCValAssign::ZExt:
2207 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2208 break;
2209 case CCValAssign::AExt:
2210 if (Arg.getValueType().isVector() &&
2211 Arg.getValueType().getVectorElementType() == MVT::i1)
2212 Arg = lowerMasksToReg(ValArg: Arg, ValLoc: RegVT, DL: dl, DAG);
2213 else if (RegVT.is128BitVector()) {
2214 // Special case: passing MMX values in XMM registers.
2215 Arg = DAG.getBitcast(VT: MVT::i64, V: Arg);
2216 Arg = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: MVT::v2i64, Operand: Arg);
2217 Arg = getMOVL(DAG, dl, VT: MVT::v2i64, V1: DAG.getUNDEF(VT: MVT::v2i64), V2: Arg);
2218 } else
2219 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: RegVT, Operand: Arg);
2220 break;
2221 case CCValAssign::BCvt:
2222 Arg = DAG.getBitcast(VT: RegVT, V: Arg);
2223 break;
2224 case CCValAssign::Indirect: {
2225 if (isByVal) {
2226 // Memcpy the argument to a temporary stack slot to prevent
2227 // the caller from seeing any modifications the callee may make
2228 // as guaranteed by the `byval` attribute.
2229 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2230 Size: Flags.getByValSize(),
2231 Alignment: std::max(a: Align(16), b: Flags.getNonZeroByValAlign()), isSpillSlot: false);
2232 SDValue StackSlot =
2233 DAG.getFrameIndex(FI: FrameIdx, VT: getPointerTy(DL: DAG.getDataLayout()));
2234 Chain =
2235 CreateCopyOfByValArgument(Src: Arg, Dst: StackSlot, Chain, Flags, DAG, dl);
2236 // From now on treat this as a regular pointer
2237 Arg = StackSlot;
2238 isByVal = false;
2239 } else {
2240 // Store the argument.
2241 SDValue SpillSlot = DAG.CreateStackTemporary(VT: VA.getValVT());
2242 int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
2243 Chain = DAG.getStore(
2244 Chain, dl, Val: Arg, Ptr: SpillSlot,
2245 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI));
2246 Arg = SpillSlot;
2247 }
2248 break;
2249 }
2250 }
2251
2252 if (VA.needsCustom()) {
2253 assert(VA.getValVT() == MVT::v64i1 &&
2254 "Currently the only custom case is when we split v64i1 to 2 regs");
2255 // Split v64i1 value into two registers
2256 Passv64i1ArgInRegs(DL: dl, DAG, Arg, RegsToPass, VA, NextVA&: ArgLocs[++I], Subtarget);
2257 } else if (VA.isRegLoc()) {
2258 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
2259 const TargetOptions &Options = DAG.getTarget().Options;
2260 if (Options.EmitCallSiteInfo)
2261 CSInfo.ArgRegPairs.emplace_back(Args: VA.getLocReg(), Args&: I);
2262 if (isVarArg && IsWin64) {
2263 // The Win64 ABI requires an argument XMM register to be copied to the
2264 // corresponding shadow GPR if the callee is a varargs function.
2265 Register ShadowReg;
2266 switch (VA.getLocReg()) {
2267 case X86::XMM0: ShadowReg = X86::RCX; break;
2268 case X86::XMM1: ShadowReg = X86::RDX; break;
2269 case X86::XMM2: ShadowReg = X86::R8; break;
2270 case X86::XMM3: ShadowReg = X86::R9; break;
2271 }
2272 if (ShadowReg)
2273 RegsToPass.push_back(Elt: std::make_pair(x&: ShadowReg, y&: Arg));
2274 }
2275 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2276 assert(VA.isMemLoc());
2277 if (!StackPtr.getNode())
2278 StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(),
2279 VT: getPointerTy(DL: DAG.getDataLayout()));
2280 MemOpChains.push_back(Elt: LowerMemOpCallTo(Chain, StackPtr, Arg,
2281 dl, DAG, VA, Flags, isByVal));
2282 }
2283 }
2284
2285 if (!MemOpChains.empty())
2286 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains);
2287
2288 if (Subtarget.isPICStyleGOT()) {
2289 // ELF / PIC requires the GOT pointer to be in the EBX register before
2290 // function calls made via the PLT (except for regcall).
2291 if (!isTailCall) {
2292 // An indirect call with the RegCall calling convention may use up all the
2293 // general registers, so it is not suitable to bind the EBX register to the
2294 // GOT address; just let the register allocator handle it.
2295 if (CallConv != CallingConv::X86_RegCall)
2296 RegsToPass.push_back(Elt: std::make_pair(
2297 x: Register(X86::EBX), y: DAG.getNode(Opcode: X86ISD::GlobalBaseReg, DL: SDLoc(),
2298 VT: getPointerTy(DL: DAG.getDataLayout()))));
2299 } else {
2300 // If we are tail calling and generating PIC/GOT style code load the
2301 // address of the callee into ECX. The value in ecx is used as target of
2302 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2303 // for tail calls on PIC/GOT architectures. Normally we would just put the
2304 // address of GOT into ebx and then call target@PLT. But for tail calls
2305 // ebx would be restored (since ebx is callee saved) before jumping to the
2306 // target@PLT.
2307
2308 // Note: The actual moving to ECX is done further down.
2309 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
2310 if (G && !G->getGlobal()->hasLocalLinkage() &&
2311 G->getGlobal()->hasDefaultVisibility())
2312 Callee = LowerGlobalAddress(Op: Callee, DAG);
2313 else if (isa<ExternalSymbolSDNode>(Val: Callee))
2314 Callee = LowerExternalSymbol(Op: Callee, DAG);
2315 }
2316 }
2317
2318 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2319 (Subtarget.hasSSE1() || !M->getModuleFlag(Key: "SkipRaxSetup"))) {
2320 // From AMD64 ABI document:
2321 // For calls that may call functions that use varargs or stdargs
2322 // (prototype-less calls or calls to functions containing ellipsis (...) in
2323 // the declaration) %al is used as hidden argument to specify the number
2324 // of SSE registers used. The contents of %al do not need to match exactly
2325 // the number of registers, but must be an upper bound on the number of SSE
2326 // registers used and is in the range 0 - 8 inclusive.
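// For example (illustrative): a call such as printf("%f\n", 1.0) passes one
// argument in an SSE register, so the caller emits movl $1, %eax (or
// movb $1, %al) immediately before the call.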
2327
2328 // Count the number of XMM registers allocated.
2329 static const MCPhysReg XMMArgRegs[] = {
2330 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2331 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2332 };
2333 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(Regs: XMMArgRegs);
2334 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2335 && "SSE registers cannot be used when SSE is disabled");
2336 RegsToPass.push_back(Elt: std::make_pair(x: Register(X86::AL),
2337 y: DAG.getConstant(Val: NumXMMRegs, DL: dl,
2338 VT: MVT::i8)));
2339 }
2340
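// For a musttail call in a varargs function, re-emit copies of the registers
// forwarded from the caller's prologue (see forwardMustTailParameters) so
// they are passed along unchanged.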
2341 if (isVarArg && IsMustTail) {
2342 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2343 for (const auto &F : Forwards) {
2344 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: F.VReg, VT: F.VT);
2345 RegsToPass.push_back(Elt: std::make_pair(x: F.PReg, y&: Val));
2346 }
2347 }
2348
2349 // For tail calls, lower the arguments to the 'real' stack slots. Sibcalls
2350 // don't need this because the eligibility check rejects calls that require
2351 // shuffling arguments passed in memory.
2352 if (!IsSibcall && isTailCall) {
2353 // Force all the incoming stack arguments to be loaded from the stack
2354 // before any new outgoing arguments or the return address are stored to the
2355 // stack, because the outgoing stack slots may alias the incoming argument
2356 // stack slots, and the alias isn't otherwise explicit. This is slightly
2357 // more conservative than necessary, because it means that each store
2358 // effectively depends on every argument instead of just those arguments it
2359 // would clobber.
2360 Chain = DAG.getStackArgumentTokenFactor(Chain);
2361
2362 SmallVector<SDValue, 8> MemOpChains2;
2363 SDValue FIN;
2364 int FI = 0;
2365 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2366 ++I, ++OutsIndex) {
2367 CCValAssign &VA = ArgLocs[I];
2368
2369 if (VA.isRegLoc()) {
2370 if (VA.needsCustom()) {
2371 assert((CallConv == CallingConv::X86_RegCall) &&
2372 "Expecting custom case only in regcall calling convention");
2373 // This means that we are in a special case where one argument was
2374 // passed through two register locations; skip the next location.
2375 ++I;
2376 }
2377
2378 continue;
2379 }
2380
2381 assert(VA.isMemLoc());
2382 SDValue Arg = OutVals[OutsIndex];
2383 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2384 // Skip inalloca/preallocated arguments. They don't require any work.
2385 if (Flags.isInAlloca() || Flags.isPreallocated())
2386 continue;
2387 // Create frame index.
2388 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2389 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2390 FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true);
2391 FIN = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
2392
2393 if (Flags.isByVal()) {
2394 // Copy relative to framepointer.
2395 SDValue Source = DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL: dl);
2396 if (!StackPtr.getNode())
2397 StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(),
2398 VT: getPointerTy(DL: DAG.getDataLayout()));
2399 Source = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()),
2400 N1: StackPtr, N2: Source);
2401
2402 MemOpChains2.push_back(
2403 Elt: CreateCopyOfByValArgument(Src: Source, Dst: FIN, Chain, Flags, DAG, dl));
2404 } else {
2405 // Store relative to framepointer.
2406 MemOpChains2.push_back(Elt: DAG.getStore(
2407 Chain, dl, Val: Arg, Ptr: FIN,
2408 PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)));
2409 }
2410 }
2411
2412 if (!MemOpChains2.empty())
2413 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains2);
2414
2415 // Store the return address to the appropriate stack slot.
2416 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2417 PtrVT: getPointerTy(DL: DAG.getDataLayout()),
2418 SlotSize: RegInfo->getSlotSize(), FPDiff, dl);
2419 }
2420
2421 // Build a sequence of copy-to-reg nodes chained together with token chain
2422 // and glue operands which copy the outgoing args into registers.
2423 SDValue InGlue;
2424 for (const auto &[Reg, N] : RegsToPass) {
2425 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, Glue: InGlue);
2426 InGlue = Chain.getValue(R: 1);
2427 }
2428
2429 bool IsImpCall = false;
2430 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2431 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2432 // In the 64-bit large code model, we have to make all calls
2433 // through a register, since the call instruction's 32-bit
2434 // pc-relative offset may not be large enough to hold the whole
2435 // address.
2436 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2437 Callee->getOpcode() == ISD::ExternalSymbol) {
2438 // Lower direct calls to global addresses and external symbols. Setting
2439 // ForCall to true here has the effect of removing WrapperRIP when possible
2440 // to allow direct calls to be selected without first materializing the
2441 // address into a register.
2442 Callee = LowerGlobalOrExternal(Op: Callee, DAG, /*ForCall=*/true, IsImpCall: &IsImpCall);
2443 } else if (Subtarget.isTarget64BitILP32() &&
2444 Callee.getValueType() == MVT::i32) {
2445 // Zero-extend the 32-bit Callee address to 64 bits according to the x32 ABI.
2446 Callee = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i64, Operand: Callee);
2447 }
2448
2449 SmallVector<SDValue, 8> Ops;
2450
2451 if (!IsSibcall && isTailCall && !IsMustTail) {
2452 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: 0, Glue: InGlue, DL: dl);
2453 InGlue = Chain.getValue(R: 1);
2454 }
2455
2456 Ops.push_back(Elt: Chain);
2457 Ops.push_back(Elt: Callee);
2458
2459 if (isTailCall)
2460 Ops.push_back(Elt: DAG.getSignedTargetConstant(Val: FPDiff, DL: dl, VT: MVT::i32));
2461
2462 // Add argument registers to the end of the list so that they are known live
2463 // into the call.
2464 for (const auto &[Reg, N] : RegsToPass)
2465 Ops.push_back(Elt: DAG.getRegister(Reg, VT: N.getValueType()));
2466
2467 // Add a register mask operand representing the call-preserved registers.
2468 const uint32_t *Mask = [&]() {
2469 auto AdaptedCC = CallConv;
2470 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2471 // use X86_INTR calling convention because it has the same CSR mask
2472 // (same preserved registers).
2473 if (HasNCSR)
2474 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2475 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2476 // to use the CSR_NoRegs_RegMask.
2477 if (CB && CB->hasFnAttr(Kind: "no_callee_saved_registers"))
2478 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2479 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2480 }();
2481 assert(Mask && "Missing call preserved mask for calling convention");
2482
2483 if (MachineOperand::clobbersPhysReg(RegMask: Mask, PhysReg: RegInfo->getFramePtr())) {
2484 X86Info->setFPClobberedByCall(true);
2485 if (CLI.CB && isa<InvokeInst>(Val: CLI.CB))
2486 X86Info->setFPClobberedByInvoke(true);
2487 }
2488 if (MachineOperand::clobbersPhysReg(RegMask: Mask, PhysReg: RegInfo->getBaseRegister())) {
2489 X86Info->setBPClobberedByCall(true);
2490 if (CLI.CB && isa<InvokeInst>(Val: CLI.CB))
2491 X86Info->setBPClobberedByInvoke(true);
2492 }
2493
2494 // If this is an invoke in a 32-bit function using a funclet-based
2495 // personality, assume the function clobbers all registers. If an exception
2496 // is thrown, the runtime will not restore CSRs.
2497 // FIXME: Model this more precisely so that we can register allocate across
2498 // the normal edge and spill and fill across the exceptional edge.
2499 if (!Is64Bit && CLI.CB && isa<InvokeInst>(Val: CLI.CB)) {
2500 const Function &CallerFn = MF.getFunction();
2501 EHPersonality Pers =
2502 CallerFn.hasPersonalityFn()
2503 ? classifyEHPersonality(Pers: CallerFn.getPersonalityFn())
2504 : EHPersonality::Unknown;
2505 if (isFuncletEHPersonality(Pers))
2506 Mask = RegInfo->getNoPreservedMask();
2507 }
2508
2509 // Define a new register mask from the existing mask.
2510 uint32_t *RegMask = nullptr;
2511
2512 // In some calling conventions we need to remove the used physical registers
2513 // from the reg mask. Create a new RegMask for such calling conventions.
2514 // RegMask for calling conventions that disable only return registers (e.g.
2515 // preserve_most) will be modified later in LowerCallResult.
2516 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CC: CallConv) || HasNCSR;
2517 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CC: CallConv)) {
2518 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2519
2520 // Allocate a new Reg Mask and copy Mask.
2521 RegMask = MF.allocateRegMask();
2522 unsigned RegMaskSize = MachineOperand::getRegMaskSize(NumRegs: TRI->getNumRegs());
2523 memcpy(dest: RegMask, src: Mask, n: sizeof(RegMask[0]) * RegMaskSize);
2524
2525 // Make sure all sub registers of the argument registers are reset
2526 // in the RegMask.
2527 if (ShouldDisableArgRegs) {
2528 for (auto const &RegPair : RegsToPass)
2529 for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg: RegPair.first))
2530 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2531 }
2532
2533 // Create the RegMask Operand according to our updated mask.
2534 Ops.push_back(Elt: DAG.getRegisterMask(RegMask));
2535 } else {
2536 // Create the RegMask Operand according to the static mask.
2537 Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
2538 }
2539
2540 if (InGlue.getNode())
2541 Ops.push_back(Elt: InGlue);
2542
2543 if (isTailCall) {
2544 // We used to do:
2545 //// If this is the first return lowered for this function, add the regs
2546 //// to the liveout set for the function.
2547 // This isn't right, although it's probably harmless on x86; liveouts
2548 // should be computed from returns not tail calls. Consider a void
2549 // function making a tail call to a function returning int.
2550 MF.getFrameInfo().setHasTailCall();
2551 SDValue Ret = DAG.getNode(Opcode: X86ISD::TC_RETURN, DL: dl, VT: MVT::Other, Ops);
2552
2553 if (IsCFICall)
2554 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2555
2556 DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
2557 DAG.addCallSiteInfo(Node: Ret.getNode(), CallInfo: std::move(CSInfo));
2558 return Ret;
2559 }
2560
2561 // Returns a chain & a glue for retval copy to use.
2562 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
2563 if (IsImpCall) {
2564 Chain = DAG.getNode(Opcode: X86ISD::IMP_CALL, DL: dl, VTList: NodeTys, Ops);
2565 } else if (IsNoTrackIndirectCall) {
2566 Chain = DAG.getNode(Opcode: X86ISD::NT_CALL, DL: dl, VTList: NodeTys, Ops);
2567 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CB: CLI.CB)) {
2568 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2569 // expanded to the call, directly followed by a special marker sequence and
2570 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2571 assert(!isTailCall &&
2572 "tail calls cannot be marked with clang.arc.attachedcall");
2573 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64-bit mode");
2574
2575 // Add a target global address for the retainRV/claimRV runtime function
2576 // just before the call target.
2577 Function *ARCFn = *objcarc::getAttachedARCFunction(CB: CLI.CB);
2578 auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
2579 auto GA = DAG.getTargetGlobalAddress(GV: ARCFn, DL: dl, VT: PtrVT);
2580 Ops.insert(I: Ops.begin() + 1, Elt: GA);
2581 Chain = DAG.getNode(Opcode: X86ISD::CALL_RVMARKER, DL: dl, VTList: NodeTys, Ops);
2582 } else {
2583 Chain = DAG.getNode(Opcode: X86ISD::CALL, DL: dl, VTList: NodeTys, Ops);
2584 }
2585
2586 if (IsCFICall)
2587 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2588
2589 InGlue = Chain.getValue(R: 1);
2590 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
2591 DAG.addCallSiteInfo(Node: Chain.getNode(), CallInfo: std::move(CSInfo));
2592
2593 // Save heapallocsite metadata.
2594 if (CLI.CB)
2595 if (MDNode *HeapAlloc = CLI.CB->getMetadata(Kind: "heapallocsite"))
2596 DAG.addHeapAllocSite(Node: Chain.getNode(), MD: HeapAlloc);
2597
2598 // Create the CALLSEQ_END node.
2599 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2600 if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg: isVarArg,
2601 GuaranteeTCO: DAG.getTarget().Options.GuaranteedTailCallOpt))
2602 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2603 else if (!canGuaranteeTCO(CC: CallConv) && IsCalleePopSRet)
2604 // If this call passes a struct-return pointer, the callee
2605 // pops that struct pointer.
2606 NumBytesForCalleeToPop = 4;
2607
2608 // Returns a glue for retval copy to use.
2609 if (!IsSibcall) {
2610 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: NumBytesForCalleeToPop,
2611 Glue: InGlue, DL: dl);
2612 InGlue = Chain.getValue(R: 1);
2613 }
2614
2615 if (CallingConv::PreserveNone == CallConv)
2616 for (const ISD::OutputArg &Out : Outs) {
2617 if (Out.Flags.isSwiftSelf() || Out.Flags.isSwiftAsync() ||
2618 Out.Flags.isSwiftError()) {
2619 errorUnsupported(DAG, dl,
2620 Msg: "Swift attributes can't be used with preserve_none");
2621 break;
2622 }
2623 }
2624
2625 // Handle result values, copying them out of physregs into vregs that we
2626 // return.
2627 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2628 InVals, RegMask);
2629}
2630
2631//===----------------------------------------------------------------------===//
2632// Fast Calling Convention (tail call) implementation
2633//===----------------------------------------------------------------------===//
2634
2635 // Like stdcall, the callee cleans up the arguments, except that ECX is
2636 // reserved for storing the tail-called function's address. Only 2 registers are
2637 // free for argument passing (inreg). Tail call optimization is performed
2638 // provided:
2639 // * tailcallopt is enabled
2640 // * caller/callee are fastcc
2641 // On the X86_64 architecture with GOT-style position independent code, only
2642 // local (within module) calls are supported at the moment.
2643 // To keep the stack aligned according to the platform ABI, the function
2644 // GetAlignedArgumentStackSize ensures that the argument delta is always a
2645 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld, for example.)
2646// If a tail called function callee has more arguments than the caller the
2647// caller needs to make sure that there is room to move the RETADDR to. This is
2648// achieved by reserving an area the size of the argument delta right after the
2649// original RETADDR, but before the saved framepointer or the spilled registers
2650// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2651// stack layout:
2652// arg1
2653// arg2
2654// RETADDR
2655// [ new RETADDR
2656// move area ]
2657// (possible EBP)
2658// ESI
2659// EDI
2660// local1 ..
2661
2662 /// Align the argument stack size so that the stack stays aligned once the
2663 /// return address slot is accounted for (e.g. 16n + 12 for 16-byte alignment).
2664unsigned
2665X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2666 SelectionDAG &DAG) const {
2667 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2668 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2669 assert(StackSize % SlotSize == 0 &&
2670 "StackSize must be a multiple of SlotSize");
2671 return alignTo(Size: StackSize + SlotSize, A: StackAlignment) - SlotSize;
2672}
2673
2674 /// Return true if the given outgoing stack call argument is already available
2675 /// at the same relative position in the caller's incoming argument stack.
2676static
2677bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2678 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2679 const X86InstrInfo *TII, const CCValAssign &VA) {
2680 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2681
2682 for (;;) {
2683 // Look through nodes that don't alter the bits of the incoming value.
2684 unsigned Op = Arg.getOpcode();
2685 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2686 Op == ISD::AssertZext) {
2687 Arg = Arg.getOperand(i: 0);
2688 continue;
2689 }
2690 if (Op == ISD::TRUNCATE) {
2691 const SDValue &TruncInput = Arg.getOperand(i: 0);
2692 if (TruncInput.getOpcode() == ISD::AssertZext &&
2693 cast<VTSDNode>(Val: TruncInput.getOperand(i: 1))->getVT() ==
2694 Arg.getValueType()) {
2695 Arg = TruncInput.getOperand(i: 0);
2696 continue;
2697 }
2698 }
2699 break;
2700 }
2701
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR.isVirtual())
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      unsigned Opcode = Def->getOpcode();
      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
           Opcode == X86::LEA64_32r) &&
          Def->getOperand(1).isFI()) {
        FI = Def->getOperand(1).getIndex();
        Bytes = Flags.getByValSize();
      } else
        return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
    FI = FINode->getIndex();
    Bytes = Flags.getByValSize();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI.isFixedObjectIndex(FI))
    return false;

  if (Offset != MFI.getObjectOffset(FI))
    return false;

  // If this is not byval, check that the argument stack object is immutable.
  // inalloca and argument copy elision can create mutable argument stack
  // objects. Byval objects can be mutated, but a byval call intends to pass
  // the mutated memory.
  if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
    return false;

  if (VA.getLocVT().getFixedSizeInBits() >
      Arg.getValueSizeInBits().getFixedValue()) {
    // If the argument location is wider than the argument type, check that any
    // extension flags match.
    if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
        Flags.isSExt() != MFI.isObjectSExt(FI)) {
      return false;
    }
  }

  return Bytes == MFI.getObjectSize(FI);
}
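
// As an illustration (a hypothetical caller/callee pair): when a function that
// receives an i32 on the stack forwards it to a tail callee expecting the same
// i32 at the same stack offset, the outgoing value is just a load from the
// caller's fixed, immutable incoming-argument object with matching offset and
// size, so the checks above all pass and no copy is needed.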

/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
/// Note that the x86 backend does not check musttail calls for eligibility! The
/// rest of x86 tail call lowering must be prepared to forward arguments of any
/// type.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
    TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
    SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
  SelectionDAG &DAG = CLI.DAG;
  const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CalleeCC = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;

  if (!mayTailCallThisCC(CalleeCC))
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();

  // If the caller's return type is x86_fp80 and the callee's is not, then the
  // FP_EXTEND of the call result is not a nop. It's not safe to perform a
  // tailcall optimization here.
  if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
    return false;

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
  bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
                        CalleeCC == CallingConv::Tail ||
                        CalleeCC == CallingConv::SwiftTail;

  // Win64 functions have extra shadow space for argument homing. Don't do the
  // sibcall if the caller and callee have mismatched expectations for this
  // space.
  if (IsCalleeWin64 != IsCallerWin64)
    return false;

  if (IsGuaranteeTCO) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }
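
  // For example, under -tailcallopt a fastcc caller tail-calling a fastcc
  // callee is accepted by the check above even when the callee needs more
  // argument stack; LowerCall then performs the RETADDR move described at the
  // top of this section.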

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
  // emit a special epilogue.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if we're an sret-returning function and the
  // callee is incompatible. See comment in LowerReturn about why
  // hasStructRetAttr is insufficient.
  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
    // For a compatible tail call the callee must return our sret pointer. So it
    // needs to (a) be an sret function itself and (b) be passed our sret as its
    // sret argument. Condition (b) is harder to determine.
    return false;
  } else if (IsCalleePopSRet)
    // The callee pops an sret, so we cannot tail-call, as our caller doesn't
    // expect that.
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    for (const auto &VA : ArgLocs)
      if (!VA.isRegLoc())
        return false;
  }
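
  // For example, a vararg call whose fixed and variadic arguments all happen
  // to be assigned to registers is not rejected by the check above; a single
  // stack-assigned argument is enough to disqualify the sibcall here.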

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack. Therefore, if the result is not used by the caller it is not safe
  // to optimize this into a sibcall.
  bool Unused = false;
  for (const auto &In : Ins) {
    if (!In.Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
    RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (const auto &VA : RVLocs) {
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }
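
  // For example, a callee returning x86_fp80 places its result in FP0 on the
  // x87 stack; if the caller never uses that result, nothing after the
  // tail-call jump would pop it, which is why unused FP0/FP1 results are
  // rejected above.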

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;
  // The callee has to preserve all registers the caller needs to preserve.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // The caller's stack frame cannot be replaced by the tail callee's if the
  // caller is required to preserve all registers. Conservatively prevent tail
  // call optimization even if hypothetically all the registers are used for
  // passing formal parameters or returning values.
  if (CallerF.hasFnAttribute("no_caller_saved_registers"))
    return false;

  unsigned StackArgsSize = CCInfo.getStackSize();

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    if (StackArgsSize > 0) {
      // Check whether the arguments are already laid out in the same way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
        const CCValAssign &VA = ArgLocs[I];
        SDValue Arg = OutVals[I];
        ISD::ArgFlagsTy Flags = Outs[I].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
                                   TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments so watch out for those.
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;
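
      // For example, in PIC mode an indirect call whose 'inreg' arguments
      // already occupy ECX and EDX leaves only one of {EAX, ECX, EDX} free,
      // which is not enough for both the callee address and the PIC base, so
      // the loop below rejects the sibcall.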

      for (const auto &VA : ArgLocs) {
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If we have bytes to pop, the callee must pop them.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }
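
  // For example, an x86-32 stdcall caller that must pop 8 bytes of its own
  // incoming arguments on return can only tail-call a callee that also pops
  // exactly 8 bytes of outgoing stack arguments; any mismatch is rejected
  // above because the stack pointer would otherwise be wrong in the caller's
  // caller.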

  return true;
}

/// Determines whether the callee is required to pop its own arguments.
/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
  // If GuaranteeTCO is true, we force some calls to be callee pop so that we
  // can guarantee TCO.
  if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
    return true;

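  // For example, X86::isCalleePop(CallingConv::X86_StdCall, /*is64Bit=*/false,
  // /*IsVarArg=*/false, /*GuaranteeTCO=*/false) is true because 32-bit stdcall
  // callees pop their own stack arguments, while the same query with
  // is64Bit == true is false (see the switch below).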
  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
    return !is64Bit;
  }
}
