1 | //===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file implements the lowering of LLVM calls to DAG nodes. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "X86.h" |
15 | #include "X86CallingConv.h" |
16 | #include "X86FrameLowering.h" |
17 | #include "X86ISelLowering.h" |
18 | #include "X86InstrBuilder.h" |
19 | #include "X86MachineFunctionInfo.h" |
20 | #include "X86TargetMachine.h" |
21 | #include "X86TargetObjectFile.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/Analysis/ObjCARCUtil.h" |
24 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
25 | #include "llvm/CodeGen/MachineModuleInfo.h" |
26 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
27 | #include "llvm/IR/DiagnosticInfo.h" |
28 | #include "llvm/IR/IRBuilder.h" |
29 | #include "llvm/IR/Module.h" |
30 | |
31 | #define DEBUG_TYPE "x86-isel" |
32 | |
33 | using namespace llvm; |
34 | |
STATISTIC(NumTailCalls, "Number of tail calls");
36 | |
37 | /// Call this when the user attempts to do something unsupported, like |
38 | /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike |
39 | /// report_fatal_error, so calling code should attempt to recover without |
40 | /// crashing. |
41 | static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, |
42 | const char *Msg) { |
43 | MachineFunction &MF = DAG.getMachineFunction(); |
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
46 | } |
47 | |
/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the return registers.
51 | static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) { |
52 | switch (CC) { |
53 | default: |
54 | return false; |
55 | case CallingConv::X86_RegCall: |
56 | case CallingConv::PreserveMost: |
57 | case CallingConv::PreserveAll: |
58 | return true; |
59 | } |
60 | } |
61 | |
/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
/// the parameters.
65 | static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) { |
66 | return CC == CallingConv::X86_RegCall; |
67 | } |
68 | |
69 | static std::pair<MVT, unsigned> |
70 | handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, |
71 | const X86Subtarget &Subtarget) { |
72 | // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling |
73 | // convention is one that uses k registers. |
74 | if (NumElts == 2) |
75 | return {MVT::v2i64, 1}; |
76 | if (NumElts == 4) |
77 | return {MVT::v4i32, 1}; |
78 | if (NumElts == 8 && CC != CallingConv::X86_RegCall && |
79 | CC != CallingConv::Intel_OCL_BI) |
80 | return {MVT::v8i16, 1}; |
81 | if (NumElts == 16 && CC != CallingConv::X86_RegCall && |
82 | CC != CallingConv::Intel_OCL_BI) |
83 | return {MVT::v16i8, 1}; |
84 | // v32i1 passes in ymm unless we have BWI and the calling convention is |
85 | // regcall. |
86 | if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall)) |
87 | return {MVT::v32i8, 1}; |
88 | // Split v64i1 vectors if we don't have v64i8 available. |
89 | if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) { |
90 | if (Subtarget.useAVX512Regs()) |
91 | return {MVT::v64i8, 1}; |
92 | return {MVT::v32i8, 2}; |
93 | } |
94 | |
95 | // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};
99 | |
100 | return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0}; |
101 | } |
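
// For illustration, with the plain C calling convention (not RegCall or
// Intel OCL BI) the mapping above works out to roughly:
//   v16i1                            -> one v16i8 register
//   v32i1 without BWI                -> one v32i8 register
//   v64i1 + BWI, 512-bit regs usable -> one v64i8 register
//   v64i1 + BWI, 256-bit preferred   -> two v32i8 registers
//   v3i1 / v128i1 / v64i1 w/o BWI    -> split into per-element i8 scalars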
102 | |
103 | MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
104 | CallingConv::ID CC, |
105 | EVT VT) const { |
106 | if (VT.isVector()) { |
107 | if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) { |
108 | unsigned NumElts = VT.getVectorNumElements(); |
109 | |
110 | MVT RegisterVT; |
111 | unsigned NumRegisters; |
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
114 | if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) |
115 | return RegisterVT; |
116 | } |
117 | |
118 | if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8) |
119 | return MVT::v8f16; |
120 | } |
121 | |
122 | // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled. |
123 | if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() && |
124 | !Subtarget.hasX87()) |
125 | return MVT::i32; |
126 | |
127 | if (VT.isVector() && VT.getVectorElementType() == MVT::bf16) |
    return getRegisterTypeForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));
130 | |
131 | if (VT == MVT::bf16) |
132 | return MVT::f16; |
133 | |
134 | return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
135 | } |
136 | |
137 | unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
138 | CallingConv::ID CC, |
139 | EVT VT) const { |
140 | if (VT.isVector()) { |
141 | if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) { |
142 | unsigned NumElts = VT.getVectorNumElements(); |
143 | |
144 | MVT RegisterVT; |
145 | unsigned NumRegisters; |
      std::tie(RegisterVT, NumRegisters) =
          handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
148 | if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) |
149 | return NumRegisters; |
150 | } |
151 | |
152 | if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8) |
153 | return 1; |
154 | } |
155 | |
156 | // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if |
157 | // x87 is disabled. |
158 | if (!Subtarget.is64Bit() && !Subtarget.hasX87()) { |
159 | if (VT == MVT::f64) |
160 | return 2; |
161 | if (VT == MVT::f80) |
162 | return 3; |
163 | } |
164 | |
165 | if (VT.isVector() && VT.getVectorElementType() == MVT::bf16) |
    return getNumRegistersForCallingConv(Context, CC,
                                         VT.changeVectorElementType(MVT::f16));
168 | |
169 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
170 | } |
171 | |
172 | unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( |
173 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
174 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
175 | // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. |
176 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
177 | Subtarget.hasAVX512() && |
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
179 | (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) || |
180 | VT.getVectorNumElements() > 64)) { |
181 | RegisterVT = MVT::i8; |
182 | IntermediateVT = MVT::i1; |
183 | NumIntermediates = VT.getVectorNumElements(); |
184 | return NumIntermediates; |
185 | } |
186 | |
187 | // Split v64i1 vectors if we don't have v64i8 available. |
188 | if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && |
189 | CC != CallingConv::X86_RegCall) { |
190 | RegisterVT = MVT::v32i8; |
191 | IntermediateVT = MVT::v32i1; |
192 | NumIntermediates = 2; |
193 | return 2; |
194 | } |
195 | |
196 | // Split vNbf16 vectors according to vNf16. |
197 | if (VT.isVector() && VT.getVectorElementType() == MVT::bf16) |
    VT = VT.changeVectorElementType(MVT::f16);
199 | |
  return TargetLowering::getVectorTypeBreakdownForCallingConv(
      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
202 | } |
203 | |
204 | EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, |
205 | LLVMContext& Context, |
206 | EVT VT) const { |
207 | if (!VT.isVector()) |
208 | return MVT::i8; |
209 | |
210 | if (Subtarget.hasAVX512()) { |
211 | // Figure out what this type will be legalized to. |
212 | EVT LegalVT = VT; |
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);
215 | |
216 | // If we got a 512-bit vector then we'll definitely have a vXi1 compare. |
217 | if (LegalVT.getSimpleVT().is512BitVector()) |
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
219 | |
220 | if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) { |
221 | // If we legalized to less than a 512-bit vector, then we will use a vXi1 |
222 | // compare for vXi32/vXi64 for sure. If we have BWI we will also support |
223 | // vXi16/vXi8. |
224 | MVT EltVT = LegalVT.getSimpleVT().getVectorElementType(); |
225 | if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32) |
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
227 | } |
228 | } |
229 | |
230 | return VT.changeVectorElementTypeToInteger(); |
231 | } |
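
// As a rough illustration of the rule above (types shown as LLVM IR):
//   %c = icmp slt <8 x i32> %a, %b
// yields a v8i1 setcc result when AVX-512VL is available (k-register
// compare), but the usual v8i32 all-ones/all-zeros mask on plain AVX2; once
// the operands legalize to a 512-bit vector, the vXi1 form is always used.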
232 | |
233 | /// Helper for getByValTypeAlignment to determine |
234 | /// the desired ByVal argument alignment. |
235 | static void getMaxByValAlign(Type *Ty, Align &MaxAlign) { |
236 | if (MaxAlign == 16) |
237 | return; |
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
      MaxAlign = Align(16);
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
244 | if (EltAlign > MaxAlign) |
245 | MaxAlign = EltAlign; |
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign);
250 | if (EltAlign > MaxAlign) |
251 | MaxAlign = EltAlign; |
252 | if (MaxAlign == 16) |
253 | break; |
254 | } |
255 | } |
256 | } |
257 | |
258 | /// Return the desired alignment for ByVal aggregate |
259 | /// function arguments in the caller parameter area. For X86, aggregates |
260 | /// that contain SSE vectors are placed at 16-byte boundaries while the rest |
261 | /// are at 4-byte boundaries. |
262 | uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty, |
263 | const DataLayout &DL) const { |
264 | if (Subtarget.is64Bit()) { |
265 | // Max of 8 and alignment of type. |
266 | Align TyAlign = DL.getABITypeAlign(Ty); |
267 | if (TyAlign > 8) |
268 | return TyAlign.value(); |
269 | return 8; |
270 | } |
271 | |
272 | Align Alignment(4); |
273 | if (Subtarget.hasSSE1()) |
    getMaxByValAlign(Ty, Alignment);
275 | return Alignment.value(); |
276 | } |
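
// For example (subject to the DataLayout in use): on x86-64 a byval struct of
// two i32 fields is placed at an 8-byte boundary, while on 32-bit x86 with
// SSE a struct containing a <4 x float> member is bumped to 16 bytes and a
// plain struct of ints stays at 4.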
277 | |
278 | /// It returns EVT::Other if the type should be determined using generic |
279 | /// target-independent logic. |
280 | /// For vector ops we check that the overall size isn't larger than our |
281 | /// preferred vector width. |
282 | EVT X86TargetLowering::getOptimalMemOpType( |
283 | const MemOp &Op, const AttributeList &FuncAttributes) const { |
  if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
287 | // FIXME: Check if unaligned 64-byte accesses are slow. |
288 | if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() && |
289 | (Subtarget.getPreferVectorWidth() >= 512)) { |
290 | return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; |
291 | } |
292 | // FIXME: Check if unaligned 32-byte accesses are slow. |
293 | if (Op.size() >= 32 && Subtarget.hasAVX() && |
294 | Subtarget.useLight256BitInstructions()) { |
295 | // Although this isn't a well-supported type for AVX1, we'll let |
296 | // legalization and shuffle lowering produce the optimal codegen. If we |
297 | // choose an optimal type with a vector element larger than a byte, |
298 | // getMemsetStores() may create an intermediate splat (using an integer |
299 | // multiply) before we splat as a vector. |
300 | return MVT::v32i8; |
301 | } |
302 | if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128)) |
303 | return MVT::v16i8; |
304 | // TODO: Can SSE1 handle a byte vector? |
305 | // If we have SSE1 registers we should be able to use them. |
306 | if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) && |
307 | (Subtarget.getPreferVectorWidth() >= 128)) |
308 | return MVT::v4f32; |
309 | } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) && |
310 | Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) { |
311 | // Do not use f64 to lower memcpy if source is string constant. It's |
312 | // better to use i32 to avoid the loads. |
313 | // Also, do not use f64 to lower memset unless this is a memset of zeros. |
314 | // The gymnastics of splatting a byte value into an XMM register and then |
315 | // only using 8-byte stores (because this is a CPU with slow unaligned |
316 | // 16-byte accesses) makes that a loser. |
317 | return MVT::f64; |
318 | } |
319 | } |
320 | // This is a compromise. If we reach here, unaligned accesses may be slow on |
321 | // this target. However, creating smaller, aligned accesses could be even |
322 | // slower and would certainly be a lot more code. |
323 | if (Subtarget.is64Bit() && Op.size() >= 8) |
324 | return MVT::i64; |
325 | return MVT::i32; |
326 | } |
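
// A few concrete outcomes of the selection above (assuming no NoImplicitFloat
// attribute and fast unaligned 16-byte accesses):
//   64+ byte memcpy, AVX-512BW, 512-bit vectors preferred -> v64i8
//   32+ byte op, AVX with cheap 256-bit instructions      -> v32i8
//   16+ byte op with only SSE2                            -> v16i8
//   8 byte zero-memset on 32-bit with SSE2                -> f64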
327 | |
328 | bool X86TargetLowering::isSafeMemOpType(MVT VT) const { |
329 | if (VT == MVT::f32) |
330 | return Subtarget.hasSSE1(); |
331 | if (VT == MVT::f64) |
332 | return Subtarget.hasSSE2(); |
333 | return true; |
334 | } |
335 | |
336 | static bool isBitAligned(Align Alignment, uint64_t SizeInBits) { |
337 | return (8 * Alignment.value()) % SizeInBits == 0; |
338 | } |
339 | |
340 | bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const { |
  if (isBitAligned(Alignment, VT.getSizeInBits()))
342 | return true; |
343 | switch (VT.getSizeInBits()) { |
344 | default: |
345 | // 8-byte and under are always assumed to be fast. |
346 | return true; |
347 | case 128: |
348 | return !Subtarget.isUnalignedMem16Slow(); |
349 | case 256: |
350 | return !Subtarget.isUnalignedMem32Slow(); |
351 | // TODO: What about AVX-512 (512-bit) accesses? |
352 | } |
353 | } |
354 | |
355 | bool X86TargetLowering::allowsMisalignedMemoryAccesses( |
356 | EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags, |
357 | unsigned *Fast) const { |
358 | if (Fast) |
359 | *Fast = isMemoryAccessFast(VT, Alignment); |
360 | // NonTemporal vector memory ops must be aligned. |
361 | if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { |
362 | // NT loads can only be vector aligned, so if its less aligned than the |
363 | // minimum vector size (which we can split the vector down to), we might as |
364 | // well use a regular unaligned vector load. |
365 | // We don't have any NT loads pre-SSE41. |
366 | if (!!(Flags & MachineMemOperand::MOLoad)) |
367 | return (Alignment < 16 || !Subtarget.hasSSE41()); |
368 | return false; |
369 | } |
370 | // Misaligned accesses of any size are always allowed. |
371 | return true; |
372 | } |
373 | |
374 | bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context, |
375 | const DataLayout &DL, EVT VT, |
376 | unsigned AddrSpace, Align Alignment, |
377 | MachineMemOperand::Flags Flags, |
378 | unsigned *Fast) const { |
379 | if (Fast) |
380 | *Fast = isMemoryAccessFast(VT, Alignment); |
381 | if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { |
382 | if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, |
383 | /*Fast=*/nullptr)) |
384 | return true; |
385 | // NonTemporal vector memory ops are special, and must be aligned. |
    if (!isBitAligned(Alignment, VT.getSizeInBits()))
387 | return false; |
388 | switch (VT.getSizeInBits()) { |
389 | case 128: |
390 | if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41()) |
391 | return true; |
392 | if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2()) |
393 | return true; |
394 | return false; |
395 | case 256: |
396 | if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2()) |
397 | return true; |
398 | if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX()) |
399 | return true; |
400 | return false; |
401 | case 512: |
402 | if (Subtarget.hasAVX512() && Subtarget.hasEVEX512()) |
403 | return true; |
404 | return false; |
405 | default: |
406 | return false; // Don't have NonTemporal vector memory ops of this size. |
407 | } |
408 | } |
409 | return true; |
410 | } |
411 | |
412 | /// Return the entry encoding for a jump table in the |
413 | /// current function. The returned value is a member of the |
414 | /// MachineJumpTableInfo::JTEntryKind enum. |
415 | unsigned X86TargetLowering::getJumpTableEncoding() const { |
416 | // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF |
417 | // symbol. |
418 | if (isPositionIndependent() && Subtarget.isPICStyleGOT()) |
419 | return MachineJumpTableInfo::EK_Custom32; |
420 | if (isPositionIndependent() && |
421 | getTargetMachine().getCodeModel() == CodeModel::Large && |
422 | !Subtarget.isTargetCOFF()) |
423 | return MachineJumpTableInfo::EK_LabelDifference64; |
424 | |
425 | // Otherwise, use the normal jump table encoding heuristics. |
426 | return TargetLowering::getJumpTableEncoding(); |
427 | } |
428 | |
429 | bool X86TargetLowering::useSoftFloat() const { |
430 | return Subtarget.useSoftFloat(); |
431 | } |
432 | |
433 | void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC, |
434 | ArgListTy &Args) const { |
435 | |
436 | // Only relabel X86-32 for C / Stdcall CCs. |
437 | if (Subtarget.is64Bit()) |
438 | return; |
439 | if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) |
440 | return; |
441 | unsigned ParamRegs = 0; |
442 | if (auto *M = MF->getFunction().getParent()) |
443 | ParamRegs = M->getNumberRegisterParameters(); |
444 | |
  // Mark the first N int arguments as being passed in registers.
446 | for (auto &Arg : Args) { |
447 | Type *T = Arg.Ty; |
448 | if (T->isIntOrPtrTy()) |
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
452 | numRegs = 2; |
453 | if (ParamRegs < numRegs) |
454 | return; |
455 | ParamRegs -= numRegs; |
456 | Arg.IsInReg = true; |
457 | } |
458 | } |
459 | } |
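
// For illustration (hypothetical scenario): compiling 32-bit code with
// -mregparm=3, a libcall such as __udivdi3(i64, i64) would get its first
// argument (two 32-bit register slots) marked inreg, while the second no
// longer fits in the remaining slot and stays on the stack.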
460 | |
461 | const MCExpr * |
462 | X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
463 | const MachineBasicBlock *MBB, |
464 | unsigned uid,MCContext &Ctx) const{ |
465 | assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); |
466 | // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF |
467 | // entries. |
  return MCSymbolRefExpr::create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
470 | } |
471 | |
472 | /// Returns relocation base for the given PIC jumptable. |
473 | SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, |
474 | SelectionDAG &DAG) const { |
475 | if (!Subtarget.is64Bit()) |
476 | // This doesn't have SDLoc associated with it, but is not really the |
477 | // same as a Register. |
    return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
480 | return Table; |
481 | } |
482 | |
483 | /// This returns the relocation base for the given PIC jumptable, |
484 | /// the same as getPICJumpTableRelocBase, but as an MCExpr. |
485 | const MCExpr *X86TargetLowering:: |
486 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, |
487 | MCContext &Ctx) const { |
488 | // X86-64 uses RIP relative addressing based on the jump table label. |
489 | if (Subtarget.isPICStyleRIPRel() || |
490 | (Subtarget.is64Bit() && |
491 | getTargetMachine().getCodeModel() == CodeModel::Large)) |
492 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
493 | |
494 | // Otherwise, the reference is relative to the PIC base. |
  return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
496 | } |
497 | |
498 | std::pair<const TargetRegisterClass *, uint8_t> |
499 | X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, |
500 | MVT VT) const { |
501 | const TargetRegisterClass *RRC = nullptr; |
502 | uint8_t Cost = 1; |
503 | switch (VT.SimpleTy) { |
504 | default: |
505 | return TargetLowering::findRepresentativeClass(TRI, VT); |
506 | case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: |
507 | RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; |
508 | break; |
509 | case MVT::x86mmx: |
510 | RRC = &X86::VR64RegClass; |
511 | break; |
512 | case MVT::f32: case MVT::f64: |
513 | case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: |
514 | case MVT::v4f32: case MVT::v2f64: |
515 | case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: |
516 | case MVT::v8f32: case MVT::v4f64: |
517 | case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: |
518 | case MVT::v16f32: case MVT::v8f64: |
519 | RRC = &X86::VR128XRegClass; |
520 | break; |
521 | } |
  return std::make_pair(RRC, Cost);
523 | } |
524 | |
525 | unsigned X86TargetLowering::getAddressSpace() const { |
526 | if (Subtarget.is64Bit()) |
527 | return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257; |
528 | return 256; |
529 | } |
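
// I.e. the segment address space used below is %fs (257) for 64-bit user
// code, %gs (256) for the 64-bit kernel code model, and %gs (256) for 32-bit
// code, matching X86AS::FS / X86AS::GS.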
530 | |
531 | static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { |
532 | return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || |
         (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
534 | } |
535 | |
536 | static Constant* SegmentOffset(IRBuilderBase &IRB, |
537 | int Offset, unsigned AddressSpace) { |
  return ConstantExpr::getIntToPtr(
      ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
      IRB.getPtrTy(AddressSpace));
541 | } |
542 | |
543 | Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { |
544 | // glibc, bionic, and Fuchsia have a special slot for the stack guard in |
545 | // tcbhead_t; use it instead of the usual global variable (see |
546 | // sysdeps/{i386,x86_64}/nptl/tls.h) |
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
548 | unsigned AddressSpace = getAddressSpace(); |
549 | |
550 | // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value. |
551 | if (Subtarget.isTargetFuchsia()) |
      return SegmentOffset(IRB, 0x10, AddressSpace);
553 | |
554 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
555 | // Specially, some users may customize the base reg and offset. |
556 | int Offset = M->getStackProtectorGuardOffset(); |
557 | // If we don't set -stack-protector-guard-offset value: |
558 | // %fs:0x28, unless we're using a Kernel code model, in which case |
559 | // it's %gs:0x28. gs:0x14 on i386. |
560 | if (Offset == INT_MAX) |
561 | Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; |
562 | |
563 | StringRef GuardReg = M->getStackProtectorGuardReg(); |
564 | if (GuardReg == "fs" ) |
565 | AddressSpace = X86AS::FS; |
566 | else if (GuardReg == "gs" ) |
567 | AddressSpace = X86AS::GS; |
568 | |
    // Use the symbol guard if the user specified one.
570 | StringRef GuardSymb = M->getStackProtectorGuardSymbol(); |
571 | if (!GuardSymb.empty()) { |
      GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
      if (!GV) {
        Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
                                       : Type::getInt32Ty(M->getContext());
576 | GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, |
577 | nullptr, GuardSymb, nullptr, |
578 | GlobalValue::NotThreadLocal, AddressSpace); |
579 | if (!Subtarget.isTargetDarwin()) |
580 | GV->setDSOLocal(M->getDirectAccessExternalData()); |
581 | } |
582 | return GV; |
583 | } |
584 | |
585 | return SegmentOffset(IRB, Offset, AddressSpace); |
586 | } |
587 | return TargetLowering::getIRStackGuard(IRB); |
588 | } |
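
// For example, with the defaults on x86-64 glibc this reads the guard from
// %fs:0x28 and on i386 from %gs:0x14; a module built with
// -mstack-protector-guard-reg=gs and -mstack-protector-guard-offset=0x40
// would read %gs:0x40 instead (flag spellings per clang; shown for
// illustration only).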
589 | |
590 | void X86TargetLowering::insertSSPDeclarations(Module &M) const { |
591 | // MSVC CRT provides functionalities for stack protection. |
592 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
593 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
594 | // MSVC CRT has a global variable holding security cookie. |
    M.getOrInsertGlobal("__security_cookie",
                        PointerType::getUnqual(M.getContext()));
597 | |
598 | // MSVC CRT has a function to validate security cookie. |
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        PointerType::getUnqual(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addParamAttr(0, Attribute::AttrKind::InReg);
605 | } |
606 | return; |
607 | } |
608 | |
609 | StringRef GuardMode = M.getStackProtectorGuard(); |
610 | |
611 | // glibc, bionic, and Fuchsia have a special slot for the stack guard. |
612 | if ((GuardMode == "tls" || GuardMode.empty()) && |
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
614 | return; |
615 | TargetLowering::insertSSPDeclarations(M); |
616 | } |
617 | |
618 | Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { |
619 | // MSVC CRT has a global variable holding security cookie. |
620 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
621 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
    return M.getGlobalVariable("__security_cookie");
623 | } |
624 | return TargetLowering::getSDagStackGuard(M); |
625 | } |
626 | |
627 | Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { |
628 | // MSVC CRT has a function to validate security cookie. |
629 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
630 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
    return M.getFunction("__security_check_cookie");
632 | } |
633 | return TargetLowering::getSSPStackGuardCheck(M); |
634 | } |
635 | |
636 | Value * |
637 | X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { |
638 | // Android provides a fixed TLS slot for the SafeStack pointer. See the |
639 | // definition of TLS_SLOT_SAFESTACK in |
640 | // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h |
641 | if (Subtarget.isTargetAndroid()) { |
    // %fs:0x48, unless we're using a Kernel code model, in which case
    // it's %gs:0x48; %gs:0x24 on i386.
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
646 | } |
647 | |
648 | // Fuchsia is similar. |
649 | if (Subtarget.isTargetFuchsia()) { |
650 | // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value. |
    return SegmentOffset(IRB, 0x18, getAddressSpace());
652 | } |
653 | |
654 | return TargetLowering::getSafeStackPointerLocation(IRB); |
655 | } |
656 | |
657 | //===----------------------------------------------------------------------===// |
658 | // Return Value Calling Convention Implementation |
659 | //===----------------------------------------------------------------------===// |
660 | |
661 | bool X86TargetLowering::CanLowerReturn( |
662 | CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, |
663 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
664 | SmallVector<CCValAssign, 16> RVLocs; |
665 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
  return CCInfo.CheckReturn(Outs, RetCC_X86);
667 | } |
668 | |
669 | const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { |
670 | static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; |
671 | return ScratchRegs; |
672 | } |
673 | |
674 | ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const { |
675 | static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR}; |
676 | return RCRegs; |
677 | } |
678 | |
/// Lowers mask values (v*i1) to the local register values
680 | /// \returns DAG node after lowering to register type |
681 | static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, |
682 | const SDLoc &DL, SelectionDAG &DAG) { |
683 | EVT ValVT = ValArg.getValueType(); |
684 | |
685 | if (ValVT == MVT::v1i1) |
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, DL));
688 | |
689 | if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) || |
690 | (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) { |
691 | // Two stage lowering might be required |
692 | // bitcast: v8i1 -> i8 / v16i1 -> i16 |
693 | // anyextend: i8 -> i32 / i16 -> i32 |
694 | EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16; |
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
698 | return ValToCopy; |
699 | } |
700 | |
701 | if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) || |
702 | (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) { |
703 | // One stage lowering is required |
704 | // bitcast: v32i1 -> i32 / v64i1 -> i64 |
    return DAG.getBitcast(ValLoc, ValArg);
706 | } |
707 | |
  return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
709 | } |
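
// E.g. a v16i1 value assigned to an i32 location is lowered here as
//   bitcast v16i1 -> i16, then any_extend i16 -> i32
// while v32i1 -> i32 needs only the single bitcast.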
710 | |
711 | /// Breaks v64i1 value into two registers and adds the new node to the DAG |
712 | static void Passv64i1ArgInRegs( |
713 | const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, |
714 | SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA, |
715 | CCValAssign &NextVA, const X86Subtarget &Subtarget) { |
716 | assert(Subtarget.hasBWI() && "Expected AVX512BW target!" ); |
717 | assert(Subtarget.is32Bit() && "Expecting 32 bit target" ); |
718 | assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value" ); |
719 | assert(VA.isRegLoc() && NextVA.isRegLoc() && |
720 | "The value should reside in two registers" ); |
721 | |
722 | // Before splitting the value we cast it to i64 |
  Arg = DAG.getBitcast(MVT::i64, Arg);
724 | |
725 | // Splitting the value into two i32 types |
726 | SDValue Lo, Hi; |
  std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
728 | |
729 | // Attach the two i32 types into corresponding registers |
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
732 | } |
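
// Sketch of the resulting nodes for a v64i1 argument on a 32-bit AVX512BW
// target, assuming the two locations picked by the CC are EAX and ECX
// (the actual registers depend on the calling convention):
//   %arg = bitcast v64i1 %x to i64
//   lo   = low 32 bits  -> copied into EAX
//   hi   = high 32 bits -> copied into ECX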
733 | |
734 | SDValue |
735 | X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
736 | bool isVarArg, |
737 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
738 | const SmallVectorImpl<SDValue> &OutVals, |
739 | const SDLoc &dl, SelectionDAG &DAG) const { |
740 | MachineFunction &MF = DAG.getMachineFunction(); |
741 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
742 | |
743 | // In some cases we need to disable registers from the default CSR list. |
744 | // For example, when they are used as return registers (preserve_* and X86's |
745 | // regcall) or for argument passing (X86's regcall). |
746 | bool ShouldDisableCalleeSavedRegister = |
      shouldDisableRetRegFromCSR(CallConv) ||
      MF.getFunction().hasFnAttribute("no_caller_saved_registers");
749 | |
750 | if (CallConv == CallingConv::X86_INTR && !Outs.empty()) |
    report_fatal_error("X86 interrupts may not return any value");
752 | |
753 | SmallVector<CCValAssign, 16> RVLocs; |
754 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); |
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
756 | |
757 | SmallVector<std::pair<Register, SDValue>, 4> RetVals; |
758 | for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E; |
759 | ++I, ++OutsIndex) { |
760 | CCValAssign &VA = RVLocs[I]; |
761 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
762 | |
763 | // Add the register to the CalleeSaveDisableRegs list. |
764 | if (ShouldDisableCalleeSavedRegister) |
      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
766 | |
767 | SDValue ValToCopy = OutVals[OutsIndex]; |
768 | EVT ValVT = ValToCopy.getValueType(); |
769 | |
770 | // Promote values to the appropriate types. |
771 | if (VA.getLocInfo() == CCValAssign::SExt) |
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt) {
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
      else
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
    }
    else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
783 | |
784 | assert(VA.getLocInfo() != CCValAssign::FPExt && |
785 | "Unexpected FP-extend for return value." ); |
786 | |
787 | // Report an error if we have attempted to return a value via an XMM |
788 | // register and SSE was disabled. |
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               ValVT == MVT::f64) {
      // When returning a double via an XMM register, report an error if SSE2
      // is not enabled.
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
799 | } |
800 | |
801 | // Returns in ST0/ST1 are handled specially: these are pushed as operands to |
802 | // the RET instruction and handled by the FP Stackifier. |
803 | if (VA.getLocReg() == X86::FP0 || |
804 | VA.getLocReg() == X86::FP1) { |
805 | // If this is a copy from an xmm register to ST(0), use an FPExtend to |
806 | // change the value to the FP stack register class. |
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
810 | // Don't emit a copytoreg. |
811 | continue; |
812 | } |
813 | |
814 | // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 |
815 | // which is returned in RAX / RDX. |
816 | if (Subtarget.is64Bit()) { |
817 | if (ValVT == MVT::x86mmx) { |
818 | if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { |
          ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                  ValToCopy);
          // If we don't have SSE2 available, convert to v4f32 so the generated
          // register is legal.
          if (!Subtarget.hasSSE2())
            ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
826 | } |
827 | } |
828 | } |
829 | |
830 | if (VA.needsCustom()) { |
831 | assert(VA.getValVT() == MVT::v64i1 && |
832 | "Currently the only custom case is when we split v64i1 to 2 regs" ); |
833 | |
      Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
                         Subtarget);
836 | |
837 | // Add the second register to the CalleeSaveDisableRegs list. |
838 | if (ShouldDisableCalleeSavedRegister) |
        MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
840 | } else { |
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
842 | } |
843 | } |
844 | |
845 | SDValue Glue; |
846 | SmallVector<SDValue, 6> RetOps; |
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
                                         MVT::i32));
851 | |
852 | // Copy the result values into the output registers. |
853 | for (auto &RetVal : RetVals) { |
854 | if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) { |
      RetOps.push_back(RetVal.second);
856 | continue; // Don't emit a copytoreg. |
857 | } |
858 | |
    Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(
        DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
863 | } |
864 | |
865 | // Swift calling convention does not require we copy the sret argument |
866 | // into %rax/%eax for the return, and SRetReturnReg is not set for Swift. |
867 | |
868 | // All x86 ABIs require that for returning structs by value we copy |
869 | // the sret argument into %rax/%eax (depending on ABI) for the return. |
870 | // We saved the argument into a virtual register in the entry block, |
871 | // so now we copy the value out and into %rax/%eax. |
872 | // |
873 | // Checking Function.hasStructRetAttr() here is insufficient because the IR |
874 | // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is |
875 | // false, then an sret argument may be implicitly inserted in the SelDAG. In |
876 | // either case FuncInfo->setSRetReturnReg() will have been called. |
877 | if (Register SRetReg = FuncInfo->getSRetReturnReg()) { |
878 | // When we have both sret and another return value, we should use the |
879 | // original Chain stored in RetOps[0], instead of the current Chain updated |
880 | // in the above loop. If we only have sret, RetOps[0] equals to Chain. |
881 | |
882 | // For the case of sret and another return value, we have |
883 | // Chain_0 at the function entry |
884 | // Chain_1 = getCopyToReg(Chain_0) in the above loop |
885 | // If we use Chain_1 in getCopyFromReg, we will have |
886 | // Val = getCopyFromReg(Chain_1) |
887 | // Chain_2 = getCopyToReg(Chain_1, Val) from below |
888 | |
889 | // getCopyToReg(Chain_0) will be glued together with |
890 | // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be |
891 | // in Unit B, and we will have cyclic dependency between Unit A and Unit B: |
892 | // Data dependency from Unit B to Unit A due to usage of Val in |
893 | // getCopyToReg(Chain_1, Val) |
894 | // Chain dependency from Unit A to Unit B |
895 | |
896 | // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg. |
    SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
                                     getPointerTy(MF.getDataLayout()));
899 | |
900 | Register RetValReg |
901 | = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? |
902 | X86::RAX : X86::EAX; |
    Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
    Glue = Chain.getValue(1);
905 | |
906 | // RAX/EAX now acts like a return value. |
    RetOps.push_back(
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
909 | |
910 | // Add the returned register to the CalleeSaveDisableRegs list. Don't do |
911 | // this however for preserve_most/preserve_all to minimize the number of |
912 | // callee-saved registers for these CCs. |
913 | if (ShouldDisableCalleeSavedRegister && |
914 | CallConv != CallingConv::PreserveAll && |
915 | CallConv != CallingConv::PreserveMost) |
      MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
917 | } |
918 | |
919 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
920 | const MCPhysReg *I = |
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
922 | if (I) { |
923 | for (; *I; ++I) { |
      if (X86::GR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
928 | } |
929 | } |
930 | |
931 | RetOps[0] = Chain; // Update chain. |
932 | |
933 | // Add the glue if we have it. |
934 | if (Glue.getNode()) |
    RetOps.push_back(Glue);
936 | |
937 | X86ISD::NodeType opcode = X86ISD::RET_GLUE; |
938 | if (CallConv == CallingConv::X86_INTR) |
939 | opcode = X86ISD::IRET; |
  return DAG.getNode(opcode, dl, MVT::Other, RetOps);
941 | } |
942 | |
943 | bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { |
  if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
945 | return false; |
946 | |
947 | SDValue TCChain = Chain; |
948 | SDNode *Copy = *N->use_begin(); |
949 | if (Copy->getOpcode() == ISD::CopyToReg) { |
950 | // If the copy has a glue operand, we conservatively assume it isn't safe to |
951 | // perform a tail call. |
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
955 | } else if (Copy->getOpcode() != ISD::FP_EXTEND) |
956 | return false; |
957 | |
958 | bool HasRet = false; |
959 | for (const SDNode *U : Copy->uses()) { |
960 | if (U->getOpcode() != X86ISD::RET_GLUE) |
961 | return false; |
    // If we are returning more than one value, we can definitely
    // not make a tail call; see PR19530.
964 | if (U->getNumOperands() > 4) |
965 | return false; |
966 | if (U->getNumOperands() == 4 && |
        U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
968 | return false; |
969 | HasRet = true; |
970 | } |
971 | |
972 | if (!HasRet) |
973 | return false; |
974 | |
975 | Chain = TCChain; |
976 | return true; |
977 | } |
978 | |
979 | EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, |
980 | ISD::NodeType ExtendKind) const { |
981 | MVT ReturnMVT = MVT::i32; |
982 | |
983 | bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); |
984 | if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { |
985 | // The ABI does not require i1, i8 or i16 to be extended. |
986 | // |
987 | // On Darwin, there is code in the wild relying on Clang's old behaviour of |
988 | // always extending i8/i16 return values, so keep doing that for now. |
989 | // (PR26665). |
990 | ReturnMVT = MVT::i8; |
991 | } |
992 | |
  EVT MinVT = getRegisterType(Context, ReturnMVT);
  return VT.bitsLT(MinVT) ? MinVT : VT;
995 | } |
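
// Concretely: an i8 return value on Linux is left unextended (MinVT stays
// i8), while on Darwin it is widened to i32 to keep compatibility with code
// relying on Clang's old always-extend behaviour (PR26665).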
996 | |
997 | /// Reads two 32 bit registers and creates a 64 bit mask value. |
/// \param VA The current 32 bit value that needs to be assigned.
/// \param NextVA The next 32 bit value that needs to be assigned.
/// \param Root The parent DAG node.
/// \param [in,out] InGlue Represents SDValue in the parent DAG node for
///                        glue purposes. In the case the DAG is already using
///                        physical register instead of virtual, we should glue
///                        our new SDValue to InGlue SDValue.
/// \return a new SDValue of size 64 bit.
1006 | static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, |
1007 | SDValue &Root, SelectionDAG &DAG, |
1008 | const SDLoc &DL, const X86Subtarget &Subtarget, |
1009 | SDValue *InGlue = nullptr) { |
1010 | assert((Subtarget.hasBWI()) && "Expected AVX512BW target!" ); |
1011 | assert(Subtarget.is32Bit() && "Expecting 32 bit target" ); |
1012 | assert(VA.getValVT() == MVT::v64i1 && |
1013 | "Expecting first location of 64 bit width type" ); |
1014 | assert(NextVA.getValVT() == VA.getValVT() && |
1015 | "The locations should have the same type" ); |
1016 | assert(VA.isRegLoc() && NextVA.isRegLoc() && |
1017 | "The values should reside in two registers" ); |
1018 | |
1019 | SDValue Lo, Hi; |
1020 | SDValue ArgValueLo, ArgValueHi; |
1021 | |
1022 | MachineFunction &MF = DAG.getMachineFunction(); |
1023 | const TargetRegisterClass *RC = &X86::GR32RegClass; |
1024 | |
1025 | // Read a 32 bit value from the registers. |
1026 | if (nullptr == InGlue) { |
1027 | // When no physical register is present, |
1028 | // create an intermediate virtual register. |
    Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
    ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1033 | } else { |
1034 | // When a physical register is available read the value from it and glue |
1035 | // the reads together. |
    ArgValueLo =
        DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
    *InGlue = ArgValueLo.getValue(2);
    ArgValueHi =
        DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
    *InGlue = ArgValueHi.getValue(2);
1042 | } |
1043 | |
1044 | // Convert the i32 type into v32i1 type. |
  Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);

  // Convert the i32 type into v32i1 type.
  Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1049 | |
1050 | // Concatenate the two values together. |
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1052 | } |
1053 | |
1054 | /// The function will lower a register of various sizes (8/16/32/64) |
1055 | /// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1) |
1056 | /// \returns a DAG node contains the operand after lowering to mask type. |
1057 | static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, |
1058 | const EVT &ValLoc, const SDLoc &DL, |
1059 | SelectionDAG &DAG) { |
1060 | SDValue ValReturned = ValArg; |
1061 | |
1062 | if (ValVT == MVT::v1i1) |
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1064 | |
1065 | if (ValVT == MVT::v64i1) { |
    // On 32 bit machines, this case is handled by getv64i1Argument.
    assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
    // On 64 bit machines there is no need to truncate the value, only bitcast.
1069 | } else { |
1070 | MVT MaskLenVT; |
1071 | switch (ValVT.getSimpleVT().SimpleTy) { |
1072 | case MVT::v8i1: |
1073 | MaskLenVT = MVT::i8; |
1074 | break; |
1075 | case MVT::v16i1: |
1076 | MaskLenVT = MVT::i16; |
1077 | break; |
1078 | case MVT::v32i1: |
1079 | MaskLenVT = MVT::i32; |
1080 | break; |
1081 | default: |
1082 | llvm_unreachable("Expecting a vector of i1 types" ); |
1083 | } |
1084 | |
    ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1086 | } |
  return DAG.getBitcast(ValVT, ValReturned);
1088 | } |
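
// E.g. a v16i1 return value that arrives in a 32-bit location is recovered as
//   truncate i32 -> i16, then bitcast i16 -> v16i1
// mirroring the lowering done on the producer side in lowerMasksToReg.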
1089 | |
1090 | /// Lower the result values of a call into the |
1091 | /// appropriate copies out of appropriate physical registers. |
1092 | /// |
1093 | SDValue X86TargetLowering::LowerCallResult( |
1094 | SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, |
1095 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
1096 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
1097 | uint32_t *RegMask) const { |
1098 | |
1099 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
1100 | // Assign locations to each value returned by this call. |
1101 | SmallVector<CCValAssign, 16> RVLocs; |
1102 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
1103 | *DAG.getContext()); |
  CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1105 | |
1106 | // Copy all of the result registers out of their specified physreg. |
1107 | for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E; |
1108 | ++I, ++InsIndex) { |
1109 | CCValAssign &VA = RVLocs[I]; |
1110 | EVT CopyVT = VA.getLocVT(); |
1111 | |
1112 | // In some calling conventions we need to remove the used registers |
1113 | // from the register mask. |
1114 | if (RegMask) { |
      for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1116 | RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); |
1117 | } |
1118 | |
1119 | // Report an error if there was an attempt to return FP values via XMM |
1120 | // registers. |
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      if (VA.getLocReg() == X86::XMM1)
        VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
      else
        VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               CopyVT == MVT::f64) {
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      if (VA.getLocReg() == X86::XMM1)
        VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
      else
        VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1135 | } |
1136 | |
1137 | // If we prefer to use the value in xmm registers, copy it out as f80 and |
1138 | // use a truncate to move it from fp stack reg to xmm reg. |
1139 | bool RoundAfterCopy = false; |
1140 | if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && |
        isScalarFPTypeInSSEReg(VA.getValVT())) {
1142 | if (!Subtarget.hasX87()) |
        report_fatal_error("X87 register return with X87 disabled");
1144 | CopyVT = MVT::f80; |
1145 | RoundAfterCopy = (CopyVT != VA.getLocVT()); |
1146 | } |
1147 | |
1148 | SDValue Val; |
1149 | if (VA.needsCustom()) { |
1150 | assert(VA.getValVT() == MVT::v64i1 && |
1151 | "Currently the only custom case is when we split v64i1 to 2 regs" ); |
      Val =
          getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
                  .getValue(1);
      Val = Chain.getValue(0);
      InGlue = Chain.getValue(2);
1159 | } |
1160 | |
1161 | if (RoundAfterCopy) |
      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1165 | |
1166 | if (VA.isExtInLoc()) { |
1167 | if (VA.getValVT().isVector() && |
1168 | VA.getValVT().getScalarType() == MVT::i1 && |
1169 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
1170 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
1171 | // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 |
        Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1173 | } else |
        Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1175 | } |
1176 | |
1177 | if (VA.getLocInfo() == CCValAssign::BCvt) |
      Val = DAG.getBitcast(VA.getValVT(), Val);
1179 | |
    InVals.push_back(Val);
1181 | } |
1182 | |
1183 | return Chain; |
1184 | } |
1185 | |
1186 | //===----------------------------------------------------------------------===// |
1187 | // C & StdCall & Fast Calling Convention implementation |
1188 | //===----------------------------------------------------------------------===// |
1189 | // StdCall calling convention seems to be standard for many Windows' API |
1190 | // routines and around. It differs from C calling convention just a little: |
1191 | // callee should clean up the stack, not caller. Symbols should be also |
1192 | // decorated in some fancy way :) It doesn't support any vector arguments. |
1193 | // For info on fast calling convention see Fast Calling Convention (tail call) |
1194 | // implementation LowerX86_32FastCCCallTo. |
1195 | |
1196 | /// Determines whether Args, either a set of outgoing arguments to a call, or a |
1197 | /// set of incoming args of a call, contains an sret pointer that the callee |
/// pops.
1199 | template <typename T> |
1200 | static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args, |
1201 | const X86Subtarget &Subtarget) { |
1202 | // Not C++20 (yet), so no concepts available. |
1203 | static_assert(std::is_same_v<T, ISD::OutputArg> || |
1204 | std::is_same_v<T, ISD::InputArg>, |
1205 | "requires ISD::OutputArg or ISD::InputArg" ); |
1206 | |
1207 | // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out |
1208 | // for most compilations. |
1209 | if (!Subtarget.is32Bit()) |
1210 | return false; |
1211 | |
1212 | if (Args.empty()) |
1213 | return false; |
1214 | |
1215 | // Most calls do not have an sret argument, check the arg next. |
1216 | const ISD::ArgFlagsTy &Flags = Args[0].Flags; |
1217 | if (!Flags.isSRet() || Flags.isInReg()) |
1218 | return false; |
1219 | |
  // The MSVC ABI does not pop the sret.
1221 | if (Subtarget.getTargetTriple().isOSMSVCRT()) |
1222 | return false; |
1223 | |
1224 | // MCUs don't pop the sret |
1225 | if (Subtarget.isTargetMCU()) |
1226 | return false; |
1227 | |
1228 | // Callee pops argument |
1229 | return true; |
1230 | } |
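
// Example: for a 32-bit Linux "C" call returning a struct through an sret
// pointer, the callee pops that hidden pointer (ret $4), so the caller must
// not also adjust the stack; under the MSVC ABI or on MCU targets the caller
// keeps ownership of that slot.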
1231 | |
1232 | /// Make a copy of an aggregate at address specified by "Src" to address |
1233 | /// "Dst" with size and alignment information specified by the specific |
1234 | /// parameter attribute. The copy will be passed as a byval function parameter. |
1235 | static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, |
1236 | SDValue Chain, ISD::ArgFlagsTy Flags, |
1237 | SelectionDAG &DAG, const SDLoc &dl) { |
  SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1239 | |
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile*/ false, /*AlwaysInline=*/true,
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1244 | } |
1245 | |
1246 | /// Return true if the calling convention is one that we can guarantee TCO for. |
1247 | static bool canGuaranteeTCO(CallingConv::ID CC) { |
1248 | return (CC == CallingConv::Fast || CC == CallingConv::GHC || |
1249 | CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || |
1250 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail); |
1251 | } |
1252 | |
1253 | /// Return true if we might ever do TCO for calls with this calling convention. |
1254 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
1255 | switch (CC) { |
1256 | // C calling conventions: |
1257 | case CallingConv::C: |
1258 | case CallingConv::Win64: |
1259 | case CallingConv::X86_64_SysV: |
1260 | case CallingConv::PreserveNone: |
1261 | // Callee pop conventions: |
1262 | case CallingConv::X86_ThisCall: |
1263 | case CallingConv::X86_StdCall: |
1264 | case CallingConv::X86_VectorCall: |
1265 | case CallingConv::X86_FastCall: |
1266 | // Swift: |
1267 | case CallingConv::Swift: |
1268 | return true; |
1269 | default: |
1270 | return canGuaranteeTCO(CC); |
1271 | } |
1272 | } |
1273 | |
1274 | /// Return true if the function is being made into a tailcall target by |
1275 | /// changing its ABI. |
1276 | static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { |
1277 | return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || |
1278 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
1279 | } |
1280 | |
1281 | bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
1282 | if (!CI->isTailCall()) |
1283 | return false; |
1284 | |
1285 | CallingConv::ID CalleeCC = CI->getCallingConv(); |
  if (!mayTailCallThisCC(CalleeCC))
1287 | return false; |
1288 | |
1289 | return true; |
1290 | } |
1291 | |
1292 | SDValue |
1293 | X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, |
1294 | const SmallVectorImpl<ISD::InputArg> &Ins, |
1295 | const SDLoc &dl, SelectionDAG &DAG, |
1296 | const CCValAssign &VA, |
1297 | MachineFrameInfo &MFI, unsigned i) const { |
1298 | // Create the nodes corresponding to a load from this parameter slot. |
1299 | ISD::ArgFlagsTy Flags = Ins[i].Flags; |
1300 | bool AlwaysUseMutable = shouldGuaranteeTCO( |
1301 | CC: CallConv, GuaranteedTailCallOpt: DAG.getTarget().Options.GuaranteedTailCallOpt); |
1302 | bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); |
1303 | EVT ValVT; |
1304 | MVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
1305 | |
// If the value is passed by pointer, we receive the address instead of the
// value itself. For i1 mask values, no in-memory extension is needed when the
// value and its location already share the same size.
1309 | bool ExtendedInMem = |
1310 | VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 && |
1311 | VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits(); |
1312 | |
1313 | if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) |
1314 | ValVT = VA.getLocVT(); |
1315 | else |
1316 | ValVT = VA.getValVT(); |
1317 | |
1318 | // FIXME: For now, all byval parameter objects are marked mutable. This can be |
1319 | // changed with more analysis. |
// In case of tail call optimization, mark all arguments mutable, since they
// could be overwritten by the lowering of arguments for a tail call.
1322 | if (Flags.isByVal()) { |
1323 | unsigned Bytes = Flags.getByValSize(); |
1324 | if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. |
1325 | |
1326 | // FIXME: For now, all byval parameter objects are marked as aliasing. This |
1327 | // can be improved with deeper analysis. |
1328 | int FI = MFI.CreateFixedObject(Size: Bytes, SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable, |
1329 | /*isAliased=*/true); |
1330 | return DAG.getFrameIndex(FI, VT: PtrVT); |
1331 | } |
1332 | |
1333 | EVT ArgVT = Ins[i].ArgVT; |
1334 | |
1335 | // If this is a vector that has been split into multiple parts, don't elide |
1336 | // the copy. The layout on the stack may not match the packed in-memory |
1337 | // layout. |
1338 | bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector(); |
1339 | |
1340 | // This is an argument in memory. We might be able to perform copy elision. |
1341 | // If the argument is passed directly in memory without any extension, then we |
1342 | // can perform copy elision. Large vector types, for example, may be passed |
1343 | // indirectly by pointer. |
1344 | if (Flags.isCopyElisionCandidate() && |
1345 | VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem && |
1346 | !ScalarizedVector) { |
1347 | SDValue PartAddr; |
1348 | if (Ins[i].PartOffset == 0) { |
1349 | // If this is a one-part value or the first part of a multi-part value, |
1350 | // create a stack object for the entire argument value type and return a |
1351 | // load from our portion of it. This assumes that if the first part of an |
1352 | // argument is in memory, the rest will also be in memory. |
1353 | int FI = MFI.CreateFixedObject(Size: ArgVT.getStoreSize(), SPOffset: VA.getLocMemOffset(), |
1354 | /*IsImmutable=*/false); |
1355 | PartAddr = DAG.getFrameIndex(FI, VT: PtrVT); |
1356 | return DAG.getLoad( |
1357 | VT: ValVT, dl, Chain, Ptr: PartAddr, |
1358 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)); |
1359 | } |
1360 | |
1361 | // This is not the first piece of an argument in memory. See if there is |
1362 | // already a fixed stack object including this offset. If so, assume it |
1363 | // was created by the PartOffset == 0 branch above and create a load from |
1364 | // the appropriate offset into it. |
1365 | int64_t PartBegin = VA.getLocMemOffset(); |
1366 | int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; |
1367 | int FI = MFI.getObjectIndexBegin(); |
1368 | for (; MFI.isFixedObjectIndex(ObjectIdx: FI); ++FI) { |
1369 | int64_t ObjBegin = MFI.getObjectOffset(ObjectIdx: FI); |
1370 | int64_t ObjEnd = ObjBegin + MFI.getObjectSize(ObjectIdx: FI); |
1371 | if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) |
1372 | break; |
1373 | } |
1374 | if (MFI.isFixedObjectIndex(ObjectIdx: FI)) { |
1375 | SDValue Addr = |
1376 | DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: DAG.getFrameIndex(FI, VT: PtrVT), |
1377 | N2: DAG.getIntPtrConstant(Val: Ins[i].PartOffset, DL: dl)); |
1378 | return DAG.getLoad(VT: ValVT, dl, Chain, Ptr: Addr, |
1379 | PtrInfo: MachinePointerInfo::getFixedStack( |
1380 | MF&: DAG.getMachineFunction(), FI, Offset: Ins[i].PartOffset)); |
1381 | } |
1382 | } |
1383 | |
1384 | int FI = MFI.CreateFixedObject(Size: ValVT.getSizeInBits() / 8, |
1385 | SPOffset: VA.getLocMemOffset(), IsImmutable: isImmutable); |
1386 | |
1387 | // Set SExt or ZExt flag. |
1388 | if (VA.getLocInfo() == CCValAssign::ZExt) { |
1389 | MFI.setObjectZExt(ObjectIdx: FI, IsZExt: true); |
1390 | } else if (VA.getLocInfo() == CCValAssign::SExt) { |
1391 | MFI.setObjectSExt(ObjectIdx: FI, IsSExt: true); |
1392 | } |
1393 | |
1394 | MaybeAlign Alignment; |
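// The 32-bit MSVC ABI only guarantees 4-byte alignment for arguments passed
// on the stack, so don't claim a larger alignment for this load (f80 keeps
// its default handling).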
1395 | if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && |
1396 | ValVT != MVT::f80) |
1397 | Alignment = MaybeAlign(4); |
1398 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
1399 | SDValue Val = DAG.getLoad( |
1400 | VT: ValVT, dl, Chain, Ptr: FIN, |
1401 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI), |
1402 | Alignment); |
1403 | return ExtendedInMem |
1404 | ? (VA.getValVT().isVector() |
1405 | ? DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: VA.getValVT(), Operand: Val) |
1406 | : DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val)) |
1407 | : Val; |
1408 | } |
1409 | |
1410 | // FIXME: Get this from tablegen. |
1411 | static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, |
1412 | const X86Subtarget &Subtarget) { |
1413 | assert(Subtarget.is64Bit()); |
1414 | |
1415 | if (Subtarget.isCallingConvWin64(CC: CallConv)) { |
1416 | static const MCPhysReg GPR64ArgRegsWin64[] = { |
1417 | X86::RCX, X86::RDX, X86::R8, X86::R9 |
1418 | }; |
1419 | return ArrayRef(std::begin(arr: GPR64ArgRegsWin64), std::end(arr: GPR64ArgRegsWin64)); |
1420 | } |
1421 | |
1422 | static const MCPhysReg GPR64ArgRegs64Bit[] = { |
1423 | X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 |
1424 | }; |
1425 | return ArrayRef(std::begin(arr: GPR64ArgRegs64Bit), std::end(arr: GPR64ArgRegs64Bit)); |
1426 | } |
1427 | |
1428 | // FIXME: Get this from tablegen. |
1429 | static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, |
1430 | CallingConv::ID CallConv, |
1431 | const X86Subtarget &Subtarget) { |
1432 | assert(Subtarget.is64Bit()); |
1433 | if (Subtarget.isCallingConvWin64(CC: CallConv)) { |
// The XMM registers which might contain vararg parameters are shadowed by
// their paired GPRs, so we only need to save the GPRs to their home slots.
1437 | // TODO: __vectorcall will change this. |
1438 | return std::nullopt; |
1439 | } |
1440 | |
1441 | bool isSoftFloat = Subtarget.useSoftFloat(); |
1442 | if (isSoftFloat || !Subtarget.hasSSE1()) |
1443 | // Kernel mode asks for SSE to be disabled, so there are no XMM argument |
1444 | // registers. |
1445 | return std::nullopt; |
1446 | |
1447 | static const MCPhysReg XMMArgRegs64Bit[] = { |
1448 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
1449 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
1450 | }; |
1451 | return ArrayRef(std::begin(arr: XMMArgRegs64Bit), std::end(arr: XMMArgRegs64Bit)); |
1452 | } |
1453 | |
1454 | #ifndef NDEBUG |
1455 | static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { |
1456 | return llvm::is_sorted( |
1457 | ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool { |
1458 | return A.getValNo() < B.getValNo(); |
1459 | }); |
1460 | } |
1461 | #endif |
1462 | |
1463 | namespace { |
/// Helper class for lowering the parameters of variadic functions.
1465 | class VarArgsLoweringHelper { |
1466 | public: |
1467 | VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc, |
1468 | SelectionDAG &DAG, const X86Subtarget &Subtarget, |
1469 | CallingConv::ID CallConv, CCState &CCInfo) |
1470 | : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget), |
1471 | TheMachineFunction(DAG.getMachineFunction()), |
1472 | TheFunction(TheMachineFunction.getFunction()), |
1473 | FrameInfo(TheMachineFunction.getFrameInfo()), |
1474 | FrameLowering(*Subtarget.getFrameLowering()), |
1475 | TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv), |
1476 | CCInfo(CCInfo) {} |
1477 | |
// Lower variadic parameters.
1479 | void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize); |
1480 | |
1481 | private: |
1482 | void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize); |
1483 | |
1484 | void forwardMustTailParameters(SDValue &Chain); |
1485 | |
1486 | bool is64Bit() const { return Subtarget.is64Bit(); } |
1487 | bool isWin64() const { return Subtarget.isCallingConvWin64(CC: CallConv); } |
1488 | |
1489 | X86MachineFunctionInfo *FuncInfo; |
1490 | const SDLoc &DL; |
1491 | SelectionDAG &DAG; |
1492 | const X86Subtarget &Subtarget; |
1493 | MachineFunction &TheMachineFunction; |
1494 | const Function &TheFunction; |
1495 | MachineFrameInfo &FrameInfo; |
1496 | const TargetFrameLowering &FrameLowering; |
1497 | const TargetLowering &TargLowering; |
1498 | CallingConv::ID CallConv; |
1499 | CCState &CCInfo; |
1500 | }; |
1501 | } // namespace |
1502 | |
1503 | void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters( |
1504 | SDValue &Chain, unsigned StackSize) { |
// If the function takes a variable number of arguments, make a frame index for
1506 | // the start of the first vararg value... for expansion of llvm.va_start. We |
1507 | // can skip this if there are no va_start calls. |
1508 | if (is64Bit() || (CallConv != CallingConv::X86_FastCall && |
1509 | CallConv != CallingConv::X86_ThisCall)) { |
1510 | FuncInfo->setVarArgsFrameIndex( |
1511 | FrameInfo.CreateFixedObject(Size: 1, SPOffset: StackSize, IsImmutable: true)); |
1512 | } |
1513 | |
1514 | // 64-bit calling conventions support varargs and register parameters, so we |
1515 | // have to do extra work to spill them in the prologue. |
1516 | if (is64Bit()) { |
1517 | // Find the first unallocated argument registers. |
1518 | ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); |
1519 | ArrayRef<MCPhysReg> ArgXMMs = |
1520 | get64BitArgumentXMMs(MF&: TheMachineFunction, CallConv, Subtarget); |
1521 | unsigned NumIntRegs = CCInfo.getFirstUnallocated(Regs: ArgGPRs); |
1522 | unsigned NumXMMRegs = CCInfo.getFirstUnallocated(Regs: ArgXMMs); |
1523 | |
1524 | assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && |
1525 | "SSE register cannot be used when SSE is disabled!" ); |
1526 | |
1527 | if (isWin64()) { |
1528 | // Get to the caller-allocated home save location. Add 8 to account |
1529 | // for the return address. |
1530 | int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8; |
1531 | FuncInfo->setRegSaveFrameIndex( |
1532 | FrameInfo.CreateFixedObject(Size: 1, SPOffset: NumIntRegs * 8 + HomeOffset, IsImmutable: false)); |
1533 | // Fixup to set vararg frame on shadow area (4 x i64). |
1534 | if (NumIntRegs < 4) |
1535 | FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); |
1536 | } else { |
1537 | // For X86-64, if there are vararg parameters that are passed via |
1538 | // registers, then we must store them to their spots on the stack so |
1539 | // they may be loaded by dereferencing the result of va_next. |
1540 | FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); |
1541 | FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16); |
1542 | FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject( |
1543 | Size: ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Alignment: Align(16), isSpillSlot: false)); |
1544 | } |
1545 | |
SmallVector<SDValue, 6> LiveGPRs; // Live incoming values in GPR argument registers.
SmallVector<SDValue, 8> LiveXMMRegs; // Live incoming values in XMM argument registers.
SDValue ALVal; // If applicable, the incoming value of the %al register.
1551 | |
1552 | // Gather all the live in physical registers. |
1553 | for (MCPhysReg Reg : ArgGPRs.slice(N: NumIntRegs)) { |
1554 | Register GPR = TheMachineFunction.addLiveIn(PReg: Reg, RC: &X86::GR64RegClass); |
1555 | LiveGPRs.push_back(Elt: DAG.getCopyFromReg(Chain, dl: DL, Reg: GPR, VT: MVT::i64)); |
1556 | } |
1557 | const auto &AvailableXmms = ArgXMMs.slice(N: NumXMMRegs); |
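// On SysV x86-64, %al carries an upper bound on the number of vector
// registers the caller used for varargs; make it live-in so
// VASTART_SAVE_XMM_REGS can guard the XMM spills on it.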
1558 | if (!AvailableXmms.empty()) { |
1559 | Register AL = TheMachineFunction.addLiveIn(PReg: X86::AL, RC: &X86::GR8RegClass); |
1560 | ALVal = DAG.getCopyFromReg(Chain, dl: DL, Reg: AL, VT: MVT::i8); |
1561 | for (MCPhysReg Reg : AvailableXmms) { |
// The fast register allocator spills virtual registers at basic
// block boundaries. That leads to uses of XMM registers
// outside of the check for %al. Pass physical registers to
// VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1566 | TheMachineFunction.getRegInfo().addLiveIn(Reg); |
1567 | LiveXMMRegs.push_back(Elt: DAG.getRegister(Reg, VT: MVT::v4f32)); |
1568 | } |
1569 | } |
1570 | |
1571 | // Store the integer parameter registers. |
1572 | SmallVector<SDValue, 8> MemOps; |
1573 | SDValue RSFIN = |
1574 | DAG.getFrameIndex(FI: FuncInfo->getRegSaveFrameIndex(), |
1575 | VT: TargLowering.getPointerTy(DL: DAG.getDataLayout())); |
1576 | unsigned Offset = FuncInfo->getVarArgsGPOffset(); |
1577 | for (SDValue Val : LiveGPRs) { |
1578 | SDValue FIN = DAG.getNode(Opcode: ISD::ADD, DL, |
1579 | VT: TargLowering.getPointerTy(DL: DAG.getDataLayout()), |
1580 | N1: RSFIN, N2: DAG.getIntPtrConstant(Val: Offset, DL)); |
1581 | SDValue Store = |
1582 | DAG.getStore(Chain: Val.getValue(R: 1), dl: DL, Val, Ptr: FIN, |
1583 | PtrInfo: MachinePointerInfo::getFixedStack( |
1584 | MF&: DAG.getMachineFunction(), |
1585 | FI: FuncInfo->getRegSaveFrameIndex(), Offset)); |
1586 | MemOps.push_back(Elt: Store); |
1587 | Offset += 8; |
1588 | } |
1589 | |
1590 | // Now store the XMM (fp + vector) parameter registers. |
1591 | if (!LiveXMMRegs.empty()) { |
1592 | SmallVector<SDValue, 12> SaveXMMOps; |
1593 | SaveXMMOps.push_back(Elt: Chain); |
1594 | SaveXMMOps.push_back(Elt: ALVal); |
1595 | SaveXMMOps.push_back(Elt: RSFIN); |
1596 | SaveXMMOps.push_back( |
1597 | Elt: DAG.getTargetConstant(Val: FuncInfo->getVarArgsFPOffset(), DL, VT: MVT::i32)); |
1598 | llvm::append_range(C&: SaveXMMOps, R&: LiveXMMRegs); |
1599 | MachineMemOperand *StoreMMO = |
1600 | DAG.getMachineFunction().getMachineMemOperand( |
1601 | PtrInfo: MachinePointerInfo::getFixedStack( |
1602 | MF&: DAG.getMachineFunction(), FI: FuncInfo->getRegSaveFrameIndex(), |
1603 | Offset), |
1604 | F: MachineMemOperand::MOStore, Size: 128, BaseAlignment: Align(16)); |
1605 | MemOps.push_back(Elt: DAG.getMemIntrinsicNode(Opcode: X86ISD::VASTART_SAVE_XMM_REGS, |
1606 | dl: DL, VTList: DAG.getVTList(VT: MVT::Other), |
1607 | Ops: SaveXMMOps, MemVT: MVT::i8, MMO: StoreMMO)); |
1608 | } |
1609 | |
1610 | if (!MemOps.empty()) |
1611 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: MemOps); |
1612 | } |
1613 | } |
1614 | |
1615 | void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { |
1616 | // Find the largest legal vector type. |
1617 | MVT VecVT = MVT::Other; |
1618 | // FIXME: Only some x86_32 calling conventions support AVX512. |
1619 | if (Subtarget.useAVX512Regs() && |
1620 | (is64Bit() || (CallConv == CallingConv::X86_VectorCall || |
1621 | CallConv == CallingConv::Intel_OCL_BI))) |
1622 | VecVT = MVT::v16f32; |
1623 | else if (Subtarget.hasAVX()) |
1624 | VecVT = MVT::v8f32; |
1625 | else if (Subtarget.hasSSE2()) |
1626 | VecVT = MVT::v4f32; |
1627 | |
1628 | // We forward some GPRs and some vector types. |
1629 | SmallVector<MVT, 2> RegParmTypes; |
1630 | MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32; |
1631 | RegParmTypes.push_back(Elt: IntVT); |
1632 | if (VecVT != MVT::Other) |
1633 | RegParmTypes.push_back(Elt: VecVT); |
1634 | |
1635 | // Compute the set of forwarded registers. The rest are scratch. |
1636 | SmallVectorImpl<ForwardedRegister> &Forwards = |
1637 | FuncInfo->getForwardedMustTailRegParms(); |
1638 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, Fn: CC_X86); |
1639 | |
1640 | // Forward AL for SysV x86_64 targets, since it is used for varargs. |
1641 | if (is64Bit() && !isWin64() && !CCInfo.isAllocated(Reg: X86::AL)) { |
1642 | Register ALVReg = TheMachineFunction.addLiveIn(PReg: X86::AL, RC: &X86::GR8RegClass); |
1643 | Forwards.push_back(Elt: ForwardedRegister(ALVReg, X86::AL, MVT::i8)); |
1644 | } |
1645 | |
1646 | // Copy all forwards from physical to virtual registers. |
1647 | for (ForwardedRegister &FR : Forwards) { |
1648 | // FIXME: Can we use a less constrained schedule? |
1649 | SDValue RegVal = DAG.getCopyFromReg(Chain, dl: DL, Reg: FR.VReg, VT: FR.VT); |
1650 | FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister( |
1651 | RegClass: TargLowering.getRegClassFor(VT: FR.VT)); |
1652 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: FR.VReg, N: RegVal); |
1653 | } |
1654 | } |
1655 | |
1656 | void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain, |
1657 | unsigned StackSize) { |
// Set FrameIndex to the 0xAAAAAAA sentinel value to mark the unset state.
// If necessary, it will be set to the correct value later.
1660 | FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); |
1661 | FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); |
1662 | |
1663 | if (FrameInfo.hasVAStart()) |
1664 | createVarArgAreaAndStoreRegisters(Chain, StackSize); |
1665 | |
1666 | if (FrameInfo.hasMustTailInVarArgFunc()) |
1667 | forwardMustTailParameters(Chain); |
1668 | } |
1669 | |
1670 | SDValue X86TargetLowering::LowerFormalArguments( |
1671 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
1672 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
1673 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
1674 | MachineFunction &MF = DAG.getMachineFunction(); |
1675 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
1676 | |
1677 | const Function &F = MF.getFunction(); |
1678 | if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && |
1679 | F.getName() == "main" ) |
1680 | FuncInfo->setForceFramePointer(true); |
1681 | |
1682 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1683 | bool Is64Bit = Subtarget.is64Bit(); |
1684 | bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv); |
1685 | |
1686 | assert( |
1687 | !(IsVarArg && canGuaranteeTCO(CallConv)) && |
1688 | "Var args not supported with calling conv' regcall, fastcc, ghc or hipe" ); |
1689 | |
1690 | // Assign locations to all of the incoming arguments. |
1691 | SmallVector<CCValAssign, 16> ArgLocs; |
1692 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
1693 | |
1694 | // Allocate shadow area for Win64. |
1695 | if (IsWin64) |
1696 | CCInfo.AllocateStack(Size: 32, Alignment: Align(8)); |
1697 | |
1698 | CCInfo.AnalyzeArguments(Ins, Fn: CC_X86); |
1699 | |
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
1702 | if (CallingConv::X86_VectorCall == CallConv) { |
1703 | CCInfo.AnalyzeArgumentsSecondPass(Args: Ins, Fn: CC_X86); |
1704 | } |
1705 | |
// The next loop assumes that the locations are in the same order as the
// input arguments.
1708 | assert(isSortedByValueNo(ArgLocs) && |
1709 | "Argument Location list must be sorted before lowering" ); |
1710 | |
1711 | SDValue ArgValue; |
1712 | for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; |
1713 | ++I, ++InsIndex) { |
1714 | assert(InsIndex < Ins.size() && "Invalid Ins index" ); |
1715 | CCValAssign &VA = ArgLocs[I]; |
1716 | |
1717 | if (VA.isRegLoc()) { |
1718 | EVT RegVT = VA.getLocVT(); |
1719 | if (VA.needsCustom()) { |
1720 | assert( |
1721 | VA.getValVT() == MVT::v64i1 && |
1722 | "Currently the only custom case is when we split v64i1 to 2 regs" ); |
1723 | |
// In the regcall calling convention on 32-bit targets, v64i1 values are
// split up into two registers.
1726 | ArgValue = |
1727 | getv64i1Argument(VA, NextVA&: ArgLocs[++I], Root&: Chain, DAG, DL: dl, Subtarget); |
1728 | } else { |
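// Pick a register class matching the location type. The extended (X)
// classes additionally contain XMM16-XMM31/YMM16-YMM31, which are only
// usable with AVX-512 / VLX.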
1729 | const TargetRegisterClass *RC; |
1730 | if (RegVT == MVT::i8) |
1731 | RC = &X86::GR8RegClass; |
1732 | else if (RegVT == MVT::i16) |
1733 | RC = &X86::GR16RegClass; |
1734 | else if (RegVT == MVT::i32) |
1735 | RC = &X86::GR32RegClass; |
1736 | else if (Is64Bit && RegVT == MVT::i64) |
1737 | RC = &X86::GR64RegClass; |
1738 | else if (RegVT == MVT::f16) |
1739 | RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; |
1740 | else if (RegVT == MVT::f32) |
1741 | RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; |
1742 | else if (RegVT == MVT::f64) |
1743 | RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; |
1744 | else if (RegVT == MVT::f80) |
1745 | RC = &X86::RFP80RegClass; |
1746 | else if (RegVT == MVT::f128) |
1747 | RC = &X86::VR128RegClass; |
1748 | else if (RegVT.is512BitVector()) |
1749 | RC = &X86::VR512RegClass; |
1750 | else if (RegVT.is256BitVector()) |
1751 | RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; |
1752 | else if (RegVT.is128BitVector()) |
1753 | RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass; |
1754 | else if (RegVT == MVT::x86mmx) |
1755 | RC = &X86::VR64RegClass; |
1756 | else if (RegVT == MVT::v1i1) |
1757 | RC = &X86::VK1RegClass; |
1758 | else if (RegVT == MVT::v8i1) |
1759 | RC = &X86::VK8RegClass; |
1760 | else if (RegVT == MVT::v16i1) |
1761 | RC = &X86::VK16RegClass; |
1762 | else if (RegVT == MVT::v32i1) |
1763 | RC = &X86::VK32RegClass; |
1764 | else if (RegVT == MVT::v64i1) |
1765 | RC = &X86::VK64RegClass; |
1766 | else |
1767 | llvm_unreachable("Unknown argument type!" ); |
1768 | |
1769 | Register Reg = MF.addLiveIn(PReg: VA.getLocReg(), RC); |
1770 | ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, VT: RegVT); |
1771 | } |
1772 | |
1773 | // If this is an 8 or 16-bit value, it is really passed promoted to 32 |
1774 | // bits. Insert an assert[sz]ext to capture this, then truncate to the |
1775 | // right size. |
1776 | if (VA.getLocInfo() == CCValAssign::SExt) |
1777 | ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: RegVT, N1: ArgValue, |
1778 | N2: DAG.getValueType(VA.getValVT())); |
1779 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
1780 | ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RegVT, N1: ArgValue, |
1781 | N2: DAG.getValueType(VA.getValVT())); |
1782 | else if (VA.getLocInfo() == CCValAssign::BCvt) |
1783 | ArgValue = DAG.getBitcast(VT: VA.getValVT(), V: ArgValue); |
1784 | |
1785 | if (VA.isExtInLoc()) { |
1786 | // Handle MMX values passed in XMM regs. |
1787 | if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) |
1788 | ArgValue = DAG.getNode(Opcode: X86ISD::MOVDQ2Q, DL: dl, VT: VA.getValVT(), Operand: ArgValue); |
1789 | else if (VA.getValVT().isVector() && |
1790 | VA.getValVT().getScalarType() == MVT::i1 && |
1791 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
1792 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
1793 | // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8 |
1794 | ArgValue = lowerRegToMasks(ValArg: ArgValue, ValVT: VA.getValVT(), ValLoc: RegVT, DL: dl, DAG); |
1795 | } else |
1796 | ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: ArgValue); |
1797 | } |
1798 | } else { |
1799 | assert(VA.isMemLoc()); |
1800 | ArgValue = |
1801 | LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i: InsIndex); |
1802 | } |
1803 | |
// If the value is passed via a pointer, do a load.
1805 | if (VA.getLocInfo() == CCValAssign::Indirect && |
1806 | !(Ins[I].Flags.isByVal() && VA.isRegLoc())) { |
1807 | ArgValue = |
1808 | DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: ArgValue, PtrInfo: MachinePointerInfo()); |
1809 | } |
1810 | |
1811 | InVals.push_back(Elt: ArgValue); |
1812 | } |
1813 | |
1814 | for (unsigned I = 0, E = Ins.size(); I != E; ++I) { |
1815 | if (Ins[I].Flags.isSwiftAsync()) { |
1816 | auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
1817 | if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) |
1818 | X86FI->setHasSwiftAsyncContext(true); |
1819 | else { |
1820 | int PtrSize = Subtarget.is64Bit() ? 8 : 4; |
1821 | int FI = |
1822 | MF.getFrameInfo().CreateStackObject(Size: PtrSize, Alignment: Align(PtrSize), isSpillSlot: false); |
1823 | X86FI->setSwiftAsyncContextFrameIdx(FI); |
1824 | SDValue St = DAG.getStore( |
1825 | Chain: DAG.getEntryNode(), dl, Val: InVals[I], |
1826 | Ptr: DAG.getFrameIndex(FI, VT: PtrSize == 8 ? MVT::i64 : MVT::i32), |
1827 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)); |
1828 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: St, N2: Chain); |
1829 | } |
1830 | } |
1831 | |
// The Swift calling convention does not require us to copy the sret argument
// into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1834 | if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) |
1835 | continue; |
1836 | |
1837 | // All x86 ABIs require that for returning structs by value we copy the |
1838 | // sret argument into %rax/%eax (depending on ABI) for the return. Save |
1839 | // the argument into a virtual register so that we can access it from the |
1840 | // return points. |
1841 | if (Ins[I].Flags.isSRet()) { |
1842 | assert(!FuncInfo->getSRetReturnReg() && |
1843 | "SRet return has already been set" ); |
1844 | MVT PtrTy = getPointerTy(DL: DAG.getDataLayout()); |
1845 | Register Reg = |
1846 | MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrTy)); |
1847 | FuncInfo->setSRetReturnReg(Reg); |
1848 | SDValue Copy = DAG.getCopyToReg(Chain: DAG.getEntryNode(), dl, Reg, N: InVals[I]); |
1849 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Copy, N2: Chain); |
1850 | break; |
1851 | } |
1852 | } |
1853 | |
1854 | unsigned StackSize = CCInfo.getStackSize(); |
1855 | // Align stack specially for tail calls. |
1856 | if (shouldGuaranteeTCO(CC: CallConv, |
1857 | GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) |
1858 | StackSize = GetAlignedArgumentStackSize(StackSize, DAG); |
1859 | |
1860 | if (IsVarArg) |
1861 | VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo) |
1862 | .lowerVarArgsParameters(Chain, StackSize); |
1863 | |
1864 | // Some CCs need callee pop. |
1865 | if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg, |
1866 | GuaranteeTCO: MF.getTarget().Options.GuaranteedTailCallOpt)) { |
1867 | FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. |
1868 | } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { |
1869 | // X86 interrupts must pop the error code (and the alignment padding) if |
1870 | // present. |
1871 | FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); |
1872 | } else { |
1873 | FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. |
1874 | // If this is an sret function, the return should pop the hidden pointer. |
1875 | if (!canGuaranteeTCO(CC: CallConv) && hasCalleePopSRet(Args: Ins, Subtarget)) |
1876 | FuncInfo->setBytesToPopOnReturn(4); |
1877 | } |
1878 | |
1879 | if (!Is64Bit) { |
1880 | // RegSaveFrameIndex is X86-64 only. |
1881 | FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); |
1882 | } |
1883 | |
1884 | FuncInfo->setArgumentStackSize(StackSize); |
1885 | |
1886 | if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { |
1887 | EHPersonality Personality = classifyEHPersonality(Pers: F.getPersonalityFn()); |
1888 | if (Personality == EHPersonality::CoreCLR) { |
1889 | assert(Is64Bit); |
1890 | // TODO: Add a mechanism to frame lowering that will allow us to indicate |
1891 | // that we'd prefer this slot be allocated towards the bottom of the frame |
1892 | // (i.e. near the stack pointer after allocating the frame). Every |
1893 | // funclet needs a copy of this slot in its (mostly empty) frame, and the |
1894 | // offset from the bottom of this and each funclet's frame must be the |
1895 | // same, so the size of funclets' (mostly empty) frames is dictated by |
1896 | // how far this slot is from the bottom (since they allocate just enough |
1897 | // space to accommodate holding this slot at the correct offset). |
1898 | int PSPSymFI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), /*isSpillSlot=*/false); |
1899 | EHInfo->PSPSymFrameIdx = PSPSymFI; |
1900 | } |
1901 | } |
1902 | |
1903 | if (shouldDisableArgRegFromCSR(CC: CallConv) || |
1904 | F.hasFnAttribute(Kind: "no_caller_saved_registers" )) { |
1905 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1906 | for (std::pair<Register, Register> Pair : MRI.liveins()) |
1907 | MRI.disableCalleeSavedRegister(Reg: Pair.first); |
1908 | } |
1909 | |
1910 | if (CallingConv::PreserveNone == CallConv) |
1911 | for (unsigned I = 0, E = Ins.size(); I != E; ++I) { |
1912 | if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() || |
1913 | Ins[I].Flags.isSwiftError()) { |
1914 | errorUnsupported(DAG, dl, |
1915 | Msg: "Swift attributes can't be used with preserve_none" ); |
1916 | break; |
1917 | } |
1918 | } |
1919 | |
1920 | return Chain; |
1921 | } |
1922 | |
1923 | SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, |
1924 | SDValue Arg, const SDLoc &dl, |
1925 | SelectionDAG &DAG, |
1926 | const CCValAssign &VA, |
1927 | ISD::ArgFlagsTy Flags, |
1928 | bool isByVal) const { |
1929 | unsigned LocMemOffset = VA.getLocMemOffset(); |
1930 | SDValue PtrOff = DAG.getIntPtrConstant(Val: LocMemOffset, DL: dl); |
1931 | PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()), |
1932 | N1: StackPtr, N2: PtrOff); |
1933 | if (isByVal) |
1934 | return CreateCopyOfByValArgument(Src: Arg, Dst: PtrOff, Chain, Flags, DAG, dl); |
1935 | |
1936 | MaybeAlign Alignment; |
1937 | if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && |
1938 | Arg.getSimpleValueType() != MVT::f80) |
1939 | Alignment = MaybeAlign(4); |
1940 | return DAG.getStore( |
1941 | Chain, dl, Val: Arg, Ptr: PtrOff, |
1942 | PtrInfo: MachinePointerInfo::getStack(MF&: DAG.getMachineFunction(), Offset: LocMemOffset), |
1943 | Alignment); |
1944 | } |
1945 | |
/// Emit a load of the return address if tail call
/// optimization is performed and it is required.
1948 | SDValue X86TargetLowering::EmitTailCallLoadRetAddr( |
1949 | SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, |
1950 | bool Is64Bit, int FPDiff, const SDLoc &dl) const { |
1951 | // Adjust the Return address stack slot. |
1952 | EVT VT = getPointerTy(DL: DAG.getDataLayout()); |
1953 | OutRetAddr = getReturnAddressFrameIndex(DAG); |
1954 | |
1955 | // Load the "old" Return address. |
1956 | OutRetAddr = DAG.getLoad(VT, dl, Chain, Ptr: OutRetAddr, PtrInfo: MachinePointerInfo()); |
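// OutRetAddr now holds the loaded address; return the load's chain (value #1)
// so callers can order later stores after this load.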
1957 | return SDValue(OutRetAddr.getNode(), 1); |
1958 | } |
1959 | |
1960 | /// Emit a store of the return address if tail call |
1961 | /// optimization is performed and it is required (FPDiff!=0). |
1962 | static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, |
1963 | SDValue Chain, SDValue RetAddrFrIdx, |
1964 | EVT PtrVT, unsigned SlotSize, |
1965 | int FPDiff, const SDLoc &dl) { |
1966 | // Store the return address to the appropriate stack slot. |
1967 | if (!FPDiff) return Chain; |
1968 | // Calculate the new stack slot for the return address. |
1969 | int NewReturnAddrFI = |
1970 | MF.getFrameInfo().CreateFixedObject(Size: SlotSize, SPOffset: (int64_t)FPDiff - SlotSize, |
1971 | IsImmutable: false); |
1972 | SDValue NewRetAddrFrIdx = DAG.getFrameIndex(FI: NewReturnAddrFI, VT: PtrVT); |
1973 | Chain = DAG.getStore(Chain, dl, Val: RetAddrFrIdx, Ptr: NewRetAddrFrIdx, |
1974 | PtrInfo: MachinePointerInfo::getFixedStack( |
1975 | MF&: DAG.getMachineFunction(), FI: NewReturnAddrFI)); |
1976 | return Chain; |
1977 | } |
1978 | |
/// Returns a vector_shuffle mask for a movs{s|d}, movd
/// operation of the specified width.
1981 | SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, |
1982 | SDValue V1, SDValue V2) const { |
1983 | unsigned NumElems = VT.getVectorNumElements(); |
1984 | SmallVector<int, 8> Mask; |
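// Lane 0 takes element 0 of V2 (shuffle index NumElems); the remaining lanes
// keep V1, matching the movss/movsd semantics.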
1985 | Mask.push_back(Elt: NumElems); |
1986 | for (unsigned i = 1; i != NumElems; ++i) |
1987 | Mask.push_back(Elt: i); |
1988 | return DAG.getVectorShuffle(VT, dl, N1: V1, N2: V2, Mask); |
1989 | } |
1990 | |
1991 | SDValue |
1992 | X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
1993 | SmallVectorImpl<SDValue> &InVals) const { |
1994 | SelectionDAG &DAG = CLI.DAG; |
1995 | SDLoc &dl = CLI.DL; |
1996 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
1997 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
1998 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
1999 | SDValue Chain = CLI.Chain; |
2000 | SDValue Callee = CLI.Callee; |
2001 | CallingConv::ID CallConv = CLI.CallConv; |
2002 | bool &isTailCall = CLI.IsTailCall; |
2003 | bool isVarArg = CLI.IsVarArg; |
2004 | const auto *CB = CLI.CB; |
2005 | |
2006 | MachineFunction &MF = DAG.getMachineFunction(); |
2007 | bool Is64Bit = Subtarget.is64Bit(); |
2008 | bool IsWin64 = Subtarget.isCallingConvWin64(CC: CallConv); |
2009 | bool IsSibcall = false; |
2010 | bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || |
2011 | CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; |
2012 | bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Args: Outs, Subtarget); |
2013 | X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); |
2014 | bool HasNCSR = (CB && isa<CallInst>(Val: CB) && |
2015 | CB->hasFnAttr(Kind: "no_caller_saved_registers" )); |
2016 | bool HasNoCfCheck = (CB && CB->doesNoCfCheck()); |
2017 | bool IsIndirectCall = (CB && isa<CallInst>(Val: CB) && CB->isIndirectCall()); |
2018 | bool IsCFICall = IsIndirectCall && CLI.CFIType; |
2019 | const Module *M = MF.getFunction().getParent(); |
2020 | Metadata *IsCFProtectionSupported = M->getModuleFlag(Key: "cf-protection-branch" ); |
2021 | |
2022 | MachineFunction::CallSiteInfo CSInfo; |
2023 | if (CallConv == CallingConv::X86_INTR) |
2024 | report_fatal_error(reason: "X86 interrupts may not be called directly" ); |
2025 | |
2026 | // Analyze operands of the call, assigning locations to each operand. |
2027 | SmallVector<CCValAssign, 16> ArgLocs; |
2028 | CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); |
2029 | |
2030 | // Allocate shadow area for Win64. |
2031 | if (IsWin64) |
2032 | CCInfo.AllocateStack(Size: 32, Alignment: Align(8)); |
2033 | |
2034 | CCInfo.AnalyzeArguments(Outs, Fn: CC_X86); |
2035 | |
// In the vectorcall calling convention, a second pass is required for the
// HVA types.
2038 | if (CallingConv::X86_VectorCall == CallConv) { |
2039 | CCInfo.AnalyzeArgumentsSecondPass(Args: Outs, Fn: CC_X86); |
2040 | } |
2041 | |
2042 | bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); |
2043 | if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) { |
2044 | // If we are using a GOT, disable tail calls to external symbols with |
2045 | // default visibility. Tail calling such a symbol requires using a GOT |
2046 | // relocation, which forces early binding of the symbol. This breaks code |
// that requires lazy function symbol resolution. Using musttail or
2048 | // GuaranteedTailCallOpt will override this. |
2049 | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee); |
2050 | if (!G || (!G->getGlobal()->hasLocalLinkage() && |
2051 | G->getGlobal()->hasDefaultVisibility())) |
2052 | isTailCall = false; |
2053 | } |
2054 | |
2055 | if (isTailCall && !IsMustTail) { |
2056 | // Check if it's really possible to do a tail call. |
2057 | isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, |
2058 | IsCalleePopSRet); |
2059 | |
2060 | // Sibcalls are automatically detected tailcalls which do not require |
2061 | // ABI changes. |
2062 | if (!IsGuaranteeTCO && isTailCall) |
2063 | IsSibcall = true; |
2064 | |
2065 | if (isTailCall) |
2066 | ++NumTailCalls; |
2067 | } |
2068 | |
2069 | if (IsMustTail && !isTailCall) |
2070 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
2071 | "site marked musttail" ); |
2072 | |
2073 | assert(!(isVarArg && canGuaranteeTCO(CallConv)) && |
2074 | "Var args not supported with calling convention fastcc, ghc or hipe" ); |
2075 | |
2076 | // Get a count of how many bytes are to be pushed on the stack. |
2077 | unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); |
2078 | if (IsSibcall) |
// This is a sibcall. The memory operands are already available in the
// caller's own caller's stack.
2081 | NumBytes = 0; |
2082 | else if (IsGuaranteeTCO && canGuaranteeTCO(CC: CallConv)) |
2083 | NumBytes = GetAlignedArgumentStackSize(StackSize: NumBytes, DAG); |
2084 | |
2085 | int FPDiff = 0; |
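// For guaranteed tail calls, FPDiff is the difference between the argument
// stack space the caller received and what this call needs; it shifts the
// outgoing argument slots and the return address slot.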
2086 | if (isTailCall && |
2087 | shouldGuaranteeTCO(CC: CallConv, |
2088 | GuaranteedTailCallOpt: MF.getTarget().Options.GuaranteedTailCallOpt)) { |
2089 | // Lower arguments at fp - stackoffset + fpdiff. |
2090 | unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); |
2091 | |
2092 | FPDiff = NumBytesCallerPushed - NumBytes; |
2093 | |
// Set the delta of movement of the return address stack slot, but only
// update it when the new delta moves the slot further (i.e. is more
// negative) than the previous delta.
2096 | if (FPDiff < X86Info->getTCReturnAddrDelta()) |
2097 | X86Info->setTCReturnAddrDelta(FPDiff); |
2098 | } |
2099 | |
2100 | unsigned NumBytesToPush = NumBytes; |
2101 | unsigned NumBytesToPop = NumBytes; |
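// NumBytesToPush is what CALLSEQ_START reserves below; for
// inalloca/preallocated calls it is forced to zero because the argument
// memory has already been set up by the caller.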
2102 | |
2103 | // If we have an inalloca argument, all stack space has already been allocated |
// for us and is right at the top of the stack. We don't support multiple
2105 | // arguments passed in memory when using inalloca. |
2106 | if (!Outs.empty() && Outs.back().Flags.isInAlloca()) { |
2107 | NumBytesToPush = 0; |
2108 | if (!ArgLocs.back().isMemLoc()) |
2109 | report_fatal_error(reason: "cannot use inalloca attribute on a register " |
2110 | "parameter" ); |
2111 | if (ArgLocs.back().getLocMemOffset() != 0) |
2112 | report_fatal_error(reason: "any parameter with the inalloca attribute must be " |
2113 | "the only memory argument" ); |
2114 | } else if (CLI.IsPreallocated) { |
2115 | assert(ArgLocs.back().isMemLoc() && |
2116 | "cannot use preallocated attribute on a register " |
2117 | "parameter" ); |
2118 | SmallVector<size_t, 4> PreallocatedOffsets; |
2119 | for (size_t i = 0; i < CLI.OutVals.size(); ++i) { |
2120 | if (CLI.CB->paramHasAttr(ArgNo: i, Kind: Attribute::Preallocated)) { |
2121 | PreallocatedOffsets.push_back(Elt: ArgLocs[i].getLocMemOffset()); |
2122 | } |
2123 | } |
2124 | auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>(); |
2125 | size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CS: CLI.CB); |
2126 | MFI->setPreallocatedStackSize(Id: PreallocatedId, StackSize: NumBytes); |
2127 | MFI->setPreallocatedArgOffsets(Id: PreallocatedId, AO: PreallocatedOffsets); |
2128 | NumBytesToPush = 0; |
2129 | } |
2130 | |
2131 | if (!IsSibcall && !IsMustTail) |
2132 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytesToPush, |
2133 | OutSize: NumBytes - NumBytesToPush, DL: dl); |
2134 | |
2135 | SDValue RetAddrFrIdx; |
2136 | // Load return address for tail calls. |
2137 | if (isTailCall && FPDiff) |
2138 | Chain = EmitTailCallLoadRetAddr(DAG, OutRetAddr&: RetAddrFrIdx, Chain, IsTailCall: isTailCall, |
2139 | Is64Bit, FPDiff, dl); |
2140 | |
2141 | SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; |
2142 | SmallVector<SDValue, 8> MemOpChains; |
2143 | SDValue StackPtr; |
2144 | |
// The next loop assumes that the locations are in the same order as the
// input arguments.
2147 | assert(isSortedByValueNo(ArgLocs) && |
2148 | "Argument Location list must be sorted before lowering" ); |
2149 | |
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
2152 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
2153 | for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E; |
2154 | ++I, ++OutIndex) { |
2155 | assert(OutIndex < Outs.size() && "Invalid Out index" ); |
2156 | // Skip inalloca/preallocated arguments, they have already been written. |
2157 | ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags; |
2158 | if (Flags.isInAlloca() || Flags.isPreallocated()) |
2159 | continue; |
2160 | |
2161 | CCValAssign &VA = ArgLocs[I]; |
2162 | EVT RegVT = VA.getLocVT(); |
2163 | SDValue Arg = OutVals[OutIndex]; |
2164 | bool isByVal = Flags.isByVal(); |
2165 | |
2166 | // Promote the value if needed. |
2167 | switch (VA.getLocInfo()) { |
2168 | default: llvm_unreachable("Unknown loc info!" ); |
2169 | case CCValAssign::Full: break; |
2170 | case CCValAssign::SExt: |
2171 | Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: RegVT, Operand: Arg); |
2172 | break; |
2173 | case CCValAssign::ZExt: |
2174 | Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: RegVT, Operand: Arg); |
2175 | break; |
2176 | case CCValAssign::AExt: |
2177 | if (Arg.getValueType().isVector() && |
2178 | Arg.getValueType().getVectorElementType() == MVT::i1) |
2179 | Arg = lowerMasksToReg(ValArg: Arg, ValLoc: RegVT, DL: dl, DAG); |
2180 | else if (RegVT.is128BitVector()) { |
2181 | // Special case: passing MMX values in XMM registers. |
2182 | Arg = DAG.getBitcast(VT: MVT::i64, V: Arg); |
2183 | Arg = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL: dl, VT: MVT::v2i64, Operand: Arg); |
2184 | Arg = getMOVL(DAG, dl, VT: MVT::v2i64, V1: DAG.getUNDEF(VT: MVT::v2i64), V2: Arg); |
2185 | } else |
2186 | Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: RegVT, Operand: Arg); |
2187 | break; |
2188 | case CCValAssign::BCvt: |
2189 | Arg = DAG.getBitcast(VT: RegVT, V: Arg); |
2190 | break; |
2191 | case CCValAssign::Indirect: { |
2192 | if (isByVal) { |
2193 | // Memcpy the argument to a temporary stack slot to prevent |
2194 | // the caller from seeing any modifications the callee may make |
2195 | // as guaranteed by the `byval` attribute. |
2196 | int FrameIdx = MF.getFrameInfo().CreateStackObject( |
2197 | Size: Flags.getByValSize(), |
2198 | Alignment: std::max(a: Align(16), b: Flags.getNonZeroByValAlign()), isSpillSlot: false); |
2199 | SDValue StackSlot = |
2200 | DAG.getFrameIndex(FI: FrameIdx, VT: getPointerTy(DL: DAG.getDataLayout())); |
2201 | Chain = |
2202 | CreateCopyOfByValArgument(Src: Arg, Dst: StackSlot, Chain, Flags, DAG, dl); |
2203 | // From now on treat this as a regular pointer |
2204 | Arg = StackSlot; |
2205 | isByVal = false; |
2206 | } else { |
2207 | // Store the argument. |
2208 | SDValue SpillSlot = DAG.CreateStackTemporary(VT: VA.getValVT()); |
2209 | int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex(); |
2210 | Chain = DAG.getStore( |
2211 | Chain, dl, Val: Arg, Ptr: SpillSlot, |
2212 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI)); |
2213 | Arg = SpillSlot; |
2214 | } |
2215 | break; |
2216 | } |
2217 | } |
2218 | |
2219 | if (VA.needsCustom()) { |
2220 | assert(VA.getValVT() == MVT::v64i1 && |
2221 | "Currently the only custom case is when we split v64i1 to 2 regs" ); |
2222 | // Split v64i1 value into two registers |
2223 | Passv64i1ArgInRegs(DL: dl, DAG, Arg, RegsToPass, VA, NextVA&: ArgLocs[++I], Subtarget); |
2224 | } else if (VA.isRegLoc()) { |
2225 | RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg)); |
2226 | const TargetOptions &Options = DAG.getTarget().Options; |
2227 | if (Options.EmitCallSiteInfo) |
2228 | CSInfo.ArgRegPairs.emplace_back(Args: VA.getLocReg(), Args&: I); |
2229 | if (isVarArg && IsWin64) { |
// The Win64 ABI requires an argument XMM register to be copied to the
// corresponding shadow register if the callee is a varargs function.
2232 | Register ShadowReg; |
2233 | switch (VA.getLocReg()) { |
2234 | case X86::XMM0: ShadowReg = X86::RCX; break; |
2235 | case X86::XMM1: ShadowReg = X86::RDX; break; |
2236 | case X86::XMM2: ShadowReg = X86::R8; break; |
2237 | case X86::XMM3: ShadowReg = X86::R9; break; |
2238 | } |
2239 | if (ShadowReg) |
2240 | RegsToPass.push_back(Elt: std::make_pair(x&: ShadowReg, y&: Arg)); |
2241 | } |
2242 | } else if (!IsSibcall && (!isTailCall || isByVal)) { |
2243 | assert(VA.isMemLoc()); |
2244 | if (!StackPtr.getNode()) |
2245 | StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(), |
2246 | VT: getPointerTy(DL: DAG.getDataLayout())); |
2247 | MemOpChains.push_back(Elt: LowerMemOpCallTo(Chain, StackPtr, Arg, |
2248 | dl, DAG, VA, Flags, isByVal)); |
2249 | } |
2250 | } |
2251 | |
2252 | if (!MemOpChains.empty()) |
2253 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains); |
2254 | |
2255 | if (Subtarget.isPICStyleGOT()) { |
// ELF/PIC requires the GOT pointer to be in the EBX register before function
// calls made via the PLT (except for regcall).
2258 | if (!isTailCall) { |
// An indirect call with the RegCall calling convention may use up all the
// general registers, so it is not suitable to reserve the EBX register for
// the GOT address; just let the register allocator handle it.
2262 | if (CallConv != CallingConv::X86_RegCall) |
2263 | RegsToPass.push_back(Elt: std::make_pair( |
2264 | x: Register(X86::EBX), y: DAG.getNode(Opcode: X86ISD::GlobalBaseReg, DL: SDLoc(), |
2265 | VT: getPointerTy(DL: DAG.getDataLayout())))); |
2266 | } else { |
2267 | // If we are tail calling and generating PIC/GOT style code load the |
2268 | // address of the callee into ECX. The value in ecx is used as target of |
2269 | // the tail jump. This is done to circumvent the ebx/callee-saved problem |
2270 | // for tail calls on PIC/GOT architectures. Normally we would just put the |
2271 | // address of GOT into ebx and then call target@PLT. But for tail calls |
2272 | // ebx would be restored (since ebx is callee saved) before jumping to the |
2273 | // target@PLT. |
2274 | |
2275 | // Note: The actual moving to ECX is done further down. |
2276 | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee); |
2277 | if (G && !G->getGlobal()->hasLocalLinkage() && |
2278 | G->getGlobal()->hasDefaultVisibility()) |
2279 | Callee = LowerGlobalAddress(Op: Callee, DAG); |
2280 | else if (isa<ExternalSymbolSDNode>(Val: Callee)) |
2281 | Callee = LowerExternalSymbol(Op: Callee, DAG); |
2282 | } |
2283 | } |
2284 | |
2285 | if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail && |
2286 | (Subtarget.hasSSE1() || !M->getModuleFlag(Key: "SkipRaxSetup" ))) { |
2287 | // From AMD64 ABI document: |
2288 | // For calls that may call functions that use varargs or stdargs |
2289 | // (prototype-less calls or calls to functions containing ellipsis (...) in |
2290 | // the declaration) %al is used as hidden argument to specify the number |
2291 | // of SSE registers used. The contents of %al do not need to match exactly |
// the number of registers, but must be an upper bound on the number of SSE
2293 | // registers used and is in the range 0 - 8 inclusive. |
2294 | |
2295 | // Count the number of XMM registers allocated. |
2296 | static const MCPhysReg XMMArgRegs[] = { |
2297 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
2298 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
2299 | }; |
2300 | unsigned NumXMMRegs = CCInfo.getFirstUnallocated(Regs: XMMArgRegs); |
2301 | assert((Subtarget.hasSSE1() || !NumXMMRegs) |
2302 | && "SSE registers cannot be used when SSE is disabled" ); |
2303 | RegsToPass.push_back(Elt: std::make_pair(x: Register(X86::AL), |
2304 | y: DAG.getConstant(Val: NumXMMRegs, DL: dl, |
2305 | VT: MVT::i8))); |
2306 | } |
2307 | |
2308 | if (isVarArg && IsMustTail) { |
2309 | const auto &Forwards = X86Info->getForwardedMustTailRegParms(); |
2310 | for (const auto &F : Forwards) { |
2311 | SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: F.VReg, VT: F.VT); |
2312 | RegsToPass.push_back(Elt: std::make_pair(x: F.PReg, y&: Val)); |
2313 | } |
2314 | } |
2315 | |
2316 | // For tail calls lower the arguments to the 'real' stack slots. Sibcalls |
2317 | // don't need this because the eligibility check rejects calls that require |
2318 | // shuffling arguments passed in memory. |
2319 | if (!IsSibcall && isTailCall) { |
2320 | // Force all the incoming stack arguments to be loaded from the stack |
2321 | // before any new outgoing arguments are stored to the stack, because the |
2322 | // outgoing stack slots may alias the incoming argument stack slots, and |
2323 | // the alias isn't otherwise explicit. This is slightly more conservative |
2324 | // than necessary, because it means that each store effectively depends |
2325 | // on every argument instead of just those arguments it would clobber. |
2326 | SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); |
2327 | |
2328 | SmallVector<SDValue, 8> MemOpChains2; |
2329 | SDValue FIN; |
2330 | int FI = 0; |
2331 | for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E; |
2332 | ++I, ++OutsIndex) { |
2333 | CCValAssign &VA = ArgLocs[I]; |
2334 | |
2335 | if (VA.isRegLoc()) { |
2336 | if (VA.needsCustom()) { |
2337 | assert((CallConv == CallingConv::X86_RegCall) && |
2338 | "Expecting custom case only in regcall calling convention" ); |
// This means that we are in the special case where one argument was
// passed through two register locations; skip the next location.
2341 | ++I; |
2342 | } |
2343 | |
2344 | continue; |
2345 | } |
2346 | |
2347 | assert(VA.isMemLoc()); |
2348 | SDValue Arg = OutVals[OutsIndex]; |
2349 | ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags; |
2350 | // Skip inalloca/preallocated arguments. They don't require any work. |
2351 | if (Flags.isInAlloca() || Flags.isPreallocated()) |
2352 | continue; |
2353 | // Create frame index. |
2354 | int32_t Offset = VA.getLocMemOffset()+FPDiff; |
2355 | uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; |
2356 | FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true); |
2357 | FIN = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout())); |
2358 | |
2359 | if (Flags.isByVal()) { |
// Copy relative to the frame pointer.
2361 | SDValue Source = DAG.getIntPtrConstant(Val: VA.getLocMemOffset(), DL: dl); |
2362 | if (!StackPtr.getNode()) |
2363 | StackPtr = DAG.getCopyFromReg(Chain, dl, Reg: RegInfo->getStackRegister(), |
2364 | VT: getPointerTy(DL: DAG.getDataLayout())); |
2365 | Source = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout()), |
2366 | N1: StackPtr, N2: Source); |
2367 | |
2368 | MemOpChains2.push_back(Elt: CreateCopyOfByValArgument(Src: Source, Dst: FIN, |
2369 | Chain: ArgChain, |
2370 | Flags, DAG, dl)); |
2371 | } else { |
// Store relative to the frame pointer.
2373 | MemOpChains2.push_back(Elt: DAG.getStore( |
2374 | Chain: ArgChain, dl, Val: Arg, Ptr: FIN, |
2375 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI))); |
2376 | } |
2377 | } |
2378 | |
2379 | if (!MemOpChains2.empty()) |
2380 | Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: MemOpChains2); |
2381 | |
2382 | // Store the return address to the appropriate stack slot. |
2383 | Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, |
2384 | PtrVT: getPointerTy(DL: DAG.getDataLayout()), |
2385 | SlotSize: RegInfo->getSlotSize(), FPDiff, dl); |
2386 | } |
2387 | |
2388 | // Build a sequence of copy-to-reg nodes chained together with token chain |
2389 | // and glue operands which copy the outgoing args into registers. |
2390 | SDValue InGlue; |
2391 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
2392 | Chain = DAG.getCopyToReg(Chain, dl, Reg: RegsToPass[i].first, |
2393 | N: RegsToPass[i].second, Glue: InGlue); |
2394 | InGlue = Chain.getValue(R: 1); |
2395 | } |
2396 | |
2397 | if (DAG.getTarget().getCodeModel() == CodeModel::Large) { |
2398 | assert(Is64Bit && "Large code model is only legal in 64-bit mode." ); |
2399 | // In the 64-bit large code model, we have to make all calls |
2400 | // through a register, since the call instruction's 32-bit |
2401 | // pc-relative offset may not be large enough to hold the whole |
2402 | // address. |
2403 | } else if (Callee->getOpcode() == ISD::GlobalAddress || |
2404 | Callee->getOpcode() == ISD::ExternalSymbol) { |
2405 | // Lower direct calls to global addresses and external symbols. Setting |
2406 | // ForCall to true here has the effect of removing WrapperRIP when possible |
2407 | // to allow direct calls to be selected without first materializing the |
2408 | // address into a register. |
2409 | Callee = LowerGlobalOrExternal(Op: Callee, DAG, /*ForCall=*/true); |
2410 | } else if (Subtarget.isTarget64BitILP32() && |
2411 | Callee.getValueType() == MVT::i32) { |
// Zero-extend the 32-bit Callee address into a 64-bit one, per the x32 ABI.
2413 | Callee = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MVT::i64, Operand: Callee); |
2414 | } |
2415 | |
2416 | // Returns a chain & a glue for retval copy to use. |
2417 | SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue); |
2418 | SmallVector<SDValue, 8> Ops; |
2419 | |
2420 | if (!IsSibcall && isTailCall && !IsMustTail) { |
2421 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: 0, Glue: InGlue, DL: dl); |
2422 | InGlue = Chain.getValue(R: 1); |
2423 | } |
2424 | |
2425 | Ops.push_back(Elt: Chain); |
2426 | Ops.push_back(Elt: Callee); |
2427 | |
2428 | if (isTailCall) |
2429 | Ops.push_back(Elt: DAG.getTargetConstant(Val: FPDiff, DL: dl, VT: MVT::i32)); |
2430 | |
2431 | // Add argument registers to the end of the list so that they are known live |
2432 | // into the call. |
2433 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
2434 | Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass[i].first, |
2435 | VT: RegsToPass[i].second.getValueType())); |
2436 | |
2437 | // Add a register mask operand representing the call-preserved registers. |
2438 | const uint32_t *Mask = [&]() { |
2439 | auto AdaptedCC = CallConv; |
2440 | // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists), |
2441 | // use X86_INTR calling convention because it has the same CSR mask |
2442 | // (same preserved registers). |
2443 | if (HasNCSR) |
2444 | AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR; |
// If NoCalleeSavedRegisters is requested, then use GHC since it happens
2446 | // to use the CSR_NoRegs_RegMask. |
2447 | if (CB && CB->hasFnAttr(Kind: "no_callee_saved_registers" )) |
2448 | AdaptedCC = (CallingConv::ID)CallingConv::GHC; |
2449 | return RegInfo->getCallPreservedMask(MF, AdaptedCC); |
2450 | }(); |
2451 | assert(Mask && "Missing call preserved mask for calling convention" ); |
2452 | |
2453 | // If this is an invoke in a 32-bit function using a funclet-based |
2454 | // personality, assume the function clobbers all registers. If an exception |
2455 | // is thrown, the runtime will not restore CSRs. |
2456 | // FIXME: Model this more precisely so that we can register allocate across |
2457 | // the normal edge and spill and fill across the exceptional edge. |
2458 | if (!Is64Bit && CLI.CB && isa<InvokeInst>(Val: CLI.CB)) { |
2459 | const Function &CallerFn = MF.getFunction(); |
2460 | EHPersonality Pers = |
2461 | CallerFn.hasPersonalityFn() |
2462 | ? classifyEHPersonality(Pers: CallerFn.getPersonalityFn()) |
2463 | : EHPersonality::Unknown; |
2464 | if (isFuncletEHPersonality(Pers)) |
2465 | Mask = RegInfo->getNoPreservedMask(); |
2466 | } |
2467 | |
2468 | // Define a new register mask from the existing mask. |
2469 | uint32_t *RegMask = nullptr; |
2470 | |
2471 | // In some calling conventions we need to remove the used physical registers |
2472 | // from the reg mask. Create a new RegMask for such calling conventions. |
2473 | // RegMask for calling conventions that disable only return registers (e.g. |
2474 | // preserve_most) will be modified later in LowerCallResult. |
2475 | bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CC: CallConv) || HasNCSR; |
2476 | if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CC: CallConv)) { |
2477 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2478 | |
2479 | // Allocate a new Reg Mask and copy Mask. |
2480 | RegMask = MF.allocateRegMask(); |
2481 | unsigned RegMaskSize = MachineOperand::getRegMaskSize(NumRegs: TRI->getNumRegs()); |
2482 | memcpy(dest: RegMask, src: Mask, n: sizeof(RegMask[0]) * RegMaskSize); |
2483 | |
2484 | // Make sure all sub registers of the argument registers are reset |
2485 | // in the RegMask. |
2486 | if (ShouldDisableArgRegs) { |
2487 | for (auto const &RegPair : RegsToPass) |
2488 | for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg: RegPair.first)) |
2489 | RegMask[SubReg / 32] &= ~(1u << (SubReg % 32)); |
2490 | } |
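// Worked example of the bit arithmetic above: a sub-register whose number is
// 37 lives in word 37 / 32 == 1 of the mask, and clearing bit 37 % 32 == 5 of
// that word marks that register as clobbered by the call.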
2491 | |
2492 | // Create the RegMask Operand according to our updated mask. |
2493 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask)); |
2494 | } else { |
2495 | // Create the RegMask Operand according to the static mask. |
2496 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
2497 | } |
2498 | |
2499 | if (InGlue.getNode()) |
2500 | Ops.push_back(Elt: InGlue); |
2501 | |
2502 | if (isTailCall) { |
2503 | // We used to do: |
2504 | //// If this is the first return lowered for this function, add the regs |
2505 | //// to the liveout set for the function. |
2506 | // This isn't right, although it's probably harmless on x86; liveouts
2507 | // should be computed from returns, not tail calls. Consider a void
2508 | // function making a tail call to a function returning int. |
2509 | MF.getFrameInfo().setHasTailCall(); |
2510 | SDValue Ret = DAG.getNode(Opcode: X86ISD::TC_RETURN, DL: dl, VTList: NodeTys, Ops); |
2511 | |
2512 | if (IsCFICall) |
2513 | Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
2514 | |
2515 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge); |
2516 | DAG.addCallSiteInfo(Node: Ret.getNode(), CallInfo: std::move(CSInfo)); |
2517 | return Ret; |
2518 | } |
2519 | |
2520 | if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) { |
2521 | Chain = DAG.getNode(Opcode: X86ISD::NT_CALL, DL: dl, VTList: NodeTys, Ops); |
2522 | } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CB: CLI.CB)) { |
2523 | // Calls with a "clang.arc.attachedcall" bundle are special. They should be |
2524 | // expanded to the call, directly followed by a special marker sequence and |
2525 | // a call to an ObjC library function. Use the CALL_RVMARKER to do that.
2526 | assert(!isTailCall && |
2527 | "tail calls cannot be marked with clang.arc.attachedcall" ); |
2528 | assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode" ); |
2529 | |
2530 | // Add a target global address for the retainRV/claimRV runtime function |
2531 | // just before the call target. |
2532 | Function *ARCFn = *objcarc::getAttachedARCFunction(CB: CLI.CB); |
2533 | auto PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
2534 | auto GA = DAG.getTargetGlobalAddress(GV: ARCFn, DL: dl, VT: PtrVT); |
2535 | Ops.insert(I: Ops.begin() + 1, Elt: GA); |
2536 | Chain = DAG.getNode(Opcode: X86ISD::CALL_RVMARKER, DL: dl, VTList: NodeTys, Ops); |
2537 | } else { |
2538 | Chain = DAG.getNode(Opcode: X86ISD::CALL, DL: dl, VTList: NodeTys, Ops); |
2539 | } |
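// Hedged IR sketch of the CALL_RVMARKER case above (illustrative only):
//   %call = call ptr @foo() [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
// The retainRV/claimRV function named in the bundle is inserted just before
// the callee operand, so the marker sequence and the runtime call can be
// emitted immediately after the call instruction.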
2540 | |
2541 | if (IsCFICall) |
2542 | Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); |
2543 | |
2544 | InGlue = Chain.getValue(R: 1); |
2545 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge); |
2546 | DAG.addCallSiteInfo(Node: Chain.getNode(), CallInfo: std::move(CSInfo)); |
2547 | |
2548 | // Save heapallocsite metadata. |
2549 | if (CLI.CB) |
2550 | if (MDNode *HeapAlloc = CLI.CB->getMetadata(Kind: "heapallocsite" )) |
2551 | DAG.addHeapAllocSite(Node: Chain.getNode(), MD: HeapAlloc); |
2552 | |
2553 | // Create the CALLSEQ_END node. |
2554 | unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing. |
2555 | if (X86::isCalleePop(CallingConv: CallConv, is64Bit: Is64Bit, IsVarArg: isVarArg, |
2556 | GuaranteeTCO: DAG.getTarget().Options.GuaranteedTailCallOpt)) |
2557 | NumBytesForCalleeToPop = NumBytes; // Callee pops everything |
2558 | else if (!canGuaranteeTCO(CC: CallConv) && IsCalleePopSRet) |
2559 | // If this call passes a struct-return pointer, the callee |
2560 | // pops that struct pointer. |
2561 | NumBytesForCalleeToPop = 4; |
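// Worked example (assuming the 32-bit psABI): for an stdcall callee taking
// two i32 stack arguments, NumBytes == 8 and the callee pops everything (it
// returns with "ret 8"), so NumBytesForCalleeToPop == 8 and the caller does
// not readjust the stack. For a cdecl call where IsCalleePopSRet is set, the
// callee pops only the 4-byte struct-return pointer.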
2562 | |
2563 | // Returns a glue for retval copy to use. |
2564 | if (!IsSibcall) { |
2565 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytesToPop, Size2: NumBytesForCalleeToPop, |
2566 | Glue: InGlue, DL: dl); |
2567 | InGlue = Chain.getValue(R: 1); |
2568 | } |
2569 | |
2570 | if (CallingConv::PreserveNone == CallConv) |
2571 | for (unsigned I = 0, E = Outs.size(); I != E; ++I) { |
2572 | if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() || |
2573 | Outs[I].Flags.isSwiftError()) { |
2574 | errorUnsupported(DAG, dl, |
2575 | Msg: "Swift attributes can't be used with preserve_none" ); |
2576 | break; |
2577 | } |
2578 | } |
2579 | |
2580 | // Handle result values, copying them out of physregs into vregs that we |
2581 | // return. |
2582 | return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG, |
2583 | InVals, RegMask); |
2584 | } |
2585 | |
2586 | //===----------------------------------------------------------------------===// |
2587 | // Fast Calling Convention (tail call) implementation |
2588 | //===----------------------------------------------------------------------===// |
2589 | |
2590 | // Like stdcall, the callee cleans up the arguments, except that ECX is
2591 | // reserved for storing the address of the tail-called function, so only two
2592 | // registers are free for argument passing (inreg). Tail call optimization is
2593 | // performed provided:
2594 | // * tailcallopt is enabled
2595 | // * caller and callee both use fastcc
2596 | // On x86-64 with GOT-style position-independent code, only local (within
2597 | // module) calls are supported at the moment.
2598 | // To keep the stack aligned according to the platform ABI, the function
2599 | // GetAlignedArgumentStackSize ensures that the argument delta is always a
2600 | // multiple of the stack alignment (Darwin's dyld, for example, needs this).
2601 | // If the callee of a tail call has more arguments than the caller, the caller
2602 | // needs to make sure that there is room to move the RETADDR to. This is
2603 | // achieved by reserving an area the size of the argument delta right after the
2604 | // original RETADDR, but before the saved frame pointer or the spilled
2605 | // registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4):
2606 | // stack layout: |
2607 | // arg1 |
2608 | // arg2 |
2609 | // RETADDR |
2610 | // [ new RETADDR |
2611 | // move area ] |
2612 | // (possible EBP) |
2613 | // ESI |
2614 | // EDI |
2615 | // local1 .. |
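// A minimal IR sketch of the situation described above (illustrative names,
// 32-bit x86, compiled with -tailcallopt):
//   declare fastcc void @callee(i32, i32, i32, i32)
//   define fastcc void @caller(i32 %a, i32 %b) {
//     tail call fastcc void @callee(i32 %a, i32 %b, i32 1, i32 2)
//     ret void
//   }
// The callee needs more argument stack than the caller received, so the
// caller reserves the "move area" above to relocate the RETADDR.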
2616 | |
2617 | /// Align the argument stack size so that it plus the return-address slot is a
2618 | /// multiple of the stack alignment, e.g. 16n + 12 for 16-byte alignment with
2618 | /// 4-byte slots.
2619 | unsigned |
2620 | X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize, |
2621 | SelectionDAG &DAG) const { |
2622 | const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign(); |
2623 | const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize(); |
2624 | assert(StackSize % SlotSize == 0 && |
2625 | "StackSize must be a multiple of SlotSize" ); |
2626 | return alignTo(Size: StackSize + SlotSize, A: StackAlignment) - SlotSize; |
2627 | } |
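// Worked example (assuming x86-64: SlotSize == 8, StackAlignment == 16):
//   StackSize == 40  ->  alignTo(48, 16) - 8 == 40   (already in aligned form)
//   StackSize == 48  ->  alignTo(56, 16) - 8 == 56   (rounded up)
// i.e. the returned size plus the pushed return address is 16-byte aligned.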
2628 | |
2629 | /// Return true if the given stack call argument is already available at the
2630 | /// same relative position in the caller's incoming argument stack.
2631 | static |
2632 | bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, |
2633 | MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, |
2634 | const X86InstrInfo *TII, const CCValAssign &VA) { |
2635 | unsigned Bytes = Arg.getValueSizeInBits() / 8; |
2636 | |
2637 | for (;;) { |
2638 | // Look through nodes that don't alter the bits of the incoming value. |
2639 | unsigned Op = Arg.getOpcode(); |
2640 | if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST || |
2641 | Op == ISD::AssertZext) { |
2642 | Arg = Arg.getOperand(i: 0); |
2643 | continue; |
2644 | } |
2645 | if (Op == ISD::TRUNCATE) { |
2646 | const SDValue &TruncInput = Arg.getOperand(i: 0); |
2647 | if (TruncInput.getOpcode() == ISD::AssertZext && |
2648 | cast<VTSDNode>(Val: TruncInput.getOperand(i: 1))->getVT() == |
2649 | Arg.getValueType()) { |
2650 | Arg = TruncInput.getOperand(i: 0); |
2651 | continue; |
2652 | } |
2653 | } |
2654 | break; |
2655 | } |
2656 | |
2657 | int FI = INT_MAX; |
2658 | if (Arg.getOpcode() == ISD::CopyFromReg) { |
2659 | Register VR = cast<RegisterSDNode>(Val: Arg.getOperand(i: 1))->getReg(); |
2660 | if (!VR.isVirtual()) |
2661 | return false; |
2662 | MachineInstr *Def = MRI->getVRegDef(Reg: VR); |
2663 | if (!Def) |
2664 | return false; |
2665 | if (!Flags.isByVal()) { |
2666 | if (!TII->isLoadFromStackSlot(MI: *Def, FrameIndex&: FI)) |
2667 | return false; |
2668 | } else { |
2669 | unsigned Opcode = Def->getOpcode(); |
2670 | if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || |
2671 | Opcode == X86::LEA64_32r) && |
2672 | Def->getOperand(i: 1).isFI()) { |
2673 | FI = Def->getOperand(i: 1).getIndex(); |
2674 | Bytes = Flags.getByValSize(); |
2675 | } else |
2676 | return false; |
2677 | } |
2678 | } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val&: Arg)) { |
2679 | if (Flags.isByVal()) |
2680 | // ByVal argument is passed in as a pointer but it's now being |
2681 | // dereferenced. e.g. |
2682 | // define @foo(%struct.X* %A) { |
2683 | // tail call @bar(%struct.X* byval %A) |
2684 | // } |
2685 | return false; |
2686 | SDValue Ptr = Ld->getBasePtr(); |
2687 | FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Val&: Ptr); |
2688 | if (!FINode) |
2689 | return false; |
2690 | FI = FINode->getIndex(); |
2691 | } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { |
2692 | FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Val&: Arg); |
2693 | FI = FINode->getIndex(); |
2694 | Bytes = Flags.getByValSize(); |
2695 | } else |
2696 | return false; |
2697 | |
2698 | assert(FI != INT_MAX); |
2699 | if (!MFI.isFixedObjectIndex(ObjectIdx: FI)) |
2700 | return false; |
2701 | |
2702 | if (Offset != MFI.getObjectOffset(ObjectIdx: FI)) |
2703 | return false; |
2704 | |
2705 | // If this is not byval, check that the argument stack object is immutable. |
2706 | // inalloca and argument copy elision can create mutable argument stack |
2707 | // objects. Byval objects can be mutated, but a byval call intends to pass the |
2708 | // mutated memory. |
2709 | if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(ObjectIdx: FI)) |
2710 | return false; |
2711 | |
2712 | if (VA.getLocVT().getFixedSizeInBits() > |
2713 | Arg.getValueSizeInBits().getFixedValue()) { |
2714 | // If the argument location is wider than the argument type, check that any |
2715 | // extension flags match. |
2716 | if (Flags.isZExt() != MFI.isObjectZExt(ObjectIdx: FI) || |
2717 | Flags.isSExt() != MFI.isObjectSExt(ObjectIdx: FI)) { |
2718 | return false; |
2719 | } |
2720 | } |
2721 | |
2722 | return Bytes == MFI.getObjectSize(ObjectIdx: FI); |
2723 | } |
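// Hedged illustration (made-up 32-bit IR):
//   declare void @callee(i32, i32)
//   define void @caller(i32 %x, i32 %y) {
//     tail call void @callee(i32 %x, i32 %y)
//     ret void
//   }
// Both outgoing stack arguments already sit at the same fixed offsets in the
// caller's incoming argument area, so this returns true for each of them and
// the sibcall needs no argument stores.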
2724 | |
2725 | /// Check whether the call is eligible for tail call optimization. Targets |
2726 | /// that want to do tail call optimization should implement this function. |
2727 | /// Note that the x86 backend does not check musttail calls for eligibility! The |
2728 | /// rest of x86 tail call lowering must be prepared to forward arguments of any |
2729 | /// type. |
2730 | bool X86TargetLowering::IsEligibleForTailCallOptimization( |
2731 | TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo, |
2732 | SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const { |
2733 | SelectionDAG &DAG = CLI.DAG; |
2734 | const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
2735 | const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
2736 | const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
2737 | SDValue Callee = CLI.Callee; |
2738 | CallingConv::ID CalleeCC = CLI.CallConv; |
2739 | bool isVarArg = CLI.IsVarArg; |
2740 | |
2741 | if (!mayTailCallThisCC(CC: CalleeCC)) |
2742 | return false; |
2743 | |
2744 | // If -tailcallopt is specified, make fastcc functions tail-callable. |
2745 | MachineFunction &MF = DAG.getMachineFunction(); |
2746 | const Function &CallerF = MF.getFunction(); |
2747 | |
2748 | // If the function return type is x86_fp80 and the callee return type is not, |
2749 | // then the FP_EXTEND of the call result is not a nop. It's not safe to |
2750 | // perform a tailcall optimization here. |
2751 | if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty()) |
2752 | return false; |
2753 | |
2754 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
2755 | bool CCMatch = CallerCC == CalleeCC; |
2756 | bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CC: CalleeCC); |
2757 | bool IsCallerWin64 = Subtarget.isCallingConvWin64(CC: CallerCC); |
2758 | bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || |
2759 | CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail; |
2760 | |
2761 | // Win64 functions have extra shadow space for argument homing. Don't do the |
2762 | // sibcall if the caller and callee have mismatched expectations for this |
2763 | // space. |
2764 | if (IsCalleeWin64 != IsCallerWin64) |
2765 | return false; |
2766 | |
2767 | if (IsGuaranteeTCO) { |
2768 | if (canGuaranteeTCO(CC: CalleeCC) && CCMatch) |
2769 | return true; |
2770 | return false; |
2771 | } |
2772 | |
2773 | // Look for obvious safe cases to perform tail call optimization that do not |
2774 | // require ABI changes. This is what gcc calls sibcall. |
2775 | |
2776 | // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to |
2777 | // emit a special epilogue. |
2778 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
2779 | if (RegInfo->hasStackRealignment(MF)) |
2780 | return false; |
2781 | |
2782 | // Also avoid sibcall optimization if we're an sret return fn and the callee |
2783 | // is incompatible. See comment in LowerReturn about why hasStructRetAttr is |
2784 | // insufficient. |
2785 | if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) { |
2786 | // For a compatible tail call the callee must return our sret pointer. So it |
2787 | // needs to be (a) an sret function itself and (b) we pass our sret as its |
2788 | // sret. Condition #b is harder to determine. |
2789 | return false; |
2790 | } else if (IsCalleePopSRet) |
2791 | // The callee pops an sret, so we cannot tail-call, as our caller doesn't |
2792 | // expect that. |
2793 | return false; |
2794 | |
2795 | // Do not sibcall optimize vararg calls unless all arguments are passed via |
2796 | // registers. |
2797 | LLVMContext &C = *DAG.getContext(); |
2798 | if (isVarArg && !Outs.empty()) { |
2799 | // Optimizing for varargs on Win64 is unlikely to be safe without |
2800 | // additional testing. |
2801 | if (IsCalleeWin64 || IsCallerWin64) |
2802 | return false; |
2803 | |
2804 | for (const auto &VA : ArgLocs) |
2805 | if (!VA.isRegLoc()) |
2806 | return false; |
2807 | } |
2808 | |
2809 | // If the call result is in ST0 / ST1, it needs to be popped off the x87 |
2810 | // stack. Therefore, if it's not used by the call it is not safe to optimize |
2811 | // this into a sibcall. |
2812 | bool Unused = false; |
2813 | for (const auto &In : Ins) { |
2814 | if (!In.Used) { |
2815 | Unused = true; |
2816 | break; |
2817 | } |
2818 | } |
2819 | if (Unused) { |
2820 | SmallVector<CCValAssign, 16> RVLocs; |
2821 | CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C); |
2822 | RVCCInfo.AnalyzeCallResult(Ins, Fn: RetCC_X86); |
2823 | for (const auto &VA : RVLocs) { |
2824 | if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) |
2825 | return false; |
2826 | } |
2827 | } |
2828 | |
2829 | // Check that the call results are passed in the same way. |
2830 | if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, |
2831 | CalleeFn: RetCC_X86, CallerFn: RetCC_X86)) |
2832 | return false; |
2833 | // The callee has to preserve all registers the caller needs to preserve. |
2834 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2835 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
2836 | if (!CCMatch) { |
2837 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
2838 | if (!TRI->regmaskSubsetEqual(mask0: CallerPreserved, mask1: CalleePreserved)) |
2839 | return false; |
2840 | } |
2841 | |
2842 | unsigned StackArgsSize = CCInfo.getStackSize(); |
2843 | |
2844 | // If the callee takes no arguments then go on to check the results of the |
2845 | // call. |
2846 | if (!Outs.empty()) { |
2847 | if (StackArgsSize > 0) { |
2848 | // Check if the arguments are already laid out in the right way as |
2849 | // the caller's fixed stack objects. |
2850 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2851 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
2852 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
2853 | for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { |
2854 | const CCValAssign &VA = ArgLocs[I]; |
2855 | SDValue Arg = OutVals[I]; |
2856 | ISD::ArgFlagsTy Flags = Outs[I].Flags; |
2857 | if (VA.getLocInfo() == CCValAssign::Indirect) |
2858 | return false; |
2859 | if (!VA.isRegLoc()) { |
2860 | if (!MatchingStackOffset(Arg, Offset: VA.getLocMemOffset(), Flags, MFI, MRI, |
2861 | TII, VA)) |
2862 | return false; |
2863 | } |
2864 | } |
2865 | } |
2866 | |
2867 | bool PositionIndependent = isPositionIndependent(); |
2868 | // If the tailcall address may be in a register, then make sure it's |
2869 | // possible to register allocate for it. In 32-bit, the call address can |
2870 | // only target EAX, EDX, or ECX since the tail call must be scheduled after |
2871 | // callee-saved registers are restored. These happen to be the same |
2872 | // registers used to pass 'inreg' arguments so watch out for those. |
2873 | if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Val: Callee) && |
2874 | !isa<ExternalSymbolSDNode>(Val: Callee)) || |
2875 | PositionIndependent)) { |
2876 | unsigned NumInRegs = 0; |
2877 | // In PIC we need an extra register to formulate the address computation |
2878 | // for the callee. |
2879 | unsigned MaxInRegs = PositionIndependent ? 2 : 3; |
2880 | |
2881 | for (const auto &VA : ArgLocs) { |
2882 | if (!VA.isRegLoc()) |
2883 | continue; |
2884 | Register Reg = VA.getLocReg(); |
2885 | switch (Reg) { |
2886 | default: break; |
2887 | case X86::EAX: case X86::EDX: case X86::ECX: |
2888 | if (++NumInRegs == MaxInRegs) |
2889 | return false; |
2890 | break; |
2891 | } |
2892 | } |
2893 | } |
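// Worked example of the accounting above (hypothetical 32-bit PIC indirect
// call): with MaxInRegs == 2, two 'inreg' arguments assigned to ECX and EDX
// exhaust the budget, so the sibcall is rejected because no register from
// {EAX, ECX, EDX} would remain to hold the call target.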
2894 | |
2895 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
2896 | if (!parametersInCSRMatch(MRI, CallerPreservedMask: CallerPreserved, ArgLocs, OutVals)) |
2897 | return false; |
2898 | } |
2899 | |
2900 | bool CalleeWillPop = |
2901 | X86::isCalleePop(CallingConv: CalleeCC, is64Bit: Subtarget.is64Bit(), IsVarArg: isVarArg, |
2902 | GuaranteeTCO: MF.getTarget().Options.GuaranteedTailCallOpt); |
2903 | |
2904 | if (unsigned BytesToPop = |
2905 | MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) { |
2906 | // If we have bytes to pop, the callee must pop them. |
2907 | bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; |
2908 | if (!CalleePopMatches) |
2909 | return false; |
2910 | } else if (CalleeWillPop && StackArgsSize > 0) { |
2911 | // If we don't have bytes to pop, make sure the callee doesn't pop any. |
2912 | return false; |
2913 | } |
2914 | |
2915 | return true; |
2916 | } |
2917 | |
2918 | /// Determines whether the callee is required to pop its own arguments. |
2919 | /// Callee pop is necessary to support tail calls. |
2920 | bool X86::isCalleePop(CallingConv::ID CallingConv, |
2921 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { |
2922 | // If GuaranteeTCO is true, we force some calls to be callee pop so that we |
2923 | // can guarantee TCO. |
2924 | if (!IsVarArg && shouldGuaranteeTCO(CC: CallingConv, GuaranteedTailCallOpt: GuaranteeTCO)) |
2925 | return true; |
2926 | |
2927 | switch (CallingConv) { |
2928 | default: |
2929 | return false; |
2930 | case CallingConv::X86_StdCall: |
2931 | case CallingConv::X86_FastCall: |
2932 | case CallingConv::X86_ThisCall: |
2933 | case CallingConv::X86_VectorCall: |
2934 | return !is64Bit; |
2935 | } |
2936 | } |
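// Example (standard 32-bit behaviour): an stdcall function taking 8 bytes of
// stack arguments returns with "ret 8", so isCalleePop is true; on x86-64 the
// same conventions return false and the caller cleans up the stack.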
2937 | |