X86.cpp source code [llvm_projects/clang/lib/CodeGen/Targets/X86.cpp]

1	//===- X86.cpp ------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "ABIInfoImpl.h"
10	#include "TargetInfo.h"
11	#include "clang/Basic/DiagnosticFrontend.h"
12	#include "llvm/ADT/SmallBitVector.h"
13
14	using namespace clang;
15	using namespace clang::CodeGen;
16
17	namespace {
18
19	/// IsX86_MMXType - Return true if this is an MMX type.
20	bool IsX86_MMXType(llvm::Type *IRType) {
21	// Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
22	return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == `64` &&
23	cast<llvm::VectorType>(Val: IRType)->getElementType()->isIntegerTy() &&
24	IRType->getScalarSizeInBits() != `64`;
25	}
26
27	static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
28	StringRef Constraint,
29	llvm::Type* Ty) {
30	bool IsMMXCons = llvm::StringSwitch<bool>(Constraint)
31	.Cases(S0: "y", S1: "&y", S2: "^Ym", Value: true)
32	.Default(Value: false);
33	if (IsMMXCons && Ty->isVectorTy()) {
34	if (cast<llvm::VectorType>(Val: Ty)->getPrimitiveSizeInBits().getFixedValue() !=
35	`64`) {
36	// Invalid MMX constraint
37	return nullptr;
38	}
39
40	return llvm::Type::getX86_MMXTy(C&: CGF.getLLVMContext());
41	}
42
43	if (Constraint == "k") {
44	llvm::Type *Int1Ty = llvm::Type::getInt1Ty(C&: CGF.getLLVMContext());
45	return llvm::FixedVectorType::get(ElementType: Int1Ty, NumElts: Ty->getScalarSizeInBits());
46	}
47
48	// No operation needed
49	return Ty;
50	}
51
52	/// Returns true if this type can be passed in SSE registers with the
53	/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
54	static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
55	if (const BuiltinType *BT = Ty ->getAs<BuiltinType>()) {
56	if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
57	if (BT->getKind() == BuiltinType::LongDouble) {
58	if (&Context.getTargetInfo().getLongDoubleFormat() ==
59	&llvm::APFloat::x87DoubleExtended())
60	return false;
61	}
62	return true;
63	}
64	} else if (const VectorType *VT = Ty ->getAs<VectorType>()) {
65	// vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
66	// registers specially.
67	unsigned VecSize = Context.getTypeSize(T: VT);
68	if (VecSize == `128` \|\| VecSize == `256` \|\| VecSize == `512`)
69	return true;
70	}
71	return false;
72	}
73
/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
///
/// \param NumMembers number of members in the homogeneous aggregate.
/// \returns true when the aggregate has at most four members.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
  return NumMembers <= 4;
}
79
80	/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
81	static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
82	auto AI = ABIArgInfo::getDirect(T);
83	AI.setInReg(true);
84	AI.setCanBeFlattened(false);
85	return AI;
86	}
87
88	//===----------------------------------------------------------------------===//
89	// X86-32 ABI Implementation
90	//===----------------------------------------------------------------------===//
91
92	/// Similar to llvm::CCState, but for Clang.
93	struct CCState {
94	CCState(CGFunctionInfo &FI)
95	: IsPreassigned (FI.arg_size()), CC(FI.getCallingConvention()),
96	Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {}
97
98	llvm::SmallBitVector IsPreassigned;
99	unsigned CC = CallingConv::CC_C;
100	unsigned FreeRegs = `0`;
101	unsigned FreeSSERegs = `0`;
102	RequiredArgs Required;
103	bool IsDelegateCall = false;
104	};
105
106	/// X86_32ABIInfo - The X86-32 ABI information.
107	class X86_32ABIInfo : public ABIInfo {
108	enum Class {
109	Integer,
110	Float
111	};
112
113	static const unsigned MinABIStackAlignInBytes = `4`;
114
115	bool IsDarwinVectorABI;
116	bool IsRetSmallStructInRegABI;
117	bool IsWin32StructABI;
118	bool IsSoftFloatABI;
119	bool IsMCUABI;
120	bool IsLinuxABI;
121	unsigned DefaultNumRegisterParameters;
122
123	static bool isRegisterSize(unsigned Size) {
124	return (Size == `8` \|\| Size == `16` \|\| Size == `32` \|\| Size == `64`);
125	}
126
127	bool isHomogeneousAggregateBaseType(QualType Ty) const override {
128	// FIXME: Assumes vectorcall is in use.
129	return isX86VectorTypeForVectorCall(Context&: getContext(), Ty);
130	}
131
132	bool isHomogeneousAggregateSmallEnough(const Type *Ty,
133	uint64_t NumMembers) const override {
134	// FIXME: Assumes vectorcall is in use.
135	return isX86VectorCallAggregateSmallEnough(NumMembers);
136	}
137
138	bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
139
140	/// getIndirectResult - Give a source type \arg Ty, return a suitable result
141	/// such that the argument will be passed in memory.
142	ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
143
144	ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
145
146	/// Return the alignment to use for the given type on the stack.
147	unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
148
149	Class classify(QualType Ty) const;
150	ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
151	ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State,
152	unsigned ArgIndex) const;
153
154	/// Updates the number of available free registers, returns
155	/// true if any registers were allocated.
156	bool updateFreeRegs(QualType Ty, CCState &State) const;
157
158	bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
159	bool &NeedsPadding) const;
160	bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
161
162	bool canExpandIndirectArgument(QualType Ty) const;
163
164	/// Rewrite the function info so that all memory arguments use
165	/// inalloca.
166	void rewriteWithInAlloca(CGFunctionInfo &FI) const;
167
168	void addFieldToArgStruct(SmallVector<llvm::Type *, `6`> &FrameFields,
169	CharUnits &StackOffset, ABIArgInfo &Info,
170	QualType Type) const;
171	void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;
172
173	public:
174
175	void computeInfo(CGFunctionInfo &FI) const override;
176	RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
177	AggValueSlot Slot) const override;
178
179	X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
180	bool RetSmallStructInRegABI, bool Win32StructABI,
181	unsigned NumRegisterParameters, bool SoftFloatABI)
182	: ABIInfo (CGT), IsDarwinVectorABI(DarwinVectorABI),
183	IsRetSmallStructInRegABI(RetSmallStructInRegABI),
184	IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
185	IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
186	IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() \|\|
187	CGT.getTarget().getTriple().isOSCygMing()),
188	DefaultNumRegisterParameters(NumRegisterParameters) {}
189	};
190
191	class X86_32SwiftABIInfo : public SwiftABIInfo {
192	public:
193	explicit X86_32SwiftABIInfo(CodeGenTypes &CGT)
194	: SwiftABIInfo (CGT, /SwiftErrorInRegister=/false) {}
195
196	bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
197	bool AsReturnValue) const override {
198	// LLVM's x86-32 lowering currently only assigns up to three
199	// integer registers and three fp registers. Oddly, it'll use up to
200	// four vector registers for vectors, but those can overlap with the
201	// scalar registers.
202	return occupiesMoreThan(scalarTypes: ComponentTys, /total=/maxAllRegisters: `3`);
203	}
204	};
205
206	class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
207	public:
208	X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
209	bool RetSmallStructInRegABI, bool Win32StructABI,
210	unsigned NumRegisterParameters, bool SoftFloatABI)
211	: TargetCodeGenInfo (std::make_unique<X86_32ABIInfo>(
212	args&: CGT, args&: DarwinVectorABI, args&: RetSmallStructInRegABI, args&: Win32StructABI,
213	args&: NumRegisterParameters, args&: SoftFloatABI)) {
214	SwiftInfo = std::make_unique<X86_32SwiftABIInfo>(args&: CGT);
215	}
216
217	static bool isStructReturnInRegABI(
218	const llvm::Triple &Triple, const CodeGenOptions &Opts);
219
220	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
221	CodeGen::CodeGenModule &CGM) const override;
222
223	int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
224	// Darwin uses different dwarf register numbers for EH.
225	if (CGM.getTarget().getTriple().isOSDarwin()) return `5`;
226	return `4`;
227	}
228
229	bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
230	llvm::Value Address) const* override;
231
232	llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
233	StringRef Constraint,
234	llvm::Type* Ty) const override {
235	return X86AdjustInlineAsmType(CGF, Constraint, Ty);
236	}
237
238	void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
239	std::string &Constraints,
240	std::vector<llvm::Type *> &ResultRegTypes,
241	std::vector<llvm::Type *> &ResultTruncRegTypes,
242	std::vector<LValue> &ResultRegDests,
243	std::string &AsmString,
244	unsigned NumOutputs) const override;
245
246	StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
247	return "movl\t%ebp, %ebp"
248	"\t\t// marker for objc_retainAutoreleaseReturnValue";
249	}
250	};
251
252	}
253
254	/// Rewrite input constraint references after adding some output constraints.
255	/// In the case where there is one output and one input and we add one output,
256	/// we need to replace all operand references greater than or equal to 1:
257	/// mov $0, $1
258	/// mov eax, $1
259	/// The result will be:
260	/// mov $0, $2
261	/// mov eax, $2
262	static void rewriteInputConstraintReferences(unsigned FirstIn,
263	unsigned NumNewOuts,
264	std::string &AsmString) {
265	std::string Buf;
266	llvm::raw_string_ostream OS(Buf);
267	size_t Pos = `0`;
268	while (Pos < AsmString.size()) {
269	size_t DollarStart = AsmString.find(c: `'$'`, pos: Pos);
270	if (DollarStart == std::string::npos)
271	DollarStart = AsmString.size();
272	size_t DollarEnd = AsmString.find_first_not_of(c: `'$'`, pos: DollarStart);
273	if (DollarEnd == std::string::npos)
274	DollarEnd = AsmString.size();
275	OS << StringRef (&AsmString [Pos], DollarEnd - Pos);
276	Pos = DollarEnd;
277	size_t NumDollars = DollarEnd - DollarStart;
278	if (NumDollars % `2` != `0` && Pos < AsmString.size()) {
279	// We have an operand reference.
280	size_t DigitStart = Pos;
281	if (AsmString [DigitStart] == `'{'`) {
282	OS << `'{'`;
283	++DigitStart;
284	}
285	size_t DigitEnd = AsmString.find_first_not_of(s: "0123456789", pos: DigitStart);
286	if (DigitEnd == std::string::npos)
287	DigitEnd = AsmString.size();
288	StringRef OperandStr(&AsmString [DigitStart], DigitEnd - DigitStart);
289	unsigned OperandIndex;
290	if (!OperandStr.getAsInteger(Radix: `10`, Result&: OperandIndex)) {
291	if (OperandIndex >= FirstIn)
292	OperandIndex += NumNewOuts;
293	OS << OperandIndex;
294	} else {
295	OS << OperandStr;
296	}
297	Pos = DigitEnd;
298	}
299	}
300	AsmString = std::move(OS.str());
301	}
302
303	/// Add output constraints for EAX:EDX because they are return registers.
304	void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
305	CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
306	std::vector<llvm::Type *> &ResultRegTypes,
307	std::vector<llvm::Type *> &ResultTruncRegTypes,
308	std::vector<LValue> &ResultRegDests, std::string &AsmString,
309	unsigned NumOutputs) const {
310	uint64_t RetWidth = CGF.getContext().getTypeSize(T: ReturnSlot.getType());
311
312	// Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
313	// larger.
314	if (!Constraints.empty())
315	Constraints += `','`;
316	if (RetWidth <= `32`) {
317	Constraints += "={eax}";
318	ResultRegTypes.push_back(x: CGF.Int32Ty);
319	} else {
320	// Use the 'A' constraint for EAX:EDX.
321	Constraints += "=A";
322	ResultRegTypes.push_back(x: CGF.Int64Ty);
323	}
324
325	// Truncate EAX or EAX:EDX to an integer of the appropriate size.
326	llvm::Type *CoerceTy = llvm::IntegerType::get(C&: CGF.getLLVMContext(), NumBits: RetWidth);
327	ResultTruncRegTypes.push_back(x: CoerceTy);
328
329	// Coerce the integer by bitcasting the return slot pointer.
330	ReturnSlot.setAddress(ReturnSlot.getAddress().withElementType(ElemTy: CoerceTy));
331	ResultRegDests.push_back(x: ReturnSlot);
332
333	rewriteInputConstraintReferences(FirstIn: NumOutputs, NumNewOuts: `1`, AsmString);
334	}
335
336	/// shouldReturnTypeInRegister - Determine if the given type should be
337	/// returned in a register (for the Darwin and MCU ABI).
338	bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
339	ASTContext &Context) const {
340	uint64_t Size = Context.getTypeSize(T: Ty);
341
342	// For i386, type must be register sized.
343	// For the MCU ABI, it only needs to be <= 8-byte
344	if ((IsMCUABI && Size > `64`) \|\| (!IsMCUABI && !isRegisterSize(Size)))
345	return false;
346
347	if (Ty ->isVectorType()) {
348	// 64- and 128- bit vectors inside structures are not returned in
349	// registers.
350	if (Size == `64` \|\| Size == `128`)
351	return false;
352
353	return true;
354	}
355
356	// If this is a builtin, pointer, enum, complex type, member pointer, or
357	// member function pointer it is ok.
358	if (Ty ->getAs<BuiltinType>() \|\| Ty ->hasPointerRepresentation() \|\|
359	Ty ->isAnyComplexType() \|\| Ty ->isEnumeralType() \|\|
360	Ty ->isBlockPointerType() \|\| Ty ->isMemberPointerType())
361	return true;
362
363	// Arrays are treated like records.
364	if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T: Ty))
365	return shouldReturnTypeInRegister(Ty: AT->getElementType(), Context);
366
367	// Otherwise, it must be a record type.
368	const RecordType *RT = Ty ->getAs<RecordType>();
369	if (!RT) return false;
370
371	// FIXME: Traverse bases here too.
372
373	// Structure types are passed in register if all fields would be
374	// passed in a register.
375	for (const auto *FD : RT->getDecl()->fields()) {
376	// Empty fields are ignored.
377	if (isEmptyField(Context, FD, AllowArrays: true))
378	continue;
379
380	// Check fields recursively.
381	if (!shouldReturnTypeInRegister(Ty: FD->getType(), Context))
382	return false;
383	}
384	return true;
385	}
386
387	static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
388	// Treat complex types as the element type.
389	if (const ComplexType *CTy = Ty ->getAs<ComplexType>())
390	Ty = CTy->getElementType();
391
392	// Check for a type which we know has a simple scalar argument-passing
393	// convention without any padding. (We're specifically looking for 32
394	// and 64-bit integer and integer-equivalents, float, and double.)
395	if (!Ty ->getAs<BuiltinType>() && !Ty ->hasPointerRepresentation() &&
396	!Ty ->isEnumeralType() && !Ty ->isBlockPointerType())
397	return false;
398
399	uint64_t Size = Context.getTypeSize(T: Ty);
400	return Size == `32` \|\| Size == `64`;
401	}
402
403	static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
404	uint64_t &Size) {
405	for (const auto *FD : RD->fields()) {
406	// Scalar arguments on the stack get 4 byte alignment on x86. If the
407	// argument is smaller than 32-bits, expanding the struct will create
408	// alignment padding.
409	if (!is32Or64BitBasicType(Ty: FD->getType(), Context))
410	return false;
411
412	// FIXME: Reject bit-fields wholesale; there are two problems, we don't know
413	// how to expand them yet, and the predicate for telling if a bitfield still
414	// counts as "basic" is more complicated than what we were doing previously.
415	if (FD->isBitField())
416	return false;
417
418	Size += Context.getTypeSize(T: FD->getType());
419	}
420	return true;
421	}
422
423	static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
424	uint64_t &Size) {
425	// Don't do this if there are any non-empty bases.
426	for (const CXXBaseSpecifier &Base : RD->bases()) {
427	if (!addBaseAndFieldSizes(Context, RD: Base.getType()->getAsCXXRecordDecl(),
428	Size))
429	return false;
430	}
431	if (!addFieldSizes(Context, RD, Size))
432	return false;
433	return true;
434	}
435
436	/// Test whether an argument type which is to be passed indirectly (on the
437	/// stack) would have the equivalent layout if it was expanded into separate
438	/// arguments. If so, we prefer to do the latter to avoid inhibiting
439	/// optimizations.
440	bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
441	// We can only expand structure types.
442	const RecordType *RT = Ty ->getAs<RecordType>();
443	if (!RT)
444	return false;
445	const RecordDecl *RD = RT->getDecl();
446	uint64_t Size = `0`;
447	if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
448	if (!IsWin32StructABI) {
449	// On non-Windows, we have to conservatively match our old bitcode
450	// prototypes in order to be ABI-compatible at the bitcode level.
451	if (!CXXRD->isCLike())
452	return false;
453	} else {
454	// Don't do this for dynamic classes.
455	if (CXXRD->isDynamicClass())
456	return false;
457	}
458	if (!addBaseAndFieldSizes(Context&: getContext(), RD: CXXRD, Size))
459	return false;
460	} else {
461	if (!addFieldSizes(Context&: getContext(), RD, Size))
462	return false;
463	}
464
465	// We can do this if there was no alignment padding.
466	return Size == getContext().getTypeSize(T: Ty);
467	}
468
469	ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const {
470	// If the return value is indirect, then the hidden argument is consuming one
471	// integer register.
472	if (State.CC != llvm::CallingConv::X86_FastCall &&
473	State.CC != llvm::CallingConv::X86_VectorCall && State.FreeRegs) {
474	--State.FreeRegs;
475	if (!IsMCUABI)
476	return getNaturalAlignIndirectInReg(Ty: RetTy);
477	}
478	return getNaturalAlignIndirect(Ty: RetTy, /ByVal=/false);
479	}
480
481	ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
482	CCState &State) const {
483	if (RetTy ->isVoidType())
484	return ABIArgInfo::getIgnore();
485
486	const Type Base = nullptr*;
487	uint64_t NumElts = `0`;
488	if ((State.CC == llvm::CallingConv::X86_VectorCall \|\|
489	State.CC == llvm::CallingConv::X86_RegCall) &&
490	isHomogeneousAggregate(Ty: RetTy, Base, Members&: NumElts)) {
491	// The LLVM struct type for such an aggregate should lower properly.
492	return ABIArgInfo::getDirect();
493	}
494
495	if (const VectorType *VT = RetTy ->getAs<VectorType>()) {
496	// On Darwin, some vectors are returned in registers.
497	if (IsDarwinVectorABI) {
498	uint64_t Size = getContext().getTypeSize(T: RetTy);
499
500	// 128-bit vectors are a special case; they are returned in
501	// registers and we need to make sure to pick a type the LLVM
502	// backend will like.
503	if (Size == `128`)
504	return ABIArgInfo::getDirect(T: llvm::FixedVectorType::get(
505	ElementType: llvm::Type::getInt64Ty(C&: getVMContext()), NumElts: `2`));
506
507	// Always return in register if it fits in a general purpose
508	// register, or if it is 64 bits and has a single element.
509	if ((Size == `8` \|\| Size == `16` \|\| Size == `32`) \|\|
510	(Size == `64` && VT->getNumElements() == `1`))
511	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(),
512	NumBits: Size));
513
514	return getIndirectReturnResult(RetTy, State);
515	}
516
517	return ABIArgInfo::getDirect();
518	}
519
520	if (isAggregateTypeForABI(T: RetTy)) {
521	if (const RecordType *RT = RetTy ->getAs<RecordType>()) {
522	// Structures with flexible arrays are always indirect.
523	if (RT->getDecl()->hasFlexibleArrayMember())
524	return getIndirectReturnResult(RetTy, State);
525	}
526
527	// If specified, structs and unions are always indirect.
528	if (!IsRetSmallStructInRegABI && !RetTy ->isAnyComplexType())
529	return getIndirectReturnResult(RetTy, State);
530
531	// Ignore empty structs/unions.
532	if (isEmptyRecord(Context&: getContext(), T: RetTy, AllowArrays: true))
533	return ABIArgInfo::getIgnore();
534
535	// Return complex of _Float16 as <2 x half> so the backend will use xmm0.
536	if (const ComplexType *CT = RetTy ->getAs<ComplexType>()) {
537	QualType ET = getContext().getCanonicalType(T: CT->getElementType());
538	if (ET ->isFloat16Type())
539	return ABIArgInfo::getDirect(T: llvm::FixedVectorType::get(
540	ElementType: llvm::Type::getHalfTy(C&: getVMContext()), NumElts: `2`));
541	}
542
543	// Small structures which are register sized are generally returned
544	// in a register.
545	if (shouldReturnTypeInRegister(Ty: RetTy, Context&: getContext())) {
546	uint64_t Size = getContext().getTypeSize(T: RetTy);
547
548	// As a special-case, if the struct is a "single-element" struct, and
549	// the field is of type "float" or "double", return it in a
550	// floating-point register. (MSVC does not apply this special case.)
551	// We apply a similar transformation for pointer types to improve the
552	// quality of the generated IR.
553	if (const Type *SeltTy = isSingleElementStruct(T: RetTy, Context&: getContext()))
554	if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
555	\|\| SeltTy->hasPointerRepresentation())
556	return ABIArgInfo::getDirect(T: CGT.ConvertType(T: QualType (SeltTy, `0`)));
557
558	// FIXME: We should be able to narrow this integer in cases with dead
559	// padding.
560	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(),NumBits: Size));
561	}
562
563	return getIndirectReturnResult(RetTy, State);
564	}
565
566	// Treat an enum type as its underlying type.
567	if (const EnumType *EnumTy = RetTy ->getAs<EnumType>())
568	RetTy = EnumTy->getDecl()->getIntegerType();
569
570	if (const auto *EIT = RetTy ->getAs<BitIntType>())
571	if (EIT->getNumBits() > `64`)
572	return getIndirectReturnResult(RetTy, State);
573
574	return (isPromotableIntegerTypeForABI(Ty: RetTy) ? ABIArgInfo::getExtend(Ty: RetTy)
575	: ABIArgInfo::getDirect());
576	}
577
578	unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
579	unsigned Align) const {
580	// Otherwise, if the alignment is less than or equal to the minimum ABI
581	// alignment, just use the default; the backend will handle this.
582	if (Align <= MinABIStackAlignInBytes)
583	return `0`; // Use default alignment.
584
585	if (IsLinuxABI) {
586	// Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
587	// want to spend any effort dealing with the ramifications of ABI breaks.
588	//
589	// If the vector type is __m128/__m256/__m512, return the default alignment.
590	if (Ty ->isVectorType() && (Align == `16` \|\| Align == `32` \|\| Align == `64`))
591	return Align;
592	}
593	// On non-Darwin, the stack type alignment is always 4.
594	if (!IsDarwinVectorABI) {
595	// Set explicit alignment, since we may need to realign the top.
596	return MinABIStackAlignInBytes;
597	}
598
599	// Otherwise, if the type contains an SSE vector type, the alignment is 16.
600	if (Align >= `16` && (isSIMDVectorType(Context&: getContext(), Ty) \|\|
601	isRecordWithSIMDVectorType(Context&: getContext(), Ty)))
602	return `16`;
603
604	return MinABIStackAlignInBytes;
605	}
606
607	ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
608	CCState &State) const {
609	if (!ByVal) {
610	if (State.FreeRegs) {
611	--State.FreeRegs; // Non-byval indirects just use one pointer.
612	if (!IsMCUABI)
613	return getNaturalAlignIndirectInReg(Ty);
614	}
615	return getNaturalAlignIndirect(Ty, ByVal: false);
616	}
617
618	// Compute the byval alignment.
619	unsigned TypeAlign = getContext().getTypeAlign(T: Ty) / `8`;
620	unsigned StackAlign = getTypeStackAlignInBytes(Ty, Align: TypeAlign);
621	if (StackAlign == `0`)
622	return ABIArgInfo::getIndirect(Alignment: CharUnits::fromQuantity(Quantity: `4`), /ByVal=/true);
623
624	// If the stack alignment is less than the type alignment, realign the
625	// argument.
626	bool Realign = TypeAlign > StackAlign;
627	return ABIArgInfo::getIndirect(Alignment: CharUnits::fromQuantity(Quantity: StackAlign),
628	/ByVal=/true, Realign);
629	}
630
631	X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
632	const Type *T = isSingleElementStruct(T: Ty, Context&: getContext());
633	if (!T)
634	T = Ty.getTypePtr();
635
636	if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
637	BuiltinType::Kind K = BT->getKind();
638	if (K == BuiltinType::Float \|\| K == BuiltinType::Double)
639	return Float;
640	}
641	return Integer;
642	}
643
644	bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
645	if (!IsSoftFloatABI) {
646	Class C = classify(Ty);
647	if (C == Float)
648	return false;
649	}
650
651	unsigned Size = getContext().getTypeSize(T: Ty);
652	unsigned SizeInRegs = (Size + `31`) / `32`;
653
654	if (SizeInRegs == `0`)
655	return false;
656
657	if (!IsMCUABI) {
658	if (SizeInRegs > State.FreeRegs) {
659	State.FreeRegs = `0`;
660	return false;
661	}
662	} else {
663	// The MCU psABI allows passing parameters in-reg even if there are
664	// earlier parameters that are passed on the stack. Also,
665	// it does not allow passing >8-byte structs in-register,
666	// even if there are 3 free registers available.
667	if (SizeInRegs > State.FreeRegs \|\| SizeInRegs > `2`)
668	return false;
669	}
670
671	State.FreeRegs -= SizeInRegs;
672	return true;
673	}
674
675	bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
676	bool &InReg,
677	bool &NeedsPadding) const {
678	// On Windows, aggregates other than HFAs are never passed in registers, and
679	// they do not consume register slots. Homogenous floating-point aggregates
680	// (HFAs) have already been dealt with at this point.
681	if (IsWin32StructABI && isAggregateTypeForABI(T: Ty))
682	return false;
683
684	NeedsPadding = false;
685	InReg = !IsMCUABI;
686
687	if (!updateFreeRegs(Ty, State))
688	return false;
689
690	if (IsMCUABI)
691	return true;
692
693	if (State.CC == llvm::CallingConv::X86_FastCall \|\|
694	State.CC == llvm::CallingConv::X86_VectorCall \|\|
695	State.CC == llvm::CallingConv::X86_RegCall) {
696	if (getContext().getTypeSize(T: Ty) <= `32` && State.FreeRegs)
697	NeedsPadding = true;
698
699	return false;
700	}
701
702	return true;
703	}
704
705	bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
706	bool IsPtrOrInt = (getContext().getTypeSize(T: Ty) <= `32`) &&
707	(Ty ->isIntegralOrEnumerationType() \|\| Ty ->isPointerType() \|\|
708	Ty ->isReferenceType());
709
710	if (!IsPtrOrInt && (State.CC == llvm::CallingConv::X86_FastCall \|\|
711	State.CC == llvm::CallingConv::X86_VectorCall))
712	return false;
713
714	if (!updateFreeRegs(Ty, State))
715	return false;
716
717	if (!IsPtrOrInt && State.CC == llvm::CallingConv::X86_RegCall)
718	return false;
719
720	// Return true to apply inreg to all legal parameters except for MCU targets.
721	return !IsMCUABI;
722	}
723
724	void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
725	// Vectorcall x86 works subtly different than in x64, so the format is
726	// a bit different than the x64 version. First, all vector types (not HVAs)
727	// are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
728	// This differs from the x64 implementation, where the first 6 by INDEX get
729	// registers.
730	// In the second pass over the arguments, HVAs are passed in the remaining
731	// vector registers if possible, or indirectly by address. The address will be
732	// passed in ECX/EDX if available. Any other arguments are passed according to
733	// the usual fastcall rules.
734	MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
735	for (int I = `0`, E = Args.size(); I < E; ++I) {
736	const Type Base = nullptr*;
737	uint64_t NumElts = `0`;
738	const QualType &Ty = Args [I].type;
739	if ((Ty ->isVectorType() \|\| Ty ->isBuiltinType()) &&
740	isHomogeneousAggregate(Ty, Base, Members&: NumElts)) {
741	if (State.FreeSSERegs >= NumElts) {
742	State.FreeSSERegs -= NumElts;
743	Args [I].info = ABIArgInfo::getDirectInReg();
744	State.IsPreassigned.set(I);
745	}
746	}
747	}
748	}
749
750	ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
751	unsigned ArgIndex) const {
752	// FIXME: Set alignment on indirect arguments.
753	bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
754	bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
755	bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
756
757	Ty = useFirstFieldIfTransparentUnion(Ty);
758	TypeInfo TI = getContext().getTypeInfo(T: Ty);
759
760	// Check with the C++ ABI first.
761	const RecordType *RT = Ty ->getAs<RecordType>();
762	if (RT) {
763	CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, CXXABI&: getCXXABI());
764	if (RAA == CGCXXABI::RAA_Indirect) {
765	return getIndirectResult(Ty, ByVal: false, State);
766	} else if (State.IsDelegateCall) {
767	// Avoid having different alignments on delegate call args by always
768	// setting the alignment to 4, which is what we do for inallocas.
769	ABIArgInfo Res = getIndirectResult(Ty, ByVal: false, State);
770	Res.setIndirectAlign(CharUnits::fromQuantity(Quantity: `4`));
771	return Res;
772	} else if (RAA == CGCXXABI::RAA_DirectInMemory) {
773	// The field index doesn't matter, we'll fix it up later.
774	return ABIArgInfo::getInAlloca(/FieldIndex=/`0`);
775	}
776	}
777
778	// Regcall uses the concept of a homogenous vector aggregate, similar
779	// to other targets.
780	const Type Base = nullptr*;
781	uint64_t NumElts = `0`;
782	if ((IsRegCall \|\| IsVectorCall) &&
783	isHomogeneousAggregate(Ty, Base, Members&: NumElts)) {
784	if (State.FreeSSERegs >= NumElts) {
785	State.FreeSSERegs -= NumElts;
786
787	// Vectorcall passes HVAs directly and does not flatten them, but regcall
788	// does.
789	if (IsVectorCall)
790	return getDirectX86Hva();
791
792	if (Ty ->isBuiltinType() \|\| Ty ->isVectorType())
793	return ABIArgInfo::getDirect();
794	return ABIArgInfo::getExpand();
795	}
796	if (IsVectorCall && Ty ->isBuiltinType())
797	return ABIArgInfo::getDirect();
798	return getIndirectResult(Ty, /ByVal=/false, State);
799	}
800
801	if (isAggregateTypeForABI(T: Ty)) {
802	// Structures with flexible arrays are always indirect.
803	// FIXME: This should not be byval!
804	if (RT && RT->getDecl()->hasFlexibleArrayMember())
805	return getIndirectResult(Ty, ByVal: true, State);
806
807	// Ignore empty structs/unions on non-Windows.
808	if (!IsWin32StructABI && isEmptyRecord(Context&: getContext(), T: Ty, AllowArrays: true))
809	return ABIArgInfo::getIgnore();
810
811	llvm::LLVMContext &LLVMContext = getVMContext();
812	llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(C&: LLVMContext);
813	bool NeedsPadding = false;
814	bool InReg;
815	if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
816	unsigned SizeInRegs = (TI.Width + `31`) / `32`;
817	SmallVector<llvm::Type*, `3`> Elements(SizeInRegs, Int32);
818	llvm::Type *Result = llvm::StructType::get(Context&: LLVMContext, Elements);
819	if (InReg)
820	return ABIArgInfo::getDirectInReg(T: Result);
821	else
822	return ABIArgInfo::getDirect(T: Result);
823	}
824	llvm::IntegerType PaddingType = NeedsPadding ? Int32 : nullptr*;
825
826	// Pass over-aligned aggregates to non-variadic functions on Windows
827	// indirectly. This behavior was added in MSVC 2015. Use the required
828	// alignment from the record layout, since that may be less than the
829	// regular type alignment, and types with required alignment of less than 4
830	// bytes are not passed indirectly.
831	if (IsWin32StructABI && State.Required.isRequiredArg(argIdx: ArgIndex)) {
832	unsigned AlignInBits = `0`;
833	if (RT) {
834	const ASTRecordLayout &Layout =
835	getContext().getASTRecordLayout(D: RT->getDecl());
836	AlignInBits = getContext().toBits(CharSize: Layout.getRequiredAlignment());
837	} else if (TI.isAlignRequired()) {
838	AlignInBits = TI.Align;
839	}
840	if (AlignInBits > `32`)
841	return getIndirectResult(Ty, /ByVal=/false, State);
842	}
843
844	// Expand small (<= 128-bit) record types when we know that the stack layout
845	// of those arguments will match the struct. This is important because the
846	// LLVM backend isn't smart enough to remove byval, which inhibits many
847	// optimizations.
848	// Don't do this for the MCU if there are still free integer registers
849	// (see X86_64 ABI for full explanation).
850	if (TI.Width <= `4` * `32` && (!IsMCUABI \|\| State.FreeRegs == `0`) &&
851	canExpandIndirectArgument(Ty))
852	return ABIArgInfo::getExpandWithPadding(
853	PaddingInReg: IsFastCall \|\| IsVectorCall \|\| IsRegCall, Padding: PaddingType);
854
855	return getIndirectResult(Ty, ByVal: true, State);
856	}
857
858	if (const VectorType *VT = Ty ->getAs<VectorType>()) {
859	// On Windows, vectors are passed directly if registers are available, or
860	// indirectly if not. This avoids the need to align argument memory. Pass
861	// user-defined vector types larger than 512 bits indirectly for simplicity.
862	if (IsWin32StructABI) {
863	if (TI.Width <= `512` && State.FreeSSERegs > `0`) {
864	--State.FreeSSERegs;
865	return ABIArgInfo::getDirectInReg();
866	}
867	return getIndirectResult(Ty, /ByVal=/false, State);
868	}
869
870	// On Darwin, some vectors are passed in memory, we handle this by passing
871	// it as an i8/i16/i32/i64.
872	if (IsDarwinVectorABI) {
873	if ((TI.Width == `8` \|\| TI.Width == `16` \|\| TI.Width == `32`) \|\|
874	(TI.Width == `64` && VT->getNumElements() == `1`))
875	return ABIArgInfo::getDirect(
876	T: llvm::IntegerType::get(C&: getVMContext(), NumBits: TI.Width));
877	}
878
879	if (IsX86_MMXType(IRType: CGT.ConvertType(T: Ty)))
880	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(), NumBits: `64`));
881
882	return ABIArgInfo::getDirect();
883	}
884
885
886	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
887	Ty = EnumTy->getDecl()->getIntegerType();
888
889	bool InReg = shouldPrimitiveUseInReg(Ty, State);
890
891	if (isPromotableIntegerTypeForABI(Ty)) {
892	if (InReg)
893	return ABIArgInfo::getExtendInReg(Ty);
894	return ABIArgInfo::getExtend(Ty);
895	}
896
897	if (const auto *EIT = Ty ->getAs<BitIntType>()) {
898	if (EIT->getNumBits() <= `64`) {
899	if (InReg)
900	return ABIArgInfo::getDirectInReg();
901	return ABIArgInfo::getDirect();
902	}
903	return getIndirectResult(Ty, /ByVal=/false, State);
904	}
905
906	if (InReg)
907	return ABIArgInfo::getDirectInReg();
908	return ABIArgInfo::getDirect();
909	}
910
911	void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
912	CCState State(FI);
913	if (IsMCUABI)
914	State.FreeRegs = `3`;
915	else if (State.CC == llvm::CallingConv::X86_FastCall) {
916	State.FreeRegs = `2`;
917	State.FreeSSERegs = `3`;
918	} else if (State.CC == llvm::CallingConv::X86_VectorCall) {
919	State.FreeRegs = `2`;
920	State.FreeSSERegs = `6`;
921	} else if (FI.getHasRegParm())
922	State.FreeRegs = FI.getRegParm();
923	else if (State.CC == llvm::CallingConv::X86_RegCall) {
924	State.FreeRegs = `5`;
925	State.FreeSSERegs = `8`;
926	} else if (IsWin32StructABI) {
927	// Since MSVC 2015, the first three SSE vectors have been passed in
928	// registers. The rest are passed indirectly.
929	State.FreeRegs = DefaultNumRegisterParameters;
930	State.FreeSSERegs = `3`;
931	} else
932	State.FreeRegs = DefaultNumRegisterParameters;
933
934	if (!::classifyReturnType(CXXABI: getCXXABI(), FI, Info: *this)) {
935	FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType(), State);
936	} else if (FI.getReturnInfo().isIndirect()) {
937	// The C++ ABI is not aware of register usage, so we have to check if the
938	// return value was sret and put it in a register ourselves if appropriate.
939	if (State.FreeRegs) {
940	--State.FreeRegs; // The sret parameter consumes a register.
941	if (!IsMCUABI)
942	FI.getReturnInfo().setInReg(true);
943	}
944	}
945
946	// The chain argument effectively gives us another free register.
947	if (FI.isChainCall())
948	++State.FreeRegs;
949
950	// For vectorcall, do a first pass over the arguments, assigning FP and vector
951	// arguments to XMM registers as available.
952	if (State.CC == llvm::CallingConv::X86_VectorCall)
953	runVectorCallFirstPass(FI, State);
954
955	bool UsedInAlloca = false;
956	MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
957	for (unsigned I = `0`, E = Args.size(); I < E; ++I) {
958	// Skip arguments that have already been assigned.
959	if (State.IsPreassigned.test(Idx: I))
960	continue;
961
962	Args [I].info =
963	classifyArgumentType(Ty: Args [I].type, State, ArgIndex: I);
964	UsedInAlloca \|= (Args [I].info.getKind() == ABIArgInfo::InAlloca);
965	}
966
967	// If we needed to use inalloca for any argument, do a second pass and rewrite
968	// all the memory arguments to use inalloca.
969	if (UsedInAlloca)
970	rewriteWithInAlloca(FI);
971	}
972
973	void
974	X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, `6`> &FrameFields,
975	CharUnits &StackOffset, ABIArgInfo &Info,
976	QualType Type) const {
977	// Arguments are always 4-byte-aligned.
978	CharUnits WordSize = CharUnits::fromQuantity(Quantity: `4`);
979	assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
980
981	// sret pointers and indirect things will require an extra pointer
982	// indirection, unless they are byval. Most things are byval, and will not
983	// require this indirection.
984	bool IsIndirect = false;
985	if (Info.isIndirect() && !Info.getIndirectByVal())
986	IsIndirect = true;
987	Info = ABIArgInfo::getInAlloca(FieldIndex: FrameFields.size(), Indirect: IsIndirect);
988	llvm::Type *LLTy = CGT.ConvertTypeForMem(T: Type);
989	if (IsIndirect)
990	LLTy = llvm::PointerType::getUnqual(C&: getVMContext());
991	FrameFields.push_back(Elt: LLTy);
992	StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(T: Type);
993
994	// Insert padding bytes to respect alignment.
995	CharUnits FieldEnd = StackOffset;
996	StackOffset = FieldEnd.alignTo(Align: WordSize);
997	if (StackOffset != FieldEnd) {
998	CharUnits NumBytes = StackOffset - FieldEnd;
999	llvm::Type *Ty = llvm::Type::getInt8Ty(C&: getVMContext());
1000	Ty = llvm::ArrayType::get(ElementType: Ty, NumElements: NumBytes.getQuantity());
1001	FrameFields.push_back(Elt: Ty);
1002	}
1003	}
1004
1005	static bool isArgInAlloca(const ABIArgInfo &Info) {
1006	// Leave ignored and inreg arguments alone.
1007	switch (Info.getKind()) {
1008	case ABIArgInfo::InAlloca:
1009	return true;
1010	case ABIArgInfo::Ignore:
1011	case ABIArgInfo::IndirectAliased:
1012	return false;
1013	case ABIArgInfo::Indirect:
1014	case ABIArgInfo::Direct:
1015	case ABIArgInfo::Extend:
1016	return !Info.getInReg();
1017	case ABIArgInfo::Expand:
1018	case ABIArgInfo::CoerceAndExpand:
1019	// These are aggregate types which are never passed in registers when
1020	// inalloca is involved.
1021	return true;
1022	}
1023	llvm_unreachable("invalid enum");
1024	}
1025
1026	void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
1027	assert(IsWin32StructABI && "inalloca only supported on win32");
1028
1029	// Build a packed struct type for all of the arguments in memory.
1030	SmallVector<llvm::Type *, `6`> FrameFields;
1031
1032	// The stack alignment is always 4.
1033	CharUnits StackAlign = CharUnits::fromQuantity(Quantity: `4`);
1034
1035	CharUnits StackOffset;
1036	CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();
1037
1038	// Put 'this' into the struct before 'sret', if necessary.
1039	bool IsThisCall =
1040	FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
1041	ABIArgInfo &Ret = FI.getReturnInfo();
1042	if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
1043	isArgInAlloca(Info: I->info)) {
1044	addFieldToArgStruct(FrameFields, StackOffset, Info&: I->info, Type: I->type);
1045	++I;
1046	}
1047
1048	// Put the sret parameter into the inalloca struct if it's in memory.
1049	if (Ret.isIndirect() && !Ret.getInReg()) {
1050	addFieldToArgStruct(FrameFields, StackOffset, Info&: Ret, Type: FI.getReturnType());
1051	// On Windows, the hidden sret parameter is always returned in eax.
1052	Ret.setInAllocaSRet(IsWin32StructABI);
1053	}
1054
1055	// Skip the 'this' parameter in ecx.
1056	if (IsThisCall)
1057	++I;
1058
1059	// Put arguments passed in memory into the struct.
1060	for (; I != E; ++I) {
1061	if (isArgInAlloca(Info: I->info))
1062	addFieldToArgStruct(FrameFields, StackOffset, Info&: I->info, Type: I->type);
1063	}
1064
1065	FI.setArgStruct(Ty: llvm::StructType::get(Context&: getVMContext(), Elements: FrameFields,
1066	/isPacked=/true),
1067	Align: StackAlign);
1068	}
1069
1070	RValue X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
1071	QualType Ty, AggValueSlot Slot) const {
1072
1073	auto TypeInfo = getContext().getTypeInfoInChars(T: Ty);
1074
1075	CCState State(*const_cast<CGFunctionInfo *>(CGF.CurFnInfo));
1076	ABIArgInfo AI = classifyArgumentType(Ty, State, /ArgIndex/ `0`);
1077	// Empty records are ignored for parameter passing purposes.
1078	if (AI.isIgnore())
1079	return Slot.asRValue();
1080
1081	// x86-32 changes the alignment of certain arguments on the stack.
1082	//
1083	// Just messing with TypeInfo like this works because we never pass
1084	// anything indirectly.
1085	TypeInfo.Align = CharUnits::fromQuantity(
1086	Quantity: getTypeStackAlignInBytes(Ty, Align: TypeInfo.Align.getQuantity()));
1087
1088	return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, /Indirect/ IsIndirect: false, ValueInfo: TypeInfo,
1089	SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: `4`),
1090	/AllowHigherAlign/ true, Slot);
1091	}
1092
1093	bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
1094	const llvm::Triple &Triple, const CodeGenOptions &Opts) {
1095	assert(Triple.getArch() == llvm::Triple::x86);
1096
1097	switch (Opts.getStructReturnConvention()) {
1098	case CodeGenOptions::SRCK_Default:
1099	break;
1100	case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
1101	return false;
1102	case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
1103	return true;
1104	}
1105
1106	if (Triple.isOSDarwin() \|\| Triple.isOSIAMCU())
1107	return true;
1108
1109	switch (Triple.getOS()) {
1110	case llvm::Triple::DragonFly:
1111	case llvm::Triple::FreeBSD:
1112	case llvm::Triple::OpenBSD:
1113	case llvm::Triple::Win32:
1114	return true;
1115	default:
1116	return false;
1117	}
1118	}
1119
1120	static void addX86InterruptAttrs(const FunctionDecl FD, llvm::GlobalValue GV,
1121	CodeGen::CodeGenModule &CGM) {
1122	if (!FD->hasAttr<AnyX86InterruptAttr>())
1123	return;
1124
1125	llvm::Function *Fn = cast<llvm::Function>(Val: GV);
1126	Fn->setCallingConv(llvm::CallingConv::X86_INTR);
1127	if (FD->getNumParams() == `0`)
1128	return;
1129
1130	auto PtrTy = cast<PointerType>(Val: FD->getParamDecl(i: `0`)->getType());
1131	llvm::Type *ByValTy = CGM.getTypes().ConvertType(T: PtrTy->getPointeeType());
1132	llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
1133	Context&: Fn->getContext(), Ty: ByValTy);
1134	Fn->addParamAttr(ArgNo: `0`, Attr: NewAttr);
1135	}
1136
1137	void X86_32TargetCodeGenInfo::setTargetAttributes(
1138	const Decl D, llvm::GlobalValue GV, CodeGen::CodeGenModule &CGM) const {
1139	if (GV->isDeclaration())
1140	return;
1141	if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D)) {
1142	if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
1143	llvm::Function *Fn = cast<llvm::Function>(Val: GV);
1144	Fn->addFnAttr(Kind: "stackrealign");
1145	}
1146
1147	addX86InterruptAttrs(FD, GV, CGM);
1148	}
1149	}
1150
1151	bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
1152	CodeGen::CodeGenFunction &CGF,
1153	llvm::Value Address) const* {
1154	CodeGen::CGBuilderTy &Builder = CGF.Builder;
1155
1156	llvm::Value *Four8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `4`);
1157
1158	// 0-7 are the eight integer registers; the order is different
1159	// on Darwin (for EH), but the range is the same.
1160	// 8 is %eip.
1161	AssignToArrayRange(Builder, Array: Address, Value: Four8, FirstIndex: `0`, LastIndex: `8`);
1162
1163	if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
1164	// 12-16 are st(0..4). Not sure why we stop at 4.
1165	// These have size 16, which is sizeof(long double) on
1166	// platforms with 8-byte alignment for that type.
1167	llvm::Value *Sixteen8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `16`);
1168	AssignToArrayRange(Builder, Array: Address, Value: Sixteen8, FirstIndex: `12`, LastIndex: `16`);
1169
1170	} else {
1171	// 9 is %eflags, which doesn't get a size on Darwin for some
1172	// reason.
1173	Builder.CreateAlignedStore(
1174	Val: Four8, Addr: Builder.CreateConstInBoundsGEP1_32(Ty: CGF.Int8Ty, Ptr: Address, Idx0: `9`),
1175	Align: CharUnits::One());
1176
1177	// 11-16 are st(0..5). Not sure why we stop at 5.
1178	// These have size 12, which is sizeof(long double) on
1179	// platforms with 4-byte alignment for that type.
1180	llvm::Value *Twelve8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `12`);
1181	AssignToArrayRange(Builder, Array: Address, Value: Twelve8, FirstIndex: `11`, LastIndex: `16`);
1182	}
1183
1184	return false;
1185	}
1186
1187	//===----------------------------------------------------------------------===//
1188	// X86-64 ABI Implementation
1189	//===----------------------------------------------------------------------===//
1190
1191
1192	namespace {
1193
1194	/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
1195	static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
1196	switch (AVXLevel) {
1197	case X86AVXABILevel::AVX512:
1198	return `512`;
1199	case X86AVXABILevel::AVX:
1200	return `256`;
1201	case X86AVXABILevel::None:
1202	return `128`;
1203	}
1204	llvm_unreachable("Unknown AVXLevel");
1205	}
1206
1207	/// X86_64ABIInfo - The X86_64 ABI information.
1208	class X86_64ABIInfo : public ABIInfo {
1209	enum Class {
1210	Integer = `0`,
1211	SSE,
1212	SSEUp,
1213	X87,
1214	X87Up,
1215	ComplexX87,
1216	NoClass,
1217	Memory
1218	};
1219
1220	/// merge - Implement the X86_64 ABI merging algorithm.
1221	///
1222	/// Merge an accumulating classification \arg Accum with a field
1223	/// classification \arg Field.
1224	///
1225	/// \param Accum - The accumulating classification. This should
1226	/// always be either NoClass or the result of a previous merge
1227	/// call. In addition, this should never be Memory (the caller
1228	/// should just return Memory for the aggregate).
1229	static Class merge(Class Accum, Class Field);
1230
1231	/// postMerge - Implement the X86_64 ABI post merging algorithm.
1232	///
1233	/// Post merger cleanup, reduces a malformed Hi and Lo pair to
1234	/// final MEMORY or SSE classes when necessary.
1235	///
1236	/// \param AggregateSize - The size of the current aggregate in
1237	/// the classification process.
1238	///
1239	/// \param Lo - The classification for the parts of the type
1240	/// residing in the low word of the containing object.
1241	///
1242	/// \param Hi - The classification for the parts of the type
1243	/// residing in the higher words of the containing object.
1244	///
1245	void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
1246
1247	/// classify - Determine the x86_64 register classes in which the
1248	/// given type T should be passed.
1249	///
1250	/// \param Lo - The classification for the parts of the type
1251	/// residing in the low word of the containing object.
1252	///
1253	/// \param Hi - The classification for the parts of the type
1254	/// residing in the high word of the containing object.
1255	///
1256	/// \param OffsetBase - The bit offset of this type in the
1257	/// containing object. Some parameters are classified different
1258	/// depending on whether they straddle an eightbyte boundary.
1259	///
1260	/// \param isNamedArg - Whether the argument in question is a "named"
1261	/// argument, as used in AMD64-ABI 3.5.7.
1262	///
1263	/// \param IsRegCall - Whether the calling conversion is regcall.
1264	///
1265	/// If a word is unused its result will be NoClass; if a type should
1266	/// be passed in Memory then at least the classification of \arg Lo
1267	/// will be Memory.
1268	///
1269	/// The \arg Lo class will be NoClass iff the argument is ignored.
1270	///
1271	/// If the \arg Lo class is ComplexX87, then the \arg Hi class will
1272	/// also be ComplexX87.
1273	void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
1274	bool isNamedArg, bool IsRegCall = false) const;
1275
1276	llvm::Type GetByteVectorType(QualType Ty) const*;
1277	llvm::Type GetSSETypeAtOffset(llvm::Type IRType,
1278	unsigned IROffset, QualType SourceTy,
1279	unsigned SourceOffset) const;
1280	llvm::Type GetINTEGERTypeAtOffset(llvm::Type IRType,
1281	unsigned IROffset, QualType SourceTy,
1282	unsigned SourceOffset) const;
1283
1284	/// getIndirectResult - Give a source type \arg Ty, return a suitable result
1285	/// such that the argument will be returned in memory.
1286	ABIArgInfo getIndirectReturnResult(QualType Ty) const;
1287
1288	/// getIndirectResult - Give a source type \arg Ty, return a suitable result
1289	/// such that the argument will be passed in memory.
1290	///
1291	/// \param freeIntRegs - The number of free integer registers remaining
1292	/// available.
1293	ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;
1294
1295	ABIArgInfo classifyReturnType(QualType RetTy) const;
1296
1297	ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
1298	unsigned &neededInt, unsigned &neededSSE,
1299	bool isNamedArg,
1300	bool IsRegCall = false) const;
1301
1302	ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
1303	unsigned &NeededSSE,
1304	unsigned &MaxVectorWidth) const;
1305
1306	ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
1307	unsigned &NeededSSE,
1308	unsigned &MaxVectorWidth) const;
1309
1310	bool IsIllegalVectorType(QualType Ty) const;
1311
1312	/// The 0.98 ABI revision clarified a lot of ambiguities,
1313	/// unfortunately in ways that were not always consistent with
1314	/// certain previous compilers. In particular, platforms which
1315	/// required strict binary compatibility with older versions of GCC
1316	/// may need to exempt themselves.
1317	bool honorsRevision0_98() const {
1318	return !getTarget().getTriple().isOSDarwin();
1319	}
1320
1321	/// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
1322	/// classify it as INTEGER (for compatibility with older clang compilers).
1323	bool classifyIntegerMMXAsSSE() const {
1324	// Clang <= 3.8 did not do this.
1325	if (getContext().getLangOpts().getClangABICompat() <=
1326	LangOptions::ClangABI::Ver3_8)
1327	return false;
1328
1329	const llvm::Triple &Triple = getTarget().getTriple();
1330	if (Triple.isOSDarwin() \|\| Triple.isPS() \|\| Triple.isOSFreeBSD())
1331	return false;
1332	return true;
1333	}
1334
1335	// GCC classifies vectors of __int128 as memory.
1336	bool passInt128VectorsInMem() const {
1337	// Clang <= 9.0 did not do this.
1338	if (getContext().getLangOpts().getClangABICompat() <=
1339	LangOptions::ClangABI::Ver9)
1340	return false;
1341
1342	const llvm::Triple &T = getTarget().getTriple();
1343	return T.isOSLinux() \|\| T.isOSNetBSD();
1344	}
1345
1346	X86AVXABILevel AVXLevel;
1347	// Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
1348	// 64-bit hardware.
1349	bool Has64BitPointers;
1350
1351	public:
1352	X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1353	: ABIInfo (CGT), AVXLevel(AVXLevel),
1354	Has64BitPointers(CGT.getDataLayout().getPointerSize(AS: `0`) == `8`) {}
1355
1356	bool isPassedUsingAVXType(QualType type) const {
1357	unsigned neededInt, neededSSE;
1358	// The freeIntRegs argument doesn't matter here.
1359	ABIArgInfo info = classifyArgumentType(Ty: type, freeIntRegs: `0`, neededInt, neededSSE,
1360	/isNamedArg/true);
1361	if (info.isDirect()) {
1362	llvm::Type *ty = info.getCoerceToType();
1363	if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(Val: ty))
1364	return vectorTy->getPrimitiveSizeInBits().getFixedValue() > `128`;
1365	}
1366	return false;
1367	}
1368
1369	void computeInfo(CGFunctionInfo &FI) const override;
1370
1371	RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1372	AggValueSlot Slot) const override;
1373	RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1374	AggValueSlot Slot) const override;
1375
1376	bool has64BitPointers() const {
1377	return Has64BitPointers;
1378	}
1379	};
1380
1381	/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
1382	class WinX86_64ABIInfo : public ABIInfo {
1383	public:
1384	WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1385	: ABIInfo (CGT), AVXLevel(AVXLevel),
1386	IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}
1387
1388	void computeInfo(CGFunctionInfo &FI) const override;
1389
1390	RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
1391	AggValueSlot Slot) const override;
1392
1393	bool isHomogeneousAggregateBaseType(QualType Ty) const override {
1394	// FIXME: Assumes vectorcall is in use.
1395	return isX86VectorTypeForVectorCall(Context&: getContext(), Ty);
1396	}
1397
1398	bool isHomogeneousAggregateSmallEnough(const Type *Ty,
1399	uint64_t NumMembers) const override {
1400	// FIXME: Assumes vectorcall is in use.
1401	return isX86VectorCallAggregateSmallEnough(NumMembers);
1402	}
1403
1404	private:
1405	ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
1406	bool IsVectorCall, bool IsRegCall) const;
1407	ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
1408	const ABIArgInfo &current) const;
1409
1410	X86AVXABILevel AVXLevel;
1411
1412	bool IsMingw64;
1413	};
1414
1415	class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
1416	public:
1417	X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
1418	: TargetCodeGenInfo (std::make_unique<X86_64ABIInfo>(args&: CGT, args&: AVXLevel)) {
1419	SwiftInfo =
1420	std::make_unique<SwiftABIInfo>(args&: CGT, /SwiftErrorInRegister=/args: true);
1421	}
1422
1423	/// Disable tail call on x86-64. The epilogue code before the tail jump blocks
1424	/// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
1425	bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }
1426
1427	int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
1428	return `7`;
1429	}
1430
1431	bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
1432	llvm::Value Address) const* override {
1433	llvm::Value *Eight8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `8`);
1434
1435	// 0-15 are the 16 integer registers.
1436	// 16 is %rip.
1437	AssignToArrayRange(Builder&: CGF.Builder, Array: Address, Value: Eight8, FirstIndex: `0`, LastIndex: `16`);
1438	return false;
1439	}
1440
1441	llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
1442	StringRef Constraint,
1443	llvm::Type* Ty) const override {
1444	return X86AdjustInlineAsmType(CGF, Constraint, Ty);
1445	}
1446
1447	bool isNoProtoCallVariadic(const CallArgList &args,
1448	const FunctionNoProtoType fnType) const* override {
1449	// The default CC on x86-64 sets %al to the number of SSA
1450	// registers used, and GCC sets this when calling an unprototyped
1451	// function, so we override the default behavior. However, don't do
1452	// that when AVX types are involved: the ABI explicitly states it is
1453	// undefined, and it doesn't work in practice because of how the ABI
1454	// defines varargs anyway.
1455	if (fnType->getCallConv() == CC_C) {
1456	bool HasAVXType = false;
1457	for (CallArgList::const_iterator
1458	it = args.begin(), ie = args.end(); it != ie; ++it) {
1459	if (getABIInfo<X86_64ABIInfo>().isPassedUsingAVXType(type: it->Ty)) {
1460	HasAVXType = true;
1461	break;
1462	}
1463	}
1464
1465	if (!HasAVXType)
1466	return true;
1467	}
1468
1469	return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
1470	}
1471
1472	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
1473	CodeGen::CodeGenModule &CGM) const override {
1474	if (GV->isDeclaration())
1475	return;
1476	if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D)) {
1477	if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
1478	llvm::Function *Fn = cast<llvm::Function>(Val: GV);
1479	Fn->addFnAttr(Kind: "stackrealign");
1480	}
1481
1482	addX86InterruptAttrs(FD, GV, CGM);
1483	}
1484	}
1485
1486	void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
1487	const FunctionDecl *Caller,
1488	const FunctionDecl Callee, const* CallArgList &Args,
1489	QualType ReturnType) const override;
1490	};
1491	} // namespace
1492
1493	static void initFeatureMaps(const ASTContext &Ctx,
1494	llvm::StringMap<bool> &CallerMap,
1495	const FunctionDecl *Caller,
1496	llvm::StringMap<bool> &CalleeMap,
1497	const FunctionDecl *Callee) {
1498	if (CalleeMap.empty() && CallerMap.empty()) {
1499	// The caller is potentially nullptr in the case where the call isn't in a
1500	// function. In this case, the getFunctionFeatureMap ensures we just get
1501	// the TU level setting (since it cannot be modified by 'target'..
1502	Ctx.getFunctionFeatureMap(FeatureMap&: CallerMap, Caller);
1503	Ctx.getFunctionFeatureMap(FeatureMap&: CalleeMap, Callee);
1504	}
1505	}
1506
1507	static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
1508	SourceLocation CallLoc,
1509	const llvm::StringMap<bool> &CallerMap,
1510	const llvm::StringMap<bool> &CalleeMap,
1511	QualType Ty, StringRef Feature,
1512	bool IsArgument) {
1513	bool CallerHasFeat = CallerMap.lookup(Key: Feature);
1514	bool CalleeHasFeat = CalleeMap.lookup(Key: Feature);
1515	if (!CallerHasFeat && !CalleeHasFeat)
1516	return Diag.Report(Loc: CallLoc, DiagID: diag::warn_avx_calling_convention)
1517	<< IsArgument << Ty << Feature;
1518
1519	// Mixing calling conventions here is very clearly an error.
1520	if (!CallerHasFeat \|\| !CalleeHasFeat)
1521	return Diag.Report(Loc: CallLoc, DiagID: diag::err_avx_calling_convention)
1522	<< IsArgument << Ty << Feature;
1523
1524	// Else, both caller and callee have the required feature, so there is no need
1525	// to diagnose.
1526	return false;
1527	}
1528
1529	static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag,
1530	SourceLocation CallLoc,
1531	const llvm::StringMap<bool> &CallerMap,
1532	const llvm::StringMap<bool> &CalleeMap,
1533	QualType Ty, bool IsArgument) {
1534	bool Caller256 = CallerMap.lookup(Key: "avx512f") && !CallerMap.lookup(Key: "evex512");
1535	bool Callee256 = CalleeMap.lookup(Key: "avx512f") && !CalleeMap.lookup(Key: "evex512");
1536
1537	// Forbid 512-bit or larger vector pass or return when we disabled ZMM
1538	// instructions.
1539	if (Caller256 \|\| Callee256)
1540	return Diag.Report(Loc: CallLoc, DiagID: diag::err_avx_calling_convention)
1541	<< IsArgument << Ty << "evex512";
1542
1543	return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
1544	Feature: "avx512f", IsArgument);
1545	}
1546
1547	static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
1548	SourceLocation CallLoc,
1549	const llvm::StringMap<bool> &CallerMap,
1550	const llvm::StringMap<bool> &CalleeMap, QualType Ty,
1551	bool IsArgument) {
1552	uint64_t Size = Ctx.getTypeSize(T: Ty);
1553	if (Size > `256`)
1554	return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
1555	IsArgument);
1556
1557	if (Size > `128`)
1558	return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, Feature: "avx",
1559	IsArgument);
1560
1561	return false;
1562	}
1563
1564	void X86_64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
1565	SourceLocation CallLoc,
1566	const FunctionDecl *Caller,
1567	const FunctionDecl *Callee,
1568	const CallArgList &Args,
1569	QualType ReturnType) const {
1570	if (!Callee)
1571	return;
1572
1573	llvm::StringMap<bool> CallerMap;
1574	llvm::StringMap<bool> CalleeMap;
1575	unsigned ArgIndex = `0`;
1576
1577	// We need to loop through the actual call arguments rather than the
1578	// function's parameters, in case this variadic.
1579	for (const CallArg &Arg : Args) {
1580	// The "avx" feature changes how vectors >128 in size are passed. "avx512f"
1581	// additionally changes how vectors >256 in size are passed. Like GCC, we
1582	// warn when a function is called with an argument where this will change.
1583	// Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
1584	// the caller and callee features are mismatched.
1585	// Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
1586	// change its ABI with attribute-target after this call.
1587	if (Arg.getType()->isVectorType() &&
1588	CGM.getContext().getTypeSize(T: Arg.getType()) > `128`) {
1589	initFeatureMaps(Ctx: CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
1590	QualType Ty = Arg.getType();
1591	// The CallArg seems to have desugared the type already, so for clearer
1592	// diagnostics, replace it with the type in the FunctionDecl if possible.
1593	if (ArgIndex < Callee->getNumParams())
1594	Ty = Callee->getParamDecl(i: ArgIndex)->getType();
1595
1596	if (checkAVXParam(Diag&: CGM.getDiags(), Ctx&: CGM.getContext(), CallLoc, CallerMap,
1597	CalleeMap, Ty, /IsArgument/ true))
1598	return;
1599	}
1600	++ArgIndex;
1601	}
1602
1603	// Check return always, as we don't have a good way of knowing in codegen
1604	// whether this value is used, tail-called, etc.
1605	if (Callee->getReturnType()->isVectorType() &&
1606	CGM.getContext().getTypeSize(T: Callee->getReturnType()) > `128`) {
1607	initFeatureMaps(Ctx: CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
1608	checkAVXParam(Diag&: CGM.getDiags(), Ctx&: CGM.getContext(), CallLoc, CallerMap,
1609	CalleeMap, Ty: Callee->getReturnType(),
1610	/IsArgument/ false);
1611	}
1612	}
1613
1614	std::string TargetCodeGenInfo::qualifyWindowsLibrary(StringRef Lib) {
1615	// If the argument does not end in .lib, automatically add the suffix.
1616	// If the argument contains a space, enclose it in quotes.
1617	// This matches the behavior of MSVC.
1618	bool Quote = Lib.contains(C: `' '`);
1619	std::string ArgStr = Quote ? "\"" : "";
1620	ArgStr += Lib;
1621	if (!Lib.ends_with_insensitive(Suffix: ".lib") && !Lib.ends_with_insensitive(Suffix: ".a"))
1622	ArgStr += ".lib";
1623	ArgStr += Quote ? "\"" : "";
1624	return ArgStr;
1625	}
1626
1627	namespace {
1628	class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
1629	public:
1630	WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
1631	bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
1632	unsigned NumRegisterParameters)
1633	: X86_32TargetCodeGenInfo (CGT, DarwinVectorABI, RetSmallStructInRegABI,
1634	Win32StructABI, NumRegisterParameters, false) {}
1635
1636	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
1637	CodeGen::CodeGenModule &CGM) const override;
1638
1639	void getDependentLibraryOption(llvm::StringRef Lib,
1640	llvm::SmallString<`24`> &Opt) const override {
1641	Opt = "/DEFAULTLIB:";
1642	Opt += qualifyWindowsLibrary(Lib);
1643	}
1644
1645	void getDetectMismatchOption(llvm::StringRef Name,
1646	llvm::StringRef Value,
1647	llvm::SmallString<`32`> &Opt) const override {
1648	Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
1649	}
1650	};
1651	} // namespace
1652
1653	void WinX86_32TargetCodeGenInfo::setTargetAttributes(
1654	const Decl D, llvm::GlobalValue GV, CodeGen::CodeGenModule &CGM) const {
1655	X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
1656	if (GV->isDeclaration())
1657	return;
1658	addStackProbeTargetAttributes(D, GV, CGM);
1659	}
1660
1661	namespace {
1662	class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
1663	public:
1664	WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
1665	X86AVXABILevel AVXLevel)
1666	: TargetCodeGenInfo (std::make_unique<WinX86_64ABIInfo>(args&: CGT, args&: AVXLevel)) {
1667	SwiftInfo =
1668	std::make_unique<SwiftABIInfo>(args&: CGT, /SwiftErrorInRegister=/args: true);
1669	}
1670
1671	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
1672	CodeGen::CodeGenModule &CGM) const override;
1673
1674	int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
1675	return `7`;
1676	}
1677
1678	bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
1679	llvm::Value Address) const* override {
1680	llvm::Value *Eight8 = llvm::ConstantInt::get(Ty: CGF.Int8Ty, V: `8`);
1681
1682	// 0-15 are the 16 integer registers.
1683	// 16 is %rip.
1684	AssignToArrayRange(Builder&: CGF.Builder, Array: Address, Value: Eight8, FirstIndex: `0`, LastIndex: `16`);
1685	return false;
1686	}
1687
1688	void getDependentLibraryOption(llvm::StringRef Lib,
1689	llvm::SmallString<`24`> &Opt) const override {
1690	Opt = "/DEFAULTLIB:";
1691	Opt += qualifyWindowsLibrary(Lib);
1692	}
1693
1694	void getDetectMismatchOption(llvm::StringRef Name,
1695	llvm::StringRef Value,
1696	llvm::SmallString<`32`> &Opt) const override {
1697	Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
1698	}
1699	};
1700	} // namespace
1701
1702	void WinX86_64TargetCodeGenInfo::setTargetAttributes(
1703	const Decl D, llvm::GlobalValue GV, CodeGen::CodeGenModule &CGM) const {
1704	TargetCodeGenInfo::setTargetAttributes(D, GV, M&: CGM);
1705	if (GV->isDeclaration())
1706	return;
1707	if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D)) {
1708	if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
1709	llvm::Function *Fn = cast<llvm::Function>(Val: GV);
1710	Fn->addFnAttr(Kind: "stackrealign");
1711	}
1712
1713	addX86InterruptAttrs(FD, GV, CGM);
1714	}
1715
1716	addStackProbeTargetAttributes(D, GV, CGM);
1717	}
1718
1719	void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
1720	Class &Hi) const {
1721	// AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
1722	//
1723	// (a) If one of the classes is Memory, the whole argument is passed in
1724	// memory.
1725	//
1726	// (b) If X87UP is not preceded by X87, the whole argument is passed in
1727	// memory.
1728	//
1729	// (c) If the size of the aggregate exceeds two eightbytes and the first
1730	// eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
1731	// argument is passed in memory. NOTE: This is necessary to keep the
1732	// ABI working for processors that don't support the __m256 type.
1733	//
1734	// (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
1735	//
1736	// Some of these are enforced by the merging logic. Others can arise
1737	// only with unions; for example:
1738	// union { _Complex double; unsigned; }
1739	//
1740	// Note that clauses (b) and (c) were added in 0.98.
1741	//
1742	if (Hi == Memory)
1743	Lo = Memory;
1744	if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
1745	Lo = Memory;
1746	if (AggregateSize > `128` && (Lo != SSE \|\| Hi != SSEUp))
1747	Lo = Memory;
1748	if (Hi == SSEUp && Lo != SSE)
1749	Hi = SSE;
1750	}
1751
1752	X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
1753	// AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
1754	// classified recursively so that always two fields are
1755	// considered. The resulting class is calculated according to
1756	// the classes of the fields in the eightbyte:
1757	//
1758	// (a) If both classes are equal, this is the resulting class.
1759	//
1760	// (b) If one of the classes is NO_CLASS, the resulting class is
1761	// the other class.
1762	//
1763	// (c) If one of the classes is MEMORY, the result is the MEMORY
1764	// class.
1765	//
1766	// (d) If one of the classes is INTEGER, the result is the
1767	// INTEGER.
1768	//
1769	// (e) If one of the classes is X87, X87UP, COMPLEX_X87 class,
1770	// MEMORY is used as class.
1771	//
1772	// (f) Otherwise class SSE is used.
1773
1774	// Accum should never be memory (we should have returned) or
1775	// ComplexX87 (because this cannot be passed in a structure).
1776	assert((Accum != Memory && Accum != ComplexX87) &&
1777	"Invalid accumulated classification during merge.");
1778	if (Accum == Field \|\| Field == NoClass)
1779	return Accum;
1780	if (Field == Memory)
1781	return Memory;
1782	if (Accum == NoClass)
1783	return Field;
1784	if (Accum == Integer \|\| Field == Integer)
1785	return Integer;
1786	if (Field == X87 \|\| Field == X87Up \|\| Field == ComplexX87 \|\|
1787	Accum == X87 \|\| Accum == X87Up)
1788	return Memory;
1789	return SSE;
1790	}
1791
1792	void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
1793	Class &Hi, bool isNamedArg, bool IsRegCall) const {
1794	// FIXME: This code can be simplified by introducing a simple value class for
1795	// Class pairs with appropriate constructor methods for the various
1796	// situations.
1797
1798	// FIXME: Some of the split computations are wrong; unaligned vectors
1799	// shouldn't be passed in registers for example, so there is no chance they
1800	// can straddle an eightbyte. Verify & simplify.
1801
1802	Lo = Hi = NoClass;
1803
1804	Class &Current = OffsetBase < `64` ? Lo : Hi;
1805	Current = Memory;
1806
1807	if (const BuiltinType *BT = Ty ->getAs<BuiltinType>()) {
1808	BuiltinType::Kind k = BT->getKind();
1809
1810	if (k == BuiltinType::Void) {
1811	Current = NoClass;
1812	} else if (k == BuiltinType::Int128 \|\| k == BuiltinType::UInt128) {
1813	Lo = Integer;
1814	Hi = Integer;
1815	} else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
1816	Current = Integer;
1817	} else if (k == BuiltinType::Float \|\| k == BuiltinType::Double \|\|
1818	k == BuiltinType::Float16 \|\| k == BuiltinType::BFloat16) {
1819	Current = SSE;
1820	} else if (k == BuiltinType::Float128) {
1821	Lo = SSE;
1822	Hi = SSEUp;
1823	} else if (k == BuiltinType::LongDouble) {
1824	const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
1825	if (LDF == &llvm::APFloat::IEEEquad()) {
1826	Lo = SSE;
1827	Hi = SSEUp;
1828	} else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
1829	Lo = X87;
1830	Hi = X87Up;
1831	} else if (LDF == &llvm::APFloat::IEEEdouble()) {
1832	Current = SSE;
1833	} else
1834	llvm_unreachable("unexpected long double representation!");
1835	}
1836	// FIXME: _Decimal32 and _Decimal64 are SSE.
1837	// FIXME: _float128 and _Decimal128 are (SSE, SSEUp).
1838	return;
1839	}
1840
1841	if (const EnumType *ET = Ty ->getAs<EnumType>()) {
1842	// Classify the underlying integer type.
1843	classify(Ty: ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg);
1844	return;
1845	}
1846
1847	if (Ty ->hasPointerRepresentation()) {
1848	Current = Integer;
1849	return;
1850	}
1851
1852	if (Ty ->isMemberPointerType()) {
1853	if (Ty ->isMemberFunctionPointerType()) {
1854	if (Has64BitPointers) {
1855	// If Has64BitPointers, this is an {i64, i64}, so classify both
1856	// Lo and Hi now.
1857	Lo = Hi = Integer;
1858	} else {
1859	// Otherwise, with 32-bit pointers, this is an {i32, i32}. If that
1860	// straddles an eightbyte boundary, Hi should be classified as well.
1861	uint64_t EB_FuncPtr = (OffsetBase) / `64`;
1862	uint64_t EB_ThisAdj = (OffsetBase + `64` - `1`) / `64`;
1863	if (EB_FuncPtr != EB_ThisAdj) {
1864	Lo = Hi = Integer;
1865	} else {
1866	Current = Integer;
1867	}
1868	}
1869	} else {
1870	Current = Integer;
1871	}
1872	return;
1873	}
1874
1875	if (const VectorType *VT = Ty ->getAs<VectorType>()) {
1876	uint64_t Size = getContext().getTypeSize(T: VT);
1877	if (Size == `1` \|\| Size == `8` \|\| Size == `16` \|\| Size == `32`) {
1878	// gcc passes the following as integer:
1879	// 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float>
1880	// 2 bytes - <2 x char>, <1 x short>
1881	// 1 byte - <1 x char>
1882	Current = Integer;
1883
1884	// If this type crosses an eightbyte boundary, it should be
1885	// split.
1886	uint64_t EB_Lo = (OffsetBase) / `64`;
1887	uint64_t EB_Hi = (OffsetBase + Size - `1`) / `64`;
1888	if (EB_Lo != EB_Hi)
1889	Hi = Lo;
1890	} else if (Size == `64`) {
1891	QualType ElementType = VT->getElementType();
1892
1893	// gcc passes <1 x double> in memory. :(
1894	if (ElementType ->isSpecificBuiltinType(K: BuiltinType::Double))
1895	return;
1896
1897	// gcc passes <1 x long long> as SSE but clang used to unconditionally
1898	// pass them as integer. For platforms where clang is the de facto
1899	// platform compiler, we must continue to use integer.
1900	if (!classifyIntegerMMXAsSSE() &&
1901	(ElementType ->isSpecificBuiltinType(K: BuiltinType::LongLong) \|\|
1902	ElementType ->isSpecificBuiltinType(K: BuiltinType::ULongLong) \|\|
1903	ElementType ->isSpecificBuiltinType(K: BuiltinType::Long) \|\|
1904	ElementType ->isSpecificBuiltinType(K: BuiltinType::ULong)))
1905	Current = Integer;
1906	else
1907	Current = SSE;
1908
1909	// If this type crosses an eightbyte boundary, it should be
1910	// split.
1911	if (OffsetBase && OffsetBase != `64`)
1912	Hi = Lo;
1913	} else if (Size == `128` \|\|
1914	(isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
1915	QualType ElementType = VT->getElementType();
1916
1917	// gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
1918	if (passInt128VectorsInMem() && Size != `128` &&
1919	(ElementType ->isSpecificBuiltinType(K: BuiltinType::Int128) \|\|
1920	ElementType ->isSpecificBuiltinType(K: BuiltinType::UInt128)))
1921	return;
1922
1923	// Arguments of 256-bits are split into four eightbyte chunks. The
1924	// least significant one belongs to class SSE and all the others to class
1925	// SSEUP. The original Lo and Hi design considers that types can't be
1926	// greater than 128-bits, so a 64-bit split in Hi and Lo makes sense.
1927	// This design isn't correct for 256-bits, but since there're no cases
1928	// where the upper parts would need to be inspected, avoid adding
1929	// complexity and just consider Hi to match the 64-256 part.
1930	//
1931	// Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
1932	// registers if they are "named", i.e. not part of the "..." of a
1933	// variadic function.
1934	//
1935	// Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
1936	// split into eight eightbyte chunks, one SSE and seven SSEUP.
1937	Lo = SSE;
1938	Hi = SSEUp;
1939	}
1940	return;
1941	}
1942
1943	if (const ComplexType *CT = Ty ->getAs<ComplexType>()) {
1944	QualType ET = getContext().getCanonicalType(T: CT->getElementType());
1945
1946	uint64_t Size = getContext().getTypeSize(T: Ty);
1947	if (ET ->isIntegralOrEnumerationType()) {
1948	if (Size <= `64`)
1949	Current = Integer;
1950	else if (Size <= `128`)
1951	Lo = Hi = Integer;
1952	} else if (ET ->isFloat16Type() \|\| ET == getContext().FloatTy \|\|
1953	ET ->isBFloat16Type()) {
1954	Current = SSE;
1955	} else if (ET == getContext().DoubleTy) {
1956	Lo = Hi = SSE;
1957	} else if (ET == getContext().LongDoubleTy) {
1958	const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
1959	if (LDF == &llvm::APFloat::IEEEquad())
1960	Current = Memory;
1961	else if (LDF == &llvm::APFloat::x87DoubleExtended())
1962	Current = ComplexX87;
1963	else if (LDF == &llvm::APFloat::IEEEdouble())
1964	Lo = Hi = SSE;
1965	else
1966	llvm_unreachable("unexpected long double representation!");
1967	}
1968
1969	// If this complex type crosses an eightbyte boundary then it
1970	// should be split.
1971	uint64_t EB_Real = (OffsetBase) / `64`;
1972	uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(T: ET)) / `64`;
1973	if (Hi == NoClass && EB_Real != EB_Imag)
1974	Hi = Lo;
1975
1976	return;
1977	}
1978
1979	if (const auto *EITy = Ty ->getAs<BitIntType>()) {
1980	if (EITy->getNumBits() <= `64`)
1981	Current = Integer;
1982	else if (EITy->getNumBits() <= `128`)
1983	Lo = Hi = Integer;
1984	// Larger values need to get passed in memory.
1985	return;
1986	}
1987
1988	if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(T: Ty)) {
1989	// Arrays are treated like structures.
1990
1991	uint64_t Size = getContext().getTypeSize(T: Ty);
1992
1993	// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
1994	// than eight eightbytes, ..., it has class MEMORY.
1995	// regcall ABI doesn't have limitation to an object. The only limitation
1996	// is the free registers, which will be checked in computeInfo.
1997	if (!IsRegCall && Size > `512`)
1998	return;
1999
2000	// AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
2001	// fields, it has class MEMORY.
2002	//
2003	// Only need to check alignment of array base.
2004	if (OffsetBase % getContext().getTypeAlign(T: AT->getElementType()))
2005	return;
2006
2007	// Otherwise implement simplified merge. We could be smarter about
2008	// this, but it isn't worth it and would be harder to verify.
2009	Current = NoClass;
2010	uint64_t EltSize = getContext().getTypeSize(T: AT->getElementType());
2011	uint64_t ArraySize = AT->getZExtSize();
2012
2013	// The only case a 256-bit wide vector could be used is when the array
2014	// contains a single 256-bit element. Since Lo and Hi logic isn't extended
2015	// to work for sizes wider than 128, early check and fallback to memory.
2016	//
2017	if (Size > `128` &&
2018	(Size != EltSize \|\| Size > getNativeVectorSizeForAVXABI(AVXLevel)))
2019	return;
2020
2021	for (uint64_t i=`0`, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
2022	Class FieldLo, FieldHi;
2023	classify(Ty: AT->getElementType(), OffsetBase: Offset, Lo&: FieldLo, Hi&: FieldHi, isNamedArg);
2024	Lo = merge(Accum: Lo, Field: FieldLo);
2025	Hi = merge(Accum: Hi, Field: FieldHi);
2026	if (Lo == Memory \|\| Hi == Memory)
2027	break;
2028	}
2029
2030	postMerge(AggregateSize: Size, Lo, Hi);
2031	assert((Hi != SSEUp \|\| Lo == SSE) && "Invalid SSEUp array classification.");
2032	return;
2033	}
2034
2035	if (const RecordType *RT = Ty ->getAs<RecordType>()) {
2036	uint64_t Size = getContext().getTypeSize(T: Ty);
2037
2038	// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
2039	// than eight eightbytes, ..., it has class MEMORY.
2040	if (Size > `512`)
2041	return;
2042
2043	// AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
2044	// copy constructor or a non-trivial destructor, it is passed by invisible
2045	// reference.
2046	if (getRecordArgABI(RT, CXXABI&: getCXXABI()))
2047	return;
2048
2049	const RecordDecl *RD = RT->getDecl();
2050
2051	// Assume variable sized types are passed in memory.
2052	if (RD->hasFlexibleArrayMember())
2053	return;
2054
2055	const ASTRecordLayout &Layout = getContext().getASTRecordLayout(D: RD);
2056
2057	// Reset Lo class, this will be recomputed.
2058	Current = NoClass;
2059
2060	// If this is a C++ record, classify the bases first.
2061	if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
2062	for (const auto &I : CXXRD->bases()) {
2063	assert(!I.isVirtual() && !I.getType()->isDependentType() &&
2064	"Unexpected base class!");
2065	const auto *Base =
2066	cast<CXXRecordDecl>(Val: I.getType()->castAs<RecordType>()->getDecl());
2067
2068	// Classify this field.
2069	//
2070	// AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
2071	// single eightbyte, each is classified separately. Each eightbyte gets
2072	// initialized to class NO_CLASS.
2073	Class FieldLo, FieldHi;
2074	uint64_t Offset =
2075	OffsetBase + getContext().toBits(CharSize: Layout.getBaseClassOffset(Base));
2076	classify(Ty: I.getType(), OffsetBase: Offset, Lo&: FieldLo, Hi&: FieldHi, isNamedArg);
2077	Lo = merge(Accum: Lo, Field: FieldLo);
2078	Hi = merge(Accum: Hi, Field: FieldHi);
2079	if (Lo == Memory \|\| Hi == Memory) {
2080	postMerge(AggregateSize: Size, Lo, Hi);
2081	return;
2082	}
2083	}
2084	}
2085
2086	// Classify the fields one at a time, merging the results.
2087	unsigned idx = `0`;
2088	bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <=
2089	LangOptions::ClangABI::Ver11 \|\|
2090	getContext().getTargetInfo().getTriple().isPS();
2091	bool IsUnion = RT->isUnionType() && !UseClang11Compat;
2092
2093	for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
2094	i != e; ++i, ++idx) {
2095	uint64_t Offset = OffsetBase + Layout.getFieldOffset(FieldNo: idx);
2096	bool BitField = i ->isBitField();
2097
2098	// Ignore padding bit-fields.
2099	if (BitField && i ->isUnnamedBitField())
2100	continue;
2101
2102	// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
2103	// eight eightbytes, or it contains unaligned fields, it has class MEMORY.
2104	//
2105	// The only case a 256-bit or a 512-bit wide vector could be used is when
2106	// the struct contains a single 256-bit or 512-bit element. Early check
2107	// and fallback to memory.
2108	//
2109	// FIXME: Extended the Lo and Hi logic properly to work for size wider
2110	// than 128.
2111	if (Size > `128` &&
2112	((!IsUnion && Size != getContext().getTypeSize(T: i ->getType())) \|\|
2113	Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
2114	Lo = Memory;
2115	postMerge(AggregateSize: Size, Lo, Hi);
2116	return;
2117	}
2118
2119	bool IsInMemory =
2120	Offset % getContext().getTypeAlign(T: i ->getType().getCanonicalType());
2121	// Note, skip this test for bit-fields, see below.
2122	if (!BitField && IsInMemory) {
2123	Lo = Memory;
2124	postMerge(AggregateSize: Size, Lo, Hi);
2125	return;
2126	}
2127
2128	// Classify this field.
2129	//
2130	// AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
2131	// exceeds a single eightbyte, each is classified
2132	// separately. Each eightbyte gets initialized to class
2133	// NO_CLASS.
2134	Class FieldLo, FieldHi;
2135
2136	// Bit-fields require special handling, they do not force the
2137	// structure to be passed in memory even if unaligned, and
2138	// therefore they can straddle an eightbyte.
2139	if (BitField) {
2140	assert(!i->isUnnamedBitField());
2141	uint64_t Offset = OffsetBase + Layout.getFieldOffset(FieldNo: idx);
2142	uint64_t Size = i ->getBitWidthValue(Ctx: getContext());
2143
2144	uint64_t EB_Lo = Offset / `64`;
2145	uint64_t EB_Hi = (Offset + Size - `1`) / `64`;
2146
2147	if (EB_Lo) {
2148	assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes.");
2149	FieldLo = NoClass;
2150	FieldHi = Integer;
2151	} else {
2152	FieldLo = Integer;
2153	FieldHi = EB_Hi ? Integer : NoClass;
2154	}
2155	} else
2156	classify(Ty: i ->getType(), OffsetBase: Offset, Lo&: FieldLo, Hi&: FieldHi, isNamedArg);
2157	Lo = merge(Accum: Lo, Field: FieldLo);
2158	Hi = merge(Accum: Hi, Field: FieldHi);
2159	if (Lo == Memory \|\| Hi == Memory)
2160	break;
2161	}
2162
2163	postMerge(AggregateSize: Size, Lo, Hi);
2164	}
2165	}
2166
2167	ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
2168	// If this is a scalar LLVM value then assume LLVM will pass it in the right
2169	// place naturally.
2170	if (!isAggregateTypeForABI(T: Ty)) {
2171	// Treat an enum type as its underlying type.
2172	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
2173	Ty = EnumTy->getDecl()->getIntegerType();
2174
2175	if (Ty ->isBitIntType())
2176	return getNaturalAlignIndirect(Ty);
2177
2178	return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
2179	: ABIArgInfo::getDirect());
2180	}
2181
2182	return getNaturalAlignIndirect(Ty);
2183	}
2184
2185	bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
2186	if (const VectorType *VecTy = Ty ->getAs<VectorType>()) {
2187	uint64_t Size = getContext().getTypeSize(T: VecTy);
2188	unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
2189	if (Size <= `64` \|\| Size > LargestVector)
2190	return true;
2191	QualType EltTy = VecTy->getElementType();
2192	if (passInt128VectorsInMem() &&
2193	(EltTy ->isSpecificBuiltinType(K: BuiltinType::Int128) \|\|
2194	EltTy ->isSpecificBuiltinType(K: BuiltinType::UInt128)))
2195	return true;
2196	}
2197
2198	return false;
2199	}
2200
2201	ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
2202	unsigned freeIntRegs) const {
2203	// If this is a scalar LLVM value then assume LLVM will pass it in the right
2204	// place naturally.
2205	//
2206	// This assumption is optimistic, as there could be free registers available
2207	// when we need to pass this argument in memory, and LLVM could try to pass
2208	// the argument in the free register. This does not seem to happen currently,
2209	// but this code would be much safer if we could mark the argument with
2210	// 'onstack'. See PR12193.
2211	if (!isAggregateTypeForABI(T: Ty) && !IsIllegalVectorType(Ty) &&
2212	!Ty ->isBitIntType()) {
2213	// Treat an enum type as its underlying type.
2214	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
2215	Ty = EnumTy->getDecl()->getIntegerType();
2216
2217	return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
2218	: ABIArgInfo::getDirect());
2219	}
2220
2221	if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(T: Ty, CXXABI&: getCXXABI()))
2222	return getNaturalAlignIndirect(Ty, ByVal: RAA == CGCXXABI::RAA_DirectInMemory);
2223
2224	// Compute the byval alignment. We specify the alignment of the byval in all
2225	// cases so that the mid-level optimizer knows the alignment of the byval.
2226	unsigned Align = std::max(a: getContext().getTypeAlign(T: Ty) / `8`, b: `8U`);
2227
2228	// Attempt to avoid passing indirect results using byval when possible. This
2229	// is important for good codegen.
2230	//
2231	// We do this by coercing the value into a scalar type which the backend can
2232	// handle naturally (i.e., without using byval).
2233	//
2234	// For simplicity, we currently only do this when we have exhausted all of the
2235	// free integer registers. Doing this when there are free integer registers
2236	// would require more care, as we would have to ensure that the coerced value
2237	// did not claim the unused register. That would require either reording the
2238	// arguments to the function (so that any subsequent inreg values came first),
2239	// or only doing this optimization when there were no following arguments that
2240	// might be inreg.
2241	//
2242	// We currently expect it to be rare (particularly in well written code) for
2243	// arguments to be passed on the stack when there are still free integer
2244	// registers available (this would typically imply large structs being passed
2245	// by value), so this seems like a fair tradeoff for now.
2246	//
2247	// We can revisit this if the backend grows support for 'onstack' parameter
2248	// attributes. See PR12193.
2249	if (freeIntRegs == `0`) {
2250	uint64_t Size = getContext().getTypeSize(T: Ty);
2251
2252	// If this type fits in an eightbyte, coerce it into the matching integral
2253	// type, which will end up on the stack (with alignment 8).
2254	if (Align == `8` && Size <= `64`)
2255	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(),
2256	NumBits: Size));
2257	}
2258
2259	return ABIArgInfo::getIndirect(Alignment: CharUnits::fromQuantity(Quantity: Align));
2260	}
2261
2262	/// The ABI specifies that a value should be passed in a full vector XMM/YMM
2263	/// register. Pick an LLVM IR type that will be passed as a vector register.
2264	llvm::Type X86_64ABIInfo::GetByteVectorType(QualType Ty) const* {
2265	// Wrapper structs/arrays that only contain vectors are passed just like
2266	// vectors; strip them off if present.
2267	if (const Type *InnerTy = isSingleElementStruct(T: Ty, Context&: getContext()))
2268	Ty = QualType (InnerTy, `0`);
2269
2270	llvm::Type *IRType = CGT.ConvertType(T: Ty);
2271	if (isa<llvm::VectorType>(Val: IRType)) {
2272	// Don't pass vXi128 vectors in their native type, the backend can't
2273	// legalize them.
2274	if (passInt128VectorsInMem() &&
2275	cast<llvm::VectorType>(Val: IRType)->getElementType()->isIntegerTy(Bitwidth: `128`)) {
2276	// Use a vXi64 vector.
2277	uint64_t Size = getContext().getTypeSize(T: Ty);
2278	return llvm::FixedVectorType::get(ElementType: llvm::Type::getInt64Ty(C&: getVMContext()),
2279	NumElts: Size / `64`);
2280	}
2281
2282	return IRType;
2283	}
2284
2285	if (IRType->getTypeID() == llvm::Type::FP128TyID)
2286	return IRType;
2287
2288	// We couldn't find the preferred IR vector type for 'Ty'.
2289	uint64_t Size = getContext().getTypeSize(T: Ty);
2290	assert((Size == `128` \|\| Size == `256` \|\| Size == `512`) && "Invalid type found!");
2291
2292
2293	// Return a LLVM IR vector type based on the size of 'Ty'.
2294	return llvm::FixedVectorType::get(ElementType: llvm::Type::getDoubleTy(C&: getVMContext()),
2295	NumElts: Size / `64`);
2296	}
2297
2298	/// BitsContainNoUserData - Return true if the specified [start,end) bit range
2299	/// is known to either be off the end of the specified type or being in
2300	/// alignment padding. The user type specified is known to be at most 128 bits
2301	/// in size, and have passed through X86_64ABIInfo::classify with a successful
2302	/// classification that put one of the two halves in the INTEGER class.
2303	///
2304	/// It is conservatively correct to return false.
2305	static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
2306	unsigned EndBit, ASTContext &Context) {
2307	// If the bytes being queried are off the end of the type, there is no user
2308	// data hiding here. This handles analysis of builtins, vectors and other
2309	// types that don't contain interesting padding.
2310	unsigned TySize = (unsigned)Context.getTypeSize(T: Ty);
2311	if (TySize <= StartBit)
2312	return true;
2313
2314	if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T: Ty)) {
2315	unsigned EltSize = (unsigned)Context.getTypeSize(T: AT->getElementType());
2316	unsigned NumElts = (unsigned)AT->getZExtSize();
2317
2318	// Check each element to see if the element overlaps with the queried range.
2319	for (unsigned i = `0`; i != NumElts; ++i) {
2320	// If the element is after the span we care about, then we're done..
2321	unsigned EltOffset = i*EltSize;
2322	if (EltOffset >= EndBit) break;
2323
2324	unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :`0`;
2325	if (!BitsContainNoUserData(Ty: AT->getElementType(), StartBit: EltStart,
2326	EndBit: EndBit-EltOffset, Context))
2327	return false;
2328	}
2329	// If it overlaps no elements, then it is safe to process as padding.
2330	return true;
2331	}
2332
2333	if (const RecordType *RT = Ty ->getAs<RecordType>()) {
2334	const RecordDecl *RD = RT->getDecl();
2335	const ASTRecordLayout &Layout = Context.getASTRecordLayout(D: RD);
2336
2337	// If this is a C++ record, check the bases first.
2338	if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
2339	for (const auto &I : CXXRD->bases()) {
2340	assert(!I.isVirtual() && !I.getType()->isDependentType() &&
2341	"Unexpected base class!");
2342	const auto *Base =
2343	cast<CXXRecordDecl>(Val: I.getType()->castAs<RecordType>()->getDecl());
2344
2345	// If the base is after the span we care about, ignore it.
2346	unsigned BaseOffset = Context.toBits(CharSize: Layout.getBaseClassOffset(Base));
2347	if (BaseOffset >= EndBit) continue;
2348
2349	unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :`0`;
2350	if (!BitsContainNoUserData(Ty: I.getType(), StartBit: BaseStart,
2351	EndBit: EndBit-BaseOffset, Context))
2352	return false;
2353	}
2354	}
2355
2356	// Verify that no field has data that overlaps the region of interest. Yes
2357	// this could be sped up a lot by being smarter about queried fields,
2358	// however we're only looking at structs up to 16 bytes, so we don't care
2359	// much.
2360	unsigned idx = `0`;
2361	for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
2362	i != e; ++i, ++idx) {
2363	unsigned FieldOffset = (unsigned)Layout.getFieldOffset(FieldNo: idx);
2364
2365	// If we found a field after the region we care about, then we're done.
2366	if (FieldOffset >= EndBit) break;
2367
2368	unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :`0`;
2369	if (!BitsContainNoUserData(Ty: i ->getType(), StartBit: FieldStart, EndBit: EndBit-FieldOffset,
2370	Context))
2371	return false;
2372	}
2373
2374	// If nothing in this record overlapped the area of interest, then we're
2375	// clean.
2376	return true;
2377	}
2378
2379	return false;
2380	}
2381
2382	/// getFPTypeAtOffset - Return a floating point type at the specified offset.
2383	static llvm::Type getFPTypeAtOffset(llvm::Type IRType, unsigned IROffset,
2384	const llvm::DataLayout &TD) {
2385	if (IROffset == `0` && IRType->isFloatingPointTy())
2386	return IRType;
2387
2388	// If this is a struct, recurse into the field at the specified offset.
2389	if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val: IRType)) {
2390	if (!STy->getNumContainedTypes())
2391	return nullptr;
2392
2393	const llvm::StructLayout *SL = TD.getStructLayout(Ty: STy);
2394	unsigned Elt = SL->getElementContainingOffset(FixedOffset: IROffset);
2395	IROffset -= SL->getElementOffset(Idx: Elt);
2396	return getFPTypeAtOffset(IRType: STy->getElementType(N: Elt), IROffset, TD);
2397	}
2398
2399	// If this is an array, recurse into the field at the specified offset.
2400	if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(Val: IRType)) {
2401	llvm::Type *EltTy = ATy->getElementType();
2402	unsigned EltSize = TD.getTypeAllocSize(Ty: EltTy);
2403	IROffset -= IROffset / EltSize * EltSize;
2404	return getFPTypeAtOffset(IRType: EltTy, IROffset, TD);
2405	}
2406
2407	return nullptr;
2408	}
2409
2410	/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
2411	/// low 8 bytes of an XMM register, corresponding to the SSE class.
2412	llvm::Type *X86_64ABIInfo::
2413	GetSSETypeAtOffset(llvm::Type IRType, unsigned* IROffset,
2414	QualType SourceTy, unsigned SourceOffset) const {
2415	const llvm::DataLayout &TD = getDataLayout();
2416	unsigned SourceSize =
2417	(unsigned)getContext().getTypeSize(T: SourceTy) / `8` - SourceOffset;
2418	llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
2419	if (!T0 \|\| T0->isDoubleTy())
2420	return llvm::Type::getDoubleTy(C&: getVMContext());
2421
2422	// Get the adjacent FP type.
2423	llvm::Type T1 = nullptr*;
2424	unsigned T0Size = TD.getTypeAllocSize(Ty: T0);
2425	if (SourceSize > T0Size)
2426	T1 = getFPTypeAtOffset(IRType, IROffset: IROffset + T0Size, TD);
2427	if (T1 == nullptr) {
2428	// Check if IRType is a half/bfloat + float. float type will be in IROffset+4 due
2429	// to its alignment.
2430	if (T0->is16bitFPTy() && SourceSize > `4`)
2431	T1 = getFPTypeAtOffset(IRType, IROffset: IROffset + `4`, TD);
2432	// If we can't get a second FP type, return a simple half or float.
2433	// avx512fp16-abi.c:pr51813_2 shows it works to return float for
2434	// {float, i8} too.
2435	if (T1 == nullptr)
2436	return T0;
2437	}
2438
2439	if (T0->isFloatTy() && T1->isFloatTy())
2440	return llvm::FixedVectorType::get(ElementType: T0, NumElts: `2`);
2441
2442	if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
2443	llvm::Type T2 = nullptr*;
2444	if (SourceSize > `4`)
2445	T2 = getFPTypeAtOffset(IRType, IROffset: IROffset + `4`, TD);
2446	if (T2 == nullptr)
2447	return llvm::FixedVectorType::get(ElementType: T0, NumElts: `2`);
2448	return llvm::FixedVectorType::get(ElementType: T0, NumElts: `4`);
2449	}
2450
2451	if (T0->is16bitFPTy() \|\| T1->is16bitFPTy())
2452	return llvm::FixedVectorType::get(ElementType: llvm::Type::getHalfTy(C&: getVMContext()), NumElts: `4`);
2453
2454	return llvm::Type::getDoubleTy(C&: getVMContext());
2455	}
2456
2457
2458	/// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in
2459	/// an 8-byte GPR. This means that we either have a scalar or we are talking
2460	/// about the high or low part of an up-to-16-byte struct. This routine picks
2461	/// the best LLVM IR type to represent this, which may be i64 or may be anything
2462	/// else that the backend will pass in a GPR that works better (e.g. i8, %foo*,
2463	/// etc).
2464	///
2465	/// PrefType is an LLVM IR type that corresponds to (part of) the IR type for
2466	/// the source type. IROffset is an offset in bytes into the LLVM IR type that
2467	/// the 8-byte value references. PrefType may be null.
2468	///
2469	/// SourceTy is the source-level type for the entire argument. SourceOffset is
2470	/// an offset into this that we're processing (which is always either 0 or 8).
2471	///
2472	llvm::Type *X86_64ABIInfo::
2473	GetINTEGERTypeAtOffset(llvm::Type IRType, unsigned* IROffset,
2474	QualType SourceTy, unsigned SourceOffset) const {
2475	// If we're dealing with an un-offset LLVM IR type, then it means that we're
2476	// returning an 8-byte unit starting with it. See if we can safely use it.
2477	if (IROffset == `0`) {
2478	// Pointers and int64's always fill the 8-byte unit.
2479	if ((isa<llvm::PointerType>(Val: IRType) && Has64BitPointers) \|\|
2480	IRType->isIntegerTy(Bitwidth: `64`))
2481	return IRType;
2482
2483	// If we have a 1/2/4-byte integer, we can use it only if the rest of the
2484	// goodness in the source type is just tail padding. This is allowed to
2485	// kick in for struct {double,int} on the int, but not on
2486	// struct{double,int,int} because we wouldn't return the second int. We
2487	// have to do this analysis on the source type because we can't depend on
2488	// unions being lowered a specific way etc.
2489	if (IRType->isIntegerTy(Bitwidth: `8`) \|\| IRType->isIntegerTy(Bitwidth: `16`) \|\|
2490	IRType->isIntegerTy(Bitwidth: `32`) \|\|
2491	(isa<llvm::PointerType>(Val: IRType) && !Has64BitPointers)) {
2492	unsigned BitWidth = isa<llvm::PointerType>(Val: IRType) ? `32` :
2493	cast<llvm::IntegerType>(Val: IRType)->getBitWidth();
2494
2495	if (BitsContainNoUserData(Ty: SourceTy, StartBit: SourceOffset*`8`+BitWidth,
2496	EndBit: SourceOffset*`8`+`64`, Context&: getContext()))
2497	return IRType;
2498	}
2499	}
2500
2501	if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val: IRType)) {
2502	// If this is a struct, recurse into the field at the specified offset.
2503	const llvm::StructLayout *SL = getDataLayout().getStructLayout(Ty: STy);
2504	if (IROffset < SL->getSizeInBytes()) {
2505	unsigned FieldIdx = SL->getElementContainingOffset(FixedOffset: IROffset);
2506	IROffset -= SL->getElementOffset(Idx: FieldIdx);
2507
2508	return GetINTEGERTypeAtOffset(IRType: STy->getElementType(N: FieldIdx), IROffset,
2509	SourceTy, SourceOffset);
2510	}
2511	}
2512
2513	if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(Val: IRType)) {
2514	llvm::Type *EltTy = ATy->getElementType();
2515	unsigned EltSize = getDataLayout().getTypeAllocSize(Ty: EltTy);
2516	unsigned EltOffset = IROffset/EltSize*EltSize;
2517	return GetINTEGERTypeAtOffset(IRType: EltTy, IROffset: IROffset-EltOffset, SourceTy,
2518	SourceOffset);
2519	}
2520
2521	// Okay, we don't have any better idea of what to pass, so we pass this in an
2522	// integer register that isn't too big to fit the rest of the struct.
2523	unsigned TySizeInBytes =
2524	(unsigned)getContext().getTypeSizeInChars(T: SourceTy).getQuantity();
2525
2526	assert(TySizeInBytes != SourceOffset && "Empty field?");
2527
2528	// It is always safe to classify this as an integer type up to i64 that
2529	// isn't larger than the structure.
2530	return llvm::IntegerType::get(C&: getVMContext(),
2531	NumBits: std::min(a: TySizeInBytes-SourceOffset, b: `8U`)*`8`);
2532	}
2533
2534
2535	/// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
2536	/// be used as elements of a two register pair to pass or return, return a
2537	/// first class aggregate to represent them. For example, if the low part of
2538	/// a by-value argument should be passed as i32 and the high part as float,*
2539	/// return {i32, float}.*
2540	static llvm::Type *
2541	GetX86_64ByValArgumentPair(llvm::Type Lo, llvm::Type Hi,
2542	const llvm::DataLayout &TD) {
2543	// In order to correctly satisfy the ABI, we need to the high part to start
2544	// at offset 8. If the high and low parts we inferred are both 4-byte types
2545	// (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
2546	// the second element at offset 8. Check for this:
2547	unsigned LoSize = (unsigned)TD.getTypeAllocSize(Ty: Lo);
2548	llvm::Align HiAlign = TD.getABITypeAlign(Ty: Hi);
2549	unsigned HiStart = llvm::alignTo(Size: LoSize, A: HiAlign);
2550	assert(HiStart != `0` && HiStart <= `8` && "Invalid x86-64 argument pair!");
2551
2552	// To handle this, we have to increase the size of the low part so that the
2553	// second element will start at an 8 byte offset. We can't increase the size
2554	// of the second element because it might make us access off the end of the
2555	// struct.
2556	if (HiStart != `8`) {
2557	// There are usually two sorts of types the ABI generation code can produce
2558	// for the low part of a pair that aren't 8 bytes in size: half, float or
2559	// i8/i16/i32. This can also include pointers when they are 32-bit (X32 and
2560	// NaCl).
2561	// Promote these to a larger type.
2562	if (Lo->isHalfTy() \|\| Lo->isFloatTy())
2563	Lo = llvm::Type::getDoubleTy(C&: Lo->getContext());
2564	else {
2565	assert((Lo->isIntegerTy() \|\| Lo->isPointerTy())
2566	&& "Invalid/unknown lo type");
2567	Lo = llvm::Type::getInt64Ty(C&: Lo->getContext());
2568	}
2569	}
2570
2571	llvm::StructType *Result = llvm::StructType::get(elt1: Lo, elts: Hi);
2572
2573	// Verify that the second element is at an 8-byte offset.
2574	assert(TD.getStructLayout(Result)->getElementOffset(`1`) == `8` &&
2575	"Invalid x86-64 argument pair!");
2576	return Result;
2577	}
2578
2579	ABIArgInfo X86_64ABIInfo::
2580	classifyReturnType(QualType RetTy) const {
2581	// AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
2582	// classification algorithm.
2583	X86_64ABIInfo::Class Lo, Hi;
2584	classify(Ty: RetTy, OffsetBase: `0`, Lo, Hi, /isNamedArg/ true);
2585
2586	// Check some invariants.
2587	assert((Hi != Memory \|\| Lo == Memory) && "Invalid memory classification.");
2588	assert((Hi != SSEUp \|\| Lo == SSE) && "Invalid SSEUp classification.");
2589
2590	llvm::Type ResType = nullptr*;
2591	switch (Lo) {
2592	case NoClass:
2593	if (Hi == NoClass)
2594	return ABIArgInfo::getIgnore();
2595	// If the low part is just padding, it takes no register, leave ResType
2596	// null.
2597	assert((Hi == SSE \|\| Hi == Integer \|\| Hi == X87Up) &&
2598	"Unknown missing lo part");
2599	break;
2600
2601	case SSEUp:
2602	case X87Up:
2603	llvm_unreachable("Invalid classification for lo word.");
2604
2605	// AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
2606	// hidden argument.
2607	case Memory:
2608	return getIndirectReturnResult(Ty: RetTy);
2609
2610	// AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
2611	// available register of the sequence %rax, %rdx is used.
2612	case Integer:
2613	ResType = GetINTEGERTypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `0`, SourceTy: RetTy, SourceOffset: `0`);
2614
2615	// If we have a sign or zero extended integer, make sure to return Extend
2616	// so that the parameter gets the right LLVM IR attributes.
2617	if (Hi == NoClass && isa<llvm::IntegerType>(Val: ResType)) {
2618	// Treat an enum type as its underlying type.
2619	if (const EnumType *EnumTy = RetTy ->getAs<EnumType>())
2620	RetTy = EnumTy->getDecl()->getIntegerType();
2621
2622	if (RetTy ->isIntegralOrEnumerationType() &&
2623	isPromotableIntegerTypeForABI(Ty: RetTy))
2624	return ABIArgInfo::getExtend(Ty: RetTy);
2625	}
2626	break;
2627
2628	// AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
2629	// available SSE register of the sequence %xmm0, %xmm1 is used.
2630	case SSE:
2631	ResType = GetSSETypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `0`, SourceTy: RetTy, SourceOffset: `0`);
2632	break;
2633
2634	// AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
2635	// returned on the X87 stack in %st0 as 80-bit x87 number.
2636	case X87:
2637	ResType = llvm::Type::getX86_FP80Ty(C&: getVMContext());
2638	break;
2639
2640	// AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
2641	// part of the value is returned in %st0 and the imaginary part in
2642	// %st1.
2643	case ComplexX87:
2644	assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
2645	ResType = llvm::StructType::get(elt1: llvm::Type::getX86_FP80Ty(C&: getVMContext()),
2646	elts: llvm::Type::getX86_FP80Ty(C&: getVMContext()));
2647	break;
2648	}
2649
2650	llvm::Type HighPart = nullptr*;
2651	switch (Hi) {
2652	// Memory was handled previously and X87 should
2653	// never occur as a hi class.
2654	case Memory:
2655	case X87:
2656	llvm_unreachable("Invalid classification for hi word.");
2657
2658	case ComplexX87: // Previously handled.
2659	case NoClass:
2660	break;
2661
2662	case Integer:
2663	HighPart = GetINTEGERTypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `8`, SourceTy: RetTy, SourceOffset: `8`);
2664	if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2665	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2666	break;
2667	case SSE:
2668	HighPart = GetSSETypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `8`, SourceTy: RetTy, SourceOffset: `8`);
2669	if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2670	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2671	break;
2672
2673	// AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
2674	// is passed in the next available eightbyte chunk if the last used
2675	// vector register.
2676	//
2677	// SSEUP should always be preceded by SSE, just widen.
2678	case SSEUp:
2679	assert(Lo == SSE && "Unexpected SSEUp classification.");
2680	ResType = GetByteVectorType(Ty: RetTy);
2681	break;
2682
2683	// AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
2684	// returned together with the previous X87 value in %st0.
2685	case X87Up:
2686	// If X87Up is preceded by X87, we don't need to do
2687	// anything. However, in some cases with unions it may not be
2688	// preceded by X87. In such situations we follow gcc and pass the
2689	// extra bits in an SSE reg.
2690	if (Lo != X87) {
2691	HighPart = GetSSETypeAtOffset(IRType: CGT.ConvertType(T: RetTy), IROffset: `8`, SourceTy: RetTy, SourceOffset: `8`);
2692	if (Lo == NoClass) // Return HighPart at offset 8 in memory.
2693	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2694	}
2695	break;
2696	}
2697
2698	// If a high part was specified, merge it together with the low part. It is
2699	// known to pass in the high eightbyte of the result. We do this by forming a
2700	// first class struct aggregate with the high and low part: {low, high}
2701	if (HighPart)
2702	ResType = GetX86_64ByValArgumentPair(Lo: ResType, Hi: HighPart, TD: getDataLayout());
2703
2704	return ABIArgInfo::getDirect(T: ResType);
2705	}
2706
2707	ABIArgInfo
2708	X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
2709	unsigned &neededInt, unsigned &neededSSE,
2710	bool isNamedArg, bool IsRegCall) const {
2711	Ty = useFirstFieldIfTransparentUnion(Ty);
2712
2713	X86_64ABIInfo::Class Lo, Hi;
2714	classify(Ty, OffsetBase: `0`, Lo, Hi, isNamedArg, IsRegCall);
2715
2716	// Check some invariants.
2717	// FIXME: Enforce these by construction.
2718	assert((Hi != Memory \|\| Lo == Memory) && "Invalid memory classification.");
2719	assert((Hi != SSEUp \|\| Lo == SSE) && "Invalid SSEUp classification.");
2720
2721	neededInt = `0`;
2722	neededSSE = `0`;
2723	llvm::Type ResType = nullptr*;
2724	switch (Lo) {
2725	case NoClass:
2726	if (Hi == NoClass)
2727	return ABIArgInfo::getIgnore();
2728	// If the low part is just padding, it takes no register, leave ResType
2729	// null.
2730	assert((Hi == SSE \|\| Hi == Integer \|\| Hi == X87Up) &&
2731	"Unknown missing lo part");
2732	break;
2733
2734	// AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
2735	// on the stack.
2736	case Memory:
2737
2738	// AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
2739	// COMPLEX_X87, it is passed in memory.
2740	case X87:
2741	case ComplexX87:
2742	if (getRecordArgABI(T: Ty, CXXABI&: getCXXABI()) == CGCXXABI::RAA_Indirect)
2743	++neededInt;
2744	return getIndirectResult(Ty, freeIntRegs);
2745
2746	case SSEUp:
2747	case X87Up:
2748	llvm_unreachable("Invalid classification for lo word.");
2749
2750	// AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next
2751	// available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8
2752	// and %r9 is used.
2753	case Integer:
2754	++neededInt;
2755
2756	// Pick an 8-byte type based on the preferred type.
2757	ResType = GetINTEGERTypeAtOffset(IRType: CGT.ConvertType(T: Ty), IROffset: `0`, SourceTy: Ty, SourceOffset: `0`);
2758
2759	// If we have a sign or zero extended integer, make sure to return Extend
2760	// so that the parameter gets the right LLVM IR attributes.
2761	if (Hi == NoClass && isa<llvm::IntegerType>(Val: ResType)) {
2762	// Treat an enum type as its underlying type.
2763	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
2764	Ty = EnumTy->getDecl()->getIntegerType();
2765
2766	if (Ty ->isIntegralOrEnumerationType() &&
2767	isPromotableIntegerTypeForABI(Ty))
2768	return ABIArgInfo::getExtend(Ty);
2769	}
2770
2771	break;
2772
2773	// AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next
2774	// available SSE register is used, the registers are taken in the
2775	// order from %xmm0 to %xmm7.
2776	case SSE: {
2777	llvm::Type *IRType = CGT.ConvertType(T: Ty);
2778	ResType = GetSSETypeAtOffset(IRType, IROffset: `0`, SourceTy: Ty, SourceOffset: `0`);
2779	++neededSSE;
2780	break;
2781	}
2782	}
2783
2784	llvm::Type HighPart = nullptr*;
2785	switch (Hi) {
2786	// Memory was handled previously, ComplexX87 and X87 should
2787	// never occur as hi classes, and X87Up must be preceded by X87,
2788	// which is passed in memory.
2789	case Memory:
2790	case X87:
2791	case ComplexX87:
2792	llvm_unreachable("Invalid classification for hi word.");
2793
2794	case NoClass: break;
2795
2796	case Integer:
2797	++neededInt;
2798	// Pick an 8-byte type based on the preferred type.
2799	HighPart = GetINTEGERTypeAtOffset(IRType: CGT.ConvertType(T: Ty), IROffset: `8`, SourceTy: Ty, SourceOffset: `8`);
2800
2801	if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
2802	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2803	break;
2804
2805	// X87Up generally doesn't occur here (long double is passed in
2806	// memory), except in situations involving unions.
2807	case X87Up:
2808	case SSE:
2809	++neededSSE;
2810	HighPart = GetSSETypeAtOffset(IRType: CGT.ConvertType(T: Ty), IROffset: `8`, SourceTy: Ty, SourceOffset: `8`);
2811
2812	if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
2813	return ABIArgInfo::getDirect(T: HighPart, Offset: `8`);
2814	break;
2815
2816	// AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
2817	// eightbyte is passed in the upper half of the last used SSE
2818	// register. This only happens when 128-bit vectors are passed.
2819	case SSEUp:
2820	assert(Lo == SSE && "Unexpected SSEUp classification");
2821	ResType = GetByteVectorType(Ty);
2822	break;
2823	}
2824
2825	// If a high part was specified, merge it together with the low part. It is
2826	// known to pass in the high eightbyte of the result. We do this by forming a
2827	// first class struct aggregate with the high and low part: {low, high}
2828	if (HighPart)
2829	ResType = GetX86_64ByValArgumentPair(Lo: ResType, Hi: HighPart, TD: getDataLayout());
2830
2831	return ABIArgInfo::getDirect(T: ResType);
2832	}
2833
2834	ABIArgInfo
2835	X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
2836	unsigned &NeededSSE,
2837	unsigned &MaxVectorWidth) const {
2838	auto RT = Ty ->getAs<RecordType>();
2839	assert(RT && "classifyRegCallStructType only valid with struct types");
2840
2841	if (RT->getDecl()->hasFlexibleArrayMember())
2842	return getIndirectReturnResult(Ty);
2843
2844	// Sum up bases
2845	if (auto CXXRD = dyn_cast<CXXRecordDecl>(Val: RT->getDecl())) {
2846	if (CXXRD->isDynamicClass()) {
2847	NeededInt = NeededSSE = `0`;
2848	return getIndirectReturnResult(Ty);
2849	}
2850
2851	for (const auto &I : CXXRD->bases())
2852	if (classifyRegCallStructTypeImpl(Ty: I.getType(), NeededInt, NeededSSE,
2853	MaxVectorWidth)
2854	.isIndirect()) {
2855	NeededInt = NeededSSE = `0`;
2856	return getIndirectReturnResult(Ty);
2857	}
2858	}
2859
2860	// Sum up members
2861	for (const auto *FD : RT->getDecl()->fields()) {
2862	QualType MTy = FD->getType();
2863	if (MTy ->isRecordType() && !MTy ->isUnionType()) {
2864	if (classifyRegCallStructTypeImpl(Ty: MTy, NeededInt, NeededSSE,
2865	MaxVectorWidth)
2866	.isIndirect()) {
2867	NeededInt = NeededSSE = `0`;
2868	return getIndirectReturnResult(Ty);
2869	}
2870	} else {
2871	unsigned LocalNeededInt, LocalNeededSSE;
2872	if (classifyArgumentType(Ty: MTy, UINT_MAX, neededInt&: LocalNeededInt, neededSSE&: LocalNeededSSE,
2873	isNamedArg: true, IsRegCall: true)
2874	.isIndirect()) {
2875	NeededInt = NeededSSE = `0`;
2876	return getIndirectReturnResult(Ty);
2877	}
2878	if (const auto *AT = getContext().getAsConstantArrayType(T: MTy))
2879	MTy = AT->getElementType();
2880	if (const auto *VT = MTy ->getAs<VectorType>())
2881	if (getContext().getTypeSize(T: VT) > MaxVectorWidth)
2882	MaxVectorWidth = getContext().getTypeSize(T: VT);
2883	NeededInt += LocalNeededInt;
2884	NeededSSE += LocalNeededSSE;
2885	}
2886	}
2887
2888	return ABIArgInfo::getDirect();
2889	}
2890
2891	ABIArgInfo
2892	X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
2893	unsigned &NeededSSE,
2894	unsigned &MaxVectorWidth) const {
2895
2896	NeededInt = `0`;
2897	NeededSSE = `0`;
2898	MaxVectorWidth = `0`;
2899
2900	return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE,
2901	MaxVectorWidth);
2902	}
2903
2904	void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
2905
2906	const unsigned CallingConv = FI.getCallingConvention();
2907	// It is possible to force Win64 calling convention on any x86_64 target by
2908	// using __attribute__((ms_abi)). In such case to correctly emit Win64
2909	// compatible code delegate this call to WinX86_64ABIInfo::computeInfo.
2910	if (CallingConv == llvm::CallingConv::Win64) {
2911	WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel);
2912	Win64ABIInfo.computeInfo(FI);
2913	return;
2914	}
2915
2916	bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
2917
2918	// Keep track of the number of assigned registers.
2919	unsigned FreeIntRegs = IsRegCall ? `11` : `6`;
2920	unsigned FreeSSERegs = IsRegCall ? `16` : `8`;
2921	unsigned NeededInt = `0`, NeededSSE = `0`, MaxVectorWidth = `0`;
2922
2923	if (!::classifyReturnType(CXXABI: getCXXABI(), FI, Info: *this)) {
2924	if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
2925	!FI.getReturnType()->getTypePtr()->isUnionType()) {
2926	FI.getReturnInfo() = classifyRegCallStructType(
2927	Ty: FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth);
2928	if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
2929	FreeIntRegs -= NeededInt;
2930	FreeSSERegs -= NeededSSE;
2931	} else {
2932	FI.getReturnInfo() = getIndirectReturnResult(Ty: FI.getReturnType());
2933	}
2934	} else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() &&
2935	getContext().getCanonicalType(T: FI.getReturnType()
2936	->getAs<ComplexType>()
2937	->getElementType()) ==
2938	getContext().LongDoubleTy)
2939	// Complex Long Double Type is passed in Memory when Regcall
2940	// calling convention is used.
2941	FI.getReturnInfo() = getIndirectReturnResult(Ty: FI.getReturnType());
2942	else
2943	FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType());
2944	}
2945
2946	// If the return value is indirect, then the hidden argument is consuming one
2947	// integer register.
2948	if (FI.getReturnInfo().isIndirect())
2949	--FreeIntRegs;
2950	else if (NeededSSE && MaxVectorWidth > `0`)
2951	FI.setMaxVectorWidth(MaxVectorWidth);
2952
2953	// The chain argument effectively gives us another free register.
2954	if (FI.isChainCall())
2955	++FreeIntRegs;
2956
2957	unsigned NumRequiredArgs = FI.getNumRequiredArgs();
2958	// AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
2959	// get assigned (in left-to-right order) for passing as follows...
2960	unsigned ArgNo = `0`;
2961	for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
2962	it != ie; ++it, ++ArgNo) {
2963	bool IsNamedArg = ArgNo < NumRequiredArgs;
2964
2965	if (IsRegCall && it->type ->isStructureOrClassType())
2966	it->info = classifyRegCallStructType(Ty: it->type, NeededInt, NeededSSE,
2967	MaxVectorWidth);
2968	else
2969	it->info = classifyArgumentType(Ty: it->type, freeIntRegs: FreeIntRegs, neededInt&: NeededInt,
2970	neededSSE&: NeededSSE, isNamedArg: IsNamedArg);
2971
2972	// AMD64-ABI 3.2.3p3: If there are no registers available for any
2973	// eightbyte of an argument, the whole argument is passed on the
2974	// stack. If registers have already been assigned for some
2975	// eightbytes of such an argument, the assignments get reverted.
2976	if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
2977	FreeIntRegs -= NeededInt;
2978	FreeSSERegs -= NeededSSE;
2979	if (MaxVectorWidth > FI.getMaxVectorWidth())
2980	FI.setMaxVectorWidth(MaxVectorWidth);
2981	} else {
2982	it->info = getIndirectResult(Ty: it->type, freeIntRegs: FreeIntRegs);
2983	}
2984	}
2985	}
2986
2987	static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
2988	Address VAListAddr, QualType Ty) {
2989	Address overflow_arg_area_p =
2990	CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: `2`, Name: "overflow_arg_area_p");
2991	llvm::Value *overflow_arg_area =
2992	CGF.Builder.CreateLoad(Addr: overflow_arg_area_p, Name: "overflow_arg_area");
2993
2994	// AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
2995	// byte boundary if alignment needed by type exceeds 8 byte boundary.
2996	// It isn't stated explicitly in the standard, but in practice we use
2997	// alignment greater than 16 where necessary.
2998	CharUnits Align = CGF.getContext().getTypeAlignInChars(T: Ty);
2999	if (Align > CharUnits::fromQuantity(Quantity: `8`)) {
3000	overflow_arg_area = emitRoundPointerUpToAlignment(CGF, Ptr: overflow_arg_area,
3001	Align);
3002	}
3003
3004	// AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
3005	llvm::Type *LTy = CGF.ConvertTypeForMem(T: Ty);
3006	llvm::Value *Res = overflow_arg_area;
3007
3008	// AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
3009	// l->overflow_arg_area + sizeof(type).
3010	// AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
3011	// an 8 byte boundary.
3012
3013	uint64_t SizeInBytes = (CGF.getContext().getTypeSize(T: Ty) + `7`) / `8`;
3014	llvm::Value *Offset =
3015	llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: (SizeInBytes + `7`) & ~`7`);
3016	overflow_arg_area = CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: overflow_arg_area,
3017	IdxList: Offset, Name: "overflow_arg_area.next");
3018	CGF.Builder.CreateStore(Val: overflow_arg_area, Addr: overflow_arg_area_p);
3019
3020	// AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
3021	return Address (Res, LTy, Align);
3022	}
3023
3024	RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
3025	QualType Ty, AggValueSlot Slot) const {
3026	// Assume that va_list type is correct; should be pointer to LLVM type:
3027	// struct {
3028	// i32 gp_offset;
3029	// i32 fp_offset;
3030	// i8 overflow_arg_area;*
3031	// i8 reg_save_area;*
3032	// };
3033	unsigned neededInt, neededSSE;
3034
3035	Ty = getContext().getCanonicalType(T: Ty);
3036	ABIArgInfo AI = classifyArgumentType(Ty, freeIntRegs: `0`, neededInt, neededSSE,
3037	/isNamedArg/false);
3038
3039	// Empty records are ignored for parameter passing purposes.
3040	if (AI.isIgnore())
3041	return Slot.asRValue();
3042
3043	// AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
3044	// in the registers. If not go to step 7.
3045	if (!neededInt && !neededSSE)
3046	return CGF.EmitLoadOfAnyValue(
3047	V: CGF.MakeAddrLValue(Addr: EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty), T: Ty),
3048	Slot);
3049
3050	// AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
3051	// general purpose registers needed to pass type and num_fp to hold
3052	// the number of floating point registers needed.
3053
3054	// AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
3055	// registers. In the case: l->gp_offset > 48 - num_gp 8 or*
3056	// l->fp_offset > 304 - num_fp 16 go to step 7.*
3057	//
3058	// NOTE: 304 is a typo, there are (6 8 + 8 * 16) = 176 bytes of*
3059	// register save space).
3060
3061	llvm::Value InRegs = nullptr*;
3062	Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
3063	llvm::Value gp_offset = nullptr, fp_offset = nullptr;
3064	if (neededInt) {
3065	gp_offset_p = CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: `0`, Name: "gp_offset_p");
3066	gp_offset = CGF.Builder.CreateLoad(Addr: gp_offset_p, Name: "gp_offset");
3067	InRegs = llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: `48` - neededInt * `8`);
3068	InRegs = CGF.Builder.CreateICmpULE(LHS: gp_offset, RHS: InRegs, Name: "fits_in_gp");
3069	}
3070
3071	if (neededSSE) {
3072	fp_offset_p = CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: `1`, Name: "fp_offset_p");
3073	fp_offset = CGF.Builder.CreateLoad(Addr: fp_offset_p, Name: "fp_offset");
3074	llvm::Value *FitsInFP =
3075	llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: `176` - neededSSE * `16`);
3076	FitsInFP = CGF.Builder.CreateICmpULE(LHS: fp_offset, RHS: FitsInFP, Name: "fits_in_fp");
3077	InRegs = InRegs ? CGF.Builder.CreateAnd(LHS: InRegs, RHS: FitsInFP) : FitsInFP;
3078	}
3079
3080	llvm::BasicBlock *InRegBlock = CGF.createBasicBlock(name: "vaarg.in_reg");
3081	llvm::BasicBlock *InMemBlock = CGF.createBasicBlock(name: "vaarg.in_mem");
3082	llvm::BasicBlock *ContBlock = CGF.createBasicBlock(name: "vaarg.end");
3083	CGF.Builder.CreateCondBr(Cond: InRegs, True: InRegBlock, False: InMemBlock);
3084
3085	// Emit code to load the value if it was passed in registers.
3086
3087	CGF.EmitBlock(BB: InRegBlock);
3088
3089	// AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
3090	// an offset of l->gp_offset and/or l->fp_offset. This may require
3091	// copying to a temporary location in case the parameter is passed
3092	// in different register classes or requires an alignment greater
3093	// than 8 for general purpose registers and 16 for XMM registers.
3094	//
3095	// FIXME: This really results in shameful code when we end up needing to
3096	// collect arguments from different places; often what should result in a
3097	// simple assembling of a structure from scattered addresses has many more
3098	// loads than necessary. Can we clean this up?
3099	llvm::Type *LTy = CGF.ConvertTypeForMem(T: Ty);
3100	llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
3101	Addr: CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: `3`), Name: "reg_save_area");
3102
3103	Address RegAddr = Address::invalid();
3104	if (neededInt && neededSSE) {
3105	// FIXME: Cleanup.
3106	assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
3107	llvm::StructType *ST = cast<llvm::StructType>(Val: AI.getCoerceToType());
3108	Address Tmp = CGF.CreateMemTemp(T: Ty);
3109	Tmp = Tmp.withElementType(ElemTy: ST);
3110	assert(ST->getNumElements() == `2` && "Unexpected ABI info for mixed regs");
3111	llvm::Type *TyLo = ST->getElementType(N: `0`);
3112	llvm::Type *TyHi = ST->getElementType(N: `1`);
3113	assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
3114	"Unexpected ABI info for mixed regs");
3115	llvm::Value *GPAddr =
3116	CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea, IdxList: gp_offset);
3117	llvm::Value *FPAddr =
3118	CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea, IdxList: fp_offset);
3119	llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
3120	llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
3121
3122	// Copy the first element.
3123	// FIXME: Our choice of alignment here and below is probably pessimistic.
3124	llvm::Value *V = CGF.Builder.CreateAlignedLoad(
3125	Ty: TyLo, Addr: RegLoAddr,
3126	Align: CharUnits::fromQuantity(Quantity: getDataLayout().getABITypeAlign(Ty: TyLo)));
3127	CGF.Builder.CreateStore(Val: V, Addr: CGF.Builder.CreateStructGEP(Addr: Tmp, Index: `0`));
3128
3129	// Copy the second element.
3130	V = CGF.Builder.CreateAlignedLoad(
3131	Ty: TyHi, Addr: RegHiAddr,
3132	Align: CharUnits::fromQuantity(Quantity: getDataLayout().getABITypeAlign(Ty: TyHi)));
3133	CGF.Builder.CreateStore(Val: V, Addr: CGF.Builder.CreateStructGEP(Addr: Tmp, Index: `1`));
3134
3135	RegAddr = Tmp.withElementType(ElemTy: LTy);
3136	} else if (neededInt) {
3137	RegAddr = Address (CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea, IdxList: gp_offset),
3138	LTy, CharUnits::fromQuantity(Quantity: `8`));
3139
3140	// Copy to a temporary if necessary to ensure the appropriate alignment.
3141	auto TInfo = getContext().getTypeInfoInChars(T: Ty);
3142	uint64_t TySize = TInfo.Width.getQuantity();
3143	CharUnits TyAlign = TInfo.Align;
3144
3145	// Copy into a temporary if the type is more aligned than the
3146	// register save area.
3147	if (TyAlign.getQuantity() > `8`) {
3148	Address Tmp = CGF.CreateMemTemp(T: Ty);
3149	CGF.Builder.CreateMemCpy(Dest: Tmp, Src: RegAddr, Size: TySize, IsVolatile: false);
3150	RegAddr = Tmp;
3151	}
3152
3153	} else if (neededSSE == `1`) {
3154	RegAddr = Address (CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea, IdxList: fp_offset),
3155	LTy, CharUnits::fromQuantity(Quantity: `16`));
3156	} else {
3157	assert(neededSSE == `2` && "Invalid number of needed registers!");
3158	// SSE registers are spaced 16 bytes apart in the register save
3159	// area, we need to collect the two eightbytes together.
3160	// The ABI isn't explicit about this, but it seems reasonable
3161	// to assume that the slots are 16-byte aligned, since the stack is
3162	// naturally 16-byte aligned and the prologue is expected to store
3163	// all the SSE registers to the RSA.
3164	Address RegAddrLo = Address (CGF.Builder.CreateGEP(Ty: CGF.Int8Ty, Ptr: RegSaveArea,
3165	IdxList: fp_offset),
3166	CGF.Int8Ty, CharUnits::fromQuantity(Quantity: `16`));
3167	Address RegAddrHi =
3168	CGF.Builder.CreateConstInBoundsByteGEP(Addr: RegAddrLo,
3169	Offset: CharUnits::fromQuantity(Quantity: `16`));
3170	llvm::Type *ST = AI.canHaveCoerceToType()
3171	? AI.getCoerceToType()
3172	: llvm::StructType::get(elt1: CGF.DoubleTy, elts: CGF.DoubleTy);
3173	llvm::Value *V;
3174	Address Tmp = CGF.CreateMemTemp(T: Ty);
3175	Tmp = Tmp.withElementType(ElemTy: ST);
3176	V = CGF.Builder.CreateLoad(
3177	Addr: RegAddrLo.withElementType(ElemTy: ST->getStructElementType(N: `0`)));
3178	CGF.Builder.CreateStore(Val: V, Addr: CGF.Builder.CreateStructGEP(Addr: Tmp, Index: `0`));
3179	V = CGF.Builder.CreateLoad(
3180	Addr: RegAddrHi.withElementType(ElemTy: ST->getStructElementType(N: `1`)));
3181	CGF.Builder.CreateStore(Val: V, Addr: CGF.Builder.CreateStructGEP(Addr: Tmp, Index: `1`));
3182
3183	RegAddr = Tmp.withElementType(ElemTy: LTy);
3184	}
3185
3186	// AMD64-ABI 3.5.7p5: Step 5. Set:
3187	// l->gp_offset = l->gp_offset + num_gp 8*
3188	// l->fp_offset = l->fp_offset + num_fp 16.*
3189	if (neededInt) {
3190	llvm::Value Offset = llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: neededInt `8`);
3191	CGF.Builder.CreateStore(Val: CGF.Builder.CreateAdd(LHS: gp_offset, RHS: Offset),
3192	Addr: gp_offset_p);
3193	}
3194	if (neededSSE) {
3195	llvm::Value Offset = llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: neededSSE `16`);
3196	CGF.Builder.CreateStore(Val: CGF.Builder.CreateAdd(LHS: fp_offset, RHS: Offset),
3197	Addr: fp_offset_p);
3198	}
3199	CGF.EmitBranch(Block: ContBlock);
3200
3201	// Emit code to load the value if it was passed in memory.
3202
3203	CGF.EmitBlock(BB: InMemBlock);
3204	Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
3205
3206	// Return the appropriate result.
3207
3208	CGF.EmitBlock(BB: ContBlock);
3209	Address ResAddr = emitMergePHI(CGF, Addr1: RegAddr, Block1: InRegBlock, Addr2: MemAddr, Block2: InMemBlock,
3210	Name: "vaarg.addr");
3211	return CGF.EmitLoadOfAnyValue(V: CGF.MakeAddrLValue(Addr: ResAddr, T: Ty), Slot);
3212	}
3213
3214	RValue X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
3215	QualType Ty, AggValueSlot Slot) const {
3216	// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3217	// not 1, 2, 4, or 8 bytes, must be passed by reference."
3218	uint64_t Width = getContext().getTypeSize(T: Ty);
3219	bool IsIndirect = Width > `64` \|\| !llvm::isPowerOf2_64(Value: Width);
3220
3221	return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, IsIndirect,
3222	ValueInfo: CGF.getContext().getTypeInfoInChars(T: Ty),
3223	SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: `8`),
3224	/allowHigherAlign/ AllowHigherAlign: false, Slot);
3225	}
3226
3227	ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
3228	QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
3229	const Type Base = nullptr*;
3230	uint64_t NumElts = `0`;
3231
3232	if (!Ty ->isBuiltinType() && !Ty ->isVectorType() &&
3233	isHomogeneousAggregate(Ty, Base, Members&: NumElts) && FreeSSERegs >= NumElts) {
3234	FreeSSERegs -= NumElts;
3235	return getDirectX86Hva();
3236	}
3237	return current;
3238	}
3239
3240	ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
3241	bool IsReturnType, bool IsVectorCall,
3242	bool IsRegCall) const {
3243
3244	if (Ty ->isVoidType())
3245	return ABIArgInfo::getIgnore();
3246
3247	if (const EnumType *EnumTy = Ty ->getAs<EnumType>())
3248	Ty = EnumTy->getDecl()->getIntegerType();
3249
3250	TypeInfo Info = getContext().getTypeInfo(T: Ty);
3251	uint64_t Width = Info.Width;
3252	CharUnits Align = getContext().toCharUnitsFromBits(BitSize: Info.Align);
3253
3254	const RecordType *RT = Ty ->getAs<RecordType>();
3255	if (RT) {
3256	if (!IsReturnType) {
3257	if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, CXXABI&: getCXXABI()))
3258	return getNaturalAlignIndirect(Ty, ByVal: RAA == CGCXXABI::RAA_DirectInMemory);
3259	}
3260
3261	if (RT->getDecl()->hasFlexibleArrayMember())
3262	return getNaturalAlignIndirect(Ty, /ByVal=/false);
3263
3264	}
3265
3266	const Type Base = nullptr*;
3267	uint64_t NumElts = `0`;
3268	// vectorcall adds the concept of a homogenous vector aggregate, similar to
3269	// other targets.
3270	if ((IsVectorCall \|\| IsRegCall) &&
3271	isHomogeneousAggregate(Ty, Base, Members&: NumElts)) {
3272	if (IsRegCall) {
3273	if (FreeSSERegs >= NumElts) {
3274	FreeSSERegs -= NumElts;
3275	if (IsReturnType \|\| Ty ->isBuiltinType() \|\| Ty ->isVectorType())
3276	return ABIArgInfo::getDirect();
3277	return ABIArgInfo::getExpand();
3278	}
3279	return ABIArgInfo::getIndirect(Alignment: Align, /ByVal=/false);
3280	} else if (IsVectorCall) {
3281	if (FreeSSERegs >= NumElts &&
3282	(IsReturnType \|\| Ty ->isBuiltinType() \|\| Ty ->isVectorType())) {
3283	FreeSSERegs -= NumElts;
3284	return ABIArgInfo::getDirect();
3285	} else if (IsReturnType) {
3286	return ABIArgInfo::getExpand();
3287	} else if (!Ty ->isBuiltinType() && !Ty ->isVectorType()) {
3288	// HVAs are delayed and reclassified in the 2nd step.
3289	return ABIArgInfo::getIndirect(Alignment: Align, /ByVal=/false);
3290	}
3291	}
3292	}
3293
3294	if (Ty ->isMemberPointerType()) {
3295	// If the member pointer is represented by an LLVM int or ptr, pass it
3296	// directly.
3297	llvm::Type *LLTy = CGT.ConvertType(T: Ty);
3298	if (LLTy->isPointerTy() \|\| LLTy->isIntegerTy())
3299	return ABIArgInfo::getDirect();
3300	}
3301
3302	if (RT \|\| Ty ->isAnyComplexType() \|\| Ty ->isMemberPointerType()) {
3303	// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3304	// not 1, 2, 4, or 8 bytes, must be passed by reference."
3305	if (Width > `64` \|\| !llvm::isPowerOf2_64(Value: Width))
3306	return getNaturalAlignIndirect(Ty, /ByVal=/false);
3307
3308	// Otherwise, coerce it to a small integer.
3309	return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(), NumBits: Width));
3310	}
3311
3312	if (const BuiltinType *BT = Ty ->getAs<BuiltinType>()) {
3313	switch (BT->getKind()) {
3314	case BuiltinType::Bool:
3315	// Bool type is always extended to the ABI, other builtin types are not
3316	// extended.
3317	return ABIArgInfo::getExtend(Ty);
3318
3319	case BuiltinType::LongDouble:
3320	// Mingw64 GCC uses the old 80 bit extended precision floating point
3321	// unit. It passes them indirectly through memory.
3322	if (IsMingw64) {
3323	const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
3324	if (LDF == &llvm::APFloat::x87DoubleExtended())
3325	return ABIArgInfo::getIndirect(Alignment: Align, /ByVal=/false);
3326	}
3327	break;
3328
3329	case BuiltinType::Int128:
3330	case BuiltinType::UInt128:
3331	// If it's a parameter type, the normal ABI rule is that arguments larger
3332	// than 8 bytes are passed indirectly. GCC follows it. We follow it too,
3333	// even though it isn't particularly efficient.
3334	if (!IsReturnType)
3335	return ABIArgInfo::getIndirect(Alignment: Align, /ByVal=/false);
3336
3337	// Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
3338	// Clang matches them for compatibility.
3339	return ABIArgInfo::getDirect(T: llvm::FixedVectorType::get(
3340	ElementType: llvm::Type::getInt64Ty(C&: getVMContext()), NumElts: `2`));
3341
3342	default:
3343	break;
3344	}
3345	}
3346
3347	if (Ty ->isBitIntType()) {
3348	// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3349	// not 1, 2, 4, or 8 bytes, must be passed by reference."
3350	// However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
3351	// or 8 bytes anyway as long is it fits in them, so we don't have to check
3352	// the power of 2.
3353	if (Width <= `64`)
3354	return ABIArgInfo::getDirect();
3355	return ABIArgInfo::getIndirect(Alignment: Align, /ByVal=/false);
3356	}
3357
3358	return ABIArgInfo::getDirect();
3359	}
3360
3361	void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
3362	const unsigned CC = FI.getCallingConvention();
3363	bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
3364	bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
3365
3366	// If __attribute__((sysv_abi)) is in use, use the SysV argument
3367	// classification rules.
3368	if (CC == llvm::CallingConv::X86_64_SysV) {
3369	X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
3370	SysVABIInfo.computeInfo(FI);
3371	return;
3372	}
3373
3374	unsigned FreeSSERegs = `0`;
3375	if (IsVectorCall) {
3376	// We can use up to 4 SSE return registers with vectorcall.
3377	FreeSSERegs = `4`;
3378	} else if (IsRegCall) {
3379	// RegCall gives us 16 SSE registers.
3380	FreeSSERegs = `16`;
3381	}
3382
3383	if (!getCXXABI().classifyReturnType(FI))
3384	FI.getReturnInfo() = classify(Ty: FI.getReturnType(), FreeSSERegs, IsReturnType: true,
3385	IsVectorCall, IsRegCall);
3386
3387	if (IsVectorCall) {
3388	// We can use up to 6 SSE register parameters with vectorcall.
3389	FreeSSERegs = `6`;
3390	} else if (IsRegCall) {
3391	// RegCall gives us 16 SSE registers, we can reuse the return registers.
3392	FreeSSERegs = `16`;
3393	}
3394
3395	unsigned ArgNum = `0`;
3396	unsigned ZeroSSERegs = `0`;
3397	for (auto &I : FI.arguments()) {
3398	// Vectorcall in x64 only permits the first 6 arguments to be passed as
3399	// XMM/YMM registers. After the sixth argument, pretend no vector
3400	// registers are left.
3401	unsigned *MaybeFreeSSERegs =
3402	(IsVectorCall && ArgNum >= `6`) ? &ZeroSSERegs : &FreeSSERegs;
3403	I.info =
3404	classify(Ty: I.type, FreeSSERegs&: MaybeFreeSSERegs, IsReturnType: false*, IsVectorCall, IsRegCall);
3405	++ArgNum;
3406	}
3407
3408	if (IsVectorCall) {
3409	// For vectorcall, assign aggregate HVAs to any free vector registers in a
3410	// second pass.
3411	for (auto &I : FI.arguments())
3412	I.info = reclassifyHvaArgForVectorCall(Ty: I.type, FreeSSERegs, current: I.info);
3413	}
3414	}
3415
3416	RValue WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
3417	QualType Ty, AggValueSlot Slot) const {
3418	// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
3419	// not 1, 2, 4, or 8 bytes, must be passed by reference."
3420	uint64_t Width = getContext().getTypeSize(T: Ty);
3421	bool IsIndirect = Width > `64` \|\| !llvm::isPowerOf2_64(Value: Width);
3422
3423	return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, IsIndirect,
3424	ValueInfo: CGF.getContext().getTypeInfoInChars(T: Ty),
3425	SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: `8`),
3426	/allowHigherAlign/ AllowHigherAlign: false, Slot);
3427	}
3428
3429	std::unique_ptr<TargetCodeGenInfo> CodeGen::createX86_32TargetCodeGenInfo(
3430	CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
3431	unsigned NumRegisterParameters, bool SoftFloatABI) {
3432	bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
3433	Triple: CGM.getTriple(), Opts: CGM.getCodeGenOpts());
3434	return std::make_unique<X86_32TargetCodeGenInfo>(
3435	args&: CGM.getTypes(), args&: DarwinVectorABI, args&: RetSmallStructInRegABI, args&: Win32StructABI,
3436	args&: NumRegisterParameters, args&: SoftFloatABI);
3437	}
3438
3439	std::unique_ptr<TargetCodeGenInfo> CodeGen::createWinX86_32TargetCodeGenInfo(
3440	CodeGenModule &CGM, bool DarwinVectorABI, bool Win32StructABI,
3441	unsigned NumRegisterParameters) {
3442	bool RetSmallStructInRegABI = X86_32TargetCodeGenInfo::isStructReturnInRegABI(
3443	Triple: CGM.getTriple(), Opts: CGM.getCodeGenOpts());
3444	return std::make_unique<WinX86_32TargetCodeGenInfo>(
3445	args&: CGM.getTypes(), args&: DarwinVectorABI, args&: RetSmallStructInRegABI, args&: Win32StructABI,
3446	args&: NumRegisterParameters);
3447	}
3448
3449	std::unique_ptr<TargetCodeGenInfo>
3450	CodeGen::createX86_64TargetCodeGenInfo(CodeGenModule &CGM,
3451	X86AVXABILevel AVXLevel) {
3452	return std::make_unique<X86_64TargetCodeGenInfo>(args&: CGM.getTypes(), args&: AVXLevel);
3453	}
3454
3455	std::unique_ptr<TargetCodeGenInfo>
3456	CodeGen::createWinX86_64TargetCodeGenInfo(CodeGenModule &CGM,
3457	X86AVXABILevel AVXLevel) {
3458	return std::make_unique<WinX86_64TargetCodeGenInfo>(args&: CGM.getTypes(), args&: AVXLevel);
3459	}
3460

Browse the source code of llvm_projects/clang/lib/CodeGen/Targets/X86.cpp