1//===- AArch64.cpp --------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ABIInfoImpl.h"
10#include "TargetInfo.h"
11#include "clang/AST/Decl.h"
12#include "clang/Basic/DiagnosticFrontend.h"
13#include "llvm/TargetParser/AArch64TargetParser.h"
14
15using namespace clang;
16using namespace clang::CodeGen;
17
18//===----------------------------------------------------------------------===//
19// AArch64 ABI Implementation
20//===----------------------------------------------------------------------===//
21
22namespace {
23
24class AArch64ABIInfo : public ABIInfo {
25 AArch64ABIKind Kind;
26
27 std::unique_ptr<TargetCodeGenInfo> WinX86_64CodegenInfo;
28
29public:
30 AArch64ABIInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
31 : ABIInfo(CGM.getTypes()), Kind(Kind) {
32 if (getTarget().getTriple().isWindowsArm64EC()) {
33 WinX86_64CodegenInfo =
34 createWinX86_64TargetCodeGenInfo(CGM, AVXLevel: X86AVXABILevel::None);
35 }
36 }
37
38 bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
39
40private:
41 AArch64ABIKind getABIKind() const { return Kind; }
42 bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }
43
44 ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadicFn) const;
45 ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadicFn,
46 bool IsNamedArg, unsigned CallingConvention,
47 unsigned &NSRN, unsigned &NPRN) const;
48 llvm::Type *convertFixedToScalableVectorType(const VectorType *VT) const;
49 ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN,
50 unsigned &NPRN) const;
51 ABIArgInfo coerceAndExpandPureScalableAggregate(
52 QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
53 const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
54 unsigned &NPRN) const;
55 bool isHomogeneousAggregateBaseType(QualType Ty) const override;
56 bool isHomogeneousAggregateSmallEnough(const Type *Ty,
57 uint64_t Members) const override;
58 bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
59
60 bool isIllegalVectorType(QualType Ty) const;
61
62 bool passAsAggregateType(QualType Ty) const;
63 bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
64 SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;
65
66 void flattenType(llvm::Type *Ty,
67 SmallVectorImpl<llvm::Type *> &Flattened) const;
68
69 void computeInfo(CGFunctionInfo &FI) const override {
70 if (!::classifyReturnType(CXXABI: getCXXABI(), FI, Info: *this))
71 FI.getReturnInfo() =
72 classifyReturnType(RetTy: FI.getReturnType(), IsVariadicFn: FI.isVariadic());
73
74 unsigned ArgNo = 0;
75 unsigned NSRN = 0, NPRN = 0;
76 for (auto &it : FI.arguments()) {
77 const bool IsNamedArg =
78 !FI.isVariadic() || ArgNo < FI.getRequiredArgs().getNumRequiredArgs();
79 ++ArgNo;
80 it.info = classifyArgumentType(RetTy: it.type, IsVariadicFn: FI.isVariadic(), IsNamedArg,
81 CallingConvention: FI.getCallingConvention(), NSRN, NPRN);
82 }
83 }
84
85 RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
86 AggValueSlot Slot) const;
87
88 RValue EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
89 AArch64ABIKind Kind, AggValueSlot Slot) const;
90
91 RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
92 AggValueSlot Slot) const override {
93 llvm::Type *BaseTy = CGF.ConvertType(T: Ty);
94 if (isa<llvm::ScalableVectorType>(Val: BaseTy))
95 llvm::report_fatal_error(reason: "Passing SVE types to variadic functions is "
96 "currently not supported");
97
98 return Kind == AArch64ABIKind::Win64
99 ? EmitMSVAArg(CGF, VAListAddr, Ty, Slot)
100 : isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF, Slot)
101 : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind, Slot);
102 }
103
104 RValue EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
105 AggValueSlot Slot) const override;
106
107 bool allowBFloatArgsAndRet() const override {
108 return getTarget().hasBFloat16Type();
109 }
110
111 using ABIInfo::appendAttributeMangling;
112 void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
113 raw_ostream &Out) const override;
114 void appendAttributeMangling(StringRef AttrStr,
115 raw_ostream &Out) const override;
116};
117
118class AArch64SwiftABIInfo : public SwiftABIInfo {
119public:
120 explicit AArch64SwiftABIInfo(CodeGenTypes &CGT)
121 : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {}
122
123 bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy,
124 unsigned NumElts) const override;
125};
126
127class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
128public:
129 AArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
130 : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(args&: CGM, args&: Kind)) {
131 SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(args&: CGM.getTypes());
132 }
133
134 StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
135 return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
136 }
137
138 int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
139 return 31;
140 }
141
142 bool doesReturnSlotInterfereWithArgs() const override { return false; }
143
144 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
145 CodeGen::CodeGenModule &CGM) const override {
146 auto *Fn = dyn_cast<llvm::Function>(Val: GV);
147 if (!Fn)
148 return;
149
150 const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: D);
151 TargetInfo::BranchProtectionInfo BPI(CGM.getLangOpts());
152
153 if (FD && FD->hasAttr<TargetAttr>()) {
154 const auto *TA = FD->getAttr<TargetAttr>();
155 ParsedTargetAttr Attr =
156 CGM.getTarget().parseTargetAttr(Str: TA->getFeaturesStr());
157 if (!Attr.BranchProtection.empty()) {
158 StringRef Error;
159 (void)CGM.getTarget().validateBranchProtection(
160 Spec: Attr.BranchProtection, Arch: Attr.CPU, BPI, LO: CGM.getLangOpts(), Err&: Error);
161 assert(Error.empty());
162 }
163 }
164 setBranchProtectionFnAttributes(BPI, F&: *Fn);
165 setPointerAuthFnAttributes(Opts: CGM.getCodeGenOpts().PointerAuth, F&: *Fn);
166 }
167
168 bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
169 llvm::Type *Ty) const override {
170 if (CGF.getTarget().hasFeature(Feature: "ls64")) {
171 auto *ST = dyn_cast<llvm::StructType>(Val: Ty);
172 if (ST && ST->getNumElements() == 1) {
173 auto *AT = dyn_cast<llvm::ArrayType>(Val: ST->getElementType(N: 0));
174 if (AT && AT->getNumElements() == 8 &&
175 AT->getElementType()->isIntegerTy(BitWidth: 64))
176 return true;
177 }
178 }
179 return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
180 }
181
182 void checkFunctionABI(CodeGenModule &CGM,
183 const FunctionDecl *Decl) const override;
184
185 void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
186 const FunctionDecl *Caller,
187 const FunctionDecl *Callee, const CallArgList &Args,
188 QualType ReturnType) const override;
189
190 bool wouldInliningViolateFunctionCallABI(
191 const FunctionDecl *Caller, const FunctionDecl *Callee) const override;
192
193private:
194 // Diagnose calls between functions with incompatible Streaming SVE
195 // attributes.
196 void checkFunctionCallABIStreaming(CodeGenModule &CGM, SourceLocation CallLoc,
197 const FunctionDecl *Caller,
198 const FunctionDecl *Callee) const;
199 // Diagnose calls which must pass arguments in floating-point registers when
200 // the selected target does not have floating-point registers.
201 void checkFunctionCallABISoftFloat(CodeGenModule &CGM, SourceLocation CallLoc,
202 const FunctionDecl *Caller,
203 const FunctionDecl *Callee,
204 const CallArgList &Args,
205 QualType ReturnType) const;
206};
207
208class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
209public:
210 WindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind K)
211 : AArch64TargetCodeGenInfo(CGM, K) {}
212
213 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
214 CodeGen::CodeGenModule &CGM) const override;
215
216 void getDependentLibraryOption(llvm::StringRef Lib,
217 llvm::SmallString<24> &Opt) const override {
218 Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
219 }
220
221 void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
222 llvm::SmallString<32> &Opt) const override {
223 Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
224 }
225};
226
227void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
228 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
229 AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
230 if (GV->isDeclaration())
231 return;
232 addStackProbeTargetAttributes(D, GV, CGM);
233}
234}
235
236llvm::Type *
237AArch64ABIInfo::convertFixedToScalableVectorType(const VectorType *VT) const {
238 assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
239
240 if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
241 assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
242 BuiltinType::UChar &&
243 "unexpected builtin type for SVE predicate!");
244 return llvm::ScalableVectorType::get(ElementType: llvm::Type::getInt1Ty(C&: getVMContext()),
245 MinNumElts: 16);
246 }
247
248 if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
249 const auto *BT = VT->getElementType()->castAs<BuiltinType>();
250 switch (BT->getKind()) {
251 default:
252 llvm_unreachable("unexpected builtin type for SVE vector!");
253
254 case BuiltinType::SChar:
255 case BuiltinType::UChar:
256 case BuiltinType::MFloat8:
257 return llvm::ScalableVectorType::get(
258 ElementType: llvm::Type::getInt8Ty(C&: getVMContext()), MinNumElts: 16);
259
260 case BuiltinType::Short:
261 case BuiltinType::UShort:
262 return llvm::ScalableVectorType::get(
263 ElementType: llvm::Type::getInt16Ty(C&: getVMContext()), MinNumElts: 8);
264
265 case BuiltinType::Int:
266 case BuiltinType::UInt:
267 return llvm::ScalableVectorType::get(
268 ElementType: llvm::Type::getInt32Ty(C&: getVMContext()), MinNumElts: 4);
269
270 case BuiltinType::Long:
271 case BuiltinType::ULong:
272 return llvm::ScalableVectorType::get(
273 ElementType: llvm::Type::getInt64Ty(C&: getVMContext()), MinNumElts: 2);
274
275 case BuiltinType::Half:
276 return llvm::ScalableVectorType::get(
277 ElementType: llvm::Type::getHalfTy(C&: getVMContext()), MinNumElts: 8);
278
279 case BuiltinType::Float:
280 return llvm::ScalableVectorType::get(
281 ElementType: llvm::Type::getFloatTy(C&: getVMContext()), MinNumElts: 4);
282
283 case BuiltinType::Double:
284 return llvm::ScalableVectorType::get(
285 ElementType: llvm::Type::getDoubleTy(C&: getVMContext()), MinNumElts: 2);
286
287 case BuiltinType::BFloat16:
288 return llvm::ScalableVectorType::get(
289 ElementType: llvm::Type::getBFloatTy(C&: getVMContext()), MinNumElts: 8);
290 }
291 }
292
293 llvm_unreachable("expected fixed-length SVE vector");
294}
295
296ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN,
297 unsigned &NPRN) const {
298 assert(Ty->isVectorType() && "expected vector type!");
299
300 const auto *VT = Ty->castAs<VectorType>();
301 if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
302 assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
303 assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
304 BuiltinType::UChar &&
305 "unexpected builtin type for SVE predicate!");
306 NPRN = std::min(a: NPRN + 1, b: 4u);
307 return ABIArgInfo::getDirect(T: llvm::ScalableVectorType::get(
308 ElementType: llvm::Type::getInt1Ty(C&: getVMContext()), MinNumElts: 16));
309 }
310
311 if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
312 NSRN = std::min(a: NSRN + 1, b: 8u);
313 return ABIArgInfo::getDirect(T: convertFixedToScalableVectorType(VT));
314 }
315
316 uint64_t Size = getContext().getTypeSize(T: Ty);
317 // Android promotes <2 x i8> to i16, not i32
318 if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) {
319 llvm::Type *ResType = llvm::Type::getInt16Ty(C&: getVMContext());
320 return ABIArgInfo::getDirect(T: ResType);
321 }
322 if (Size <= 32) {
323 llvm::Type *ResType = llvm::Type::getInt32Ty(C&: getVMContext());
324 return ABIArgInfo::getDirect(T: ResType);
325 }
326 if (Size == 64) {
327 NSRN = std::min(a: NSRN + 1, b: 8u);
328 auto *ResType =
329 llvm::FixedVectorType::get(ElementType: llvm::Type::getInt32Ty(C&: getVMContext()), NumElts: 2);
330 return ABIArgInfo::getDirect(T: ResType);
331 }
332 if (Size == 128) {
333 NSRN = std::min(a: NSRN + 1, b: 8u);
334 auto *ResType =
335 llvm::FixedVectorType::get(ElementType: llvm::Type::getInt32Ty(C&: getVMContext()), NumElts: 4);
336 return ABIArgInfo::getDirect(T: ResType);
337 }
338
339 return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
340 /*ByVal=*/false);
341}
342
343ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
344 QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
345 const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
346 unsigned &NPRN) const {
347 if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4)
348 return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
349 /*ByVal=*/false);
350 NSRN += NVec;
351 NPRN += NPred;
352
353 // Handle SVE vector tuples.
354 if (Ty->isSVESizelessBuiltinType())
355 return ABIArgInfo::getDirect();
356
357 llvm::Type *UnpaddedCoerceToType =
358 UnpaddedCoerceToSeq.size() == 1
359 ? UnpaddedCoerceToSeq[0]
360 : llvm::StructType::get(Context&: CGT.getLLVMContext(), Elements: UnpaddedCoerceToSeq,
361 isPacked: true);
362
363 SmallVector<llvm::Type *> CoerceToSeq;
364 flattenType(Ty: CGT.ConvertType(T: Ty), Flattened&: CoerceToSeq);
365 auto *CoerceToType =
366 llvm::StructType::get(Context&: CGT.getLLVMContext(), Elements: CoerceToSeq, isPacked: false);
367
368 return ABIArgInfo::getCoerceAndExpand(coerceToType: CoerceToType, unpaddedCoerceToType: UnpaddedCoerceToType);
369}
370
// Classify how a single argument of type `Ty` is passed.
//
// Parameters:
//   Ty                - the argument type; a transparent union is replaced by
//                       its first field before anything else happens.
//   IsVariadicFn      - whether the function being classified is variadic.
//   IsNamedArg        - forwarded to the Pure Scalable Type handling, which
//                       passes unnamed arguments indirectly.
//   CallingConvention - compared against llvm::CallingConv::Win64 to select
//                       the Windows variadic behaviour.
//   NSRN, NPRN        - in/out running counters shared across the arguments
//                       of one function. Within this function NSRN is capped
//                       at 8 and advanced for vector, floating-point, SVE data
//                       and homogeneous-aggregate arguments; NPRN is capped at
//                       4 and advanced for SVE predicate arguments.
//                       NOTE(review): presumably the AAPCS64 "next SIMD&FP /
//                       predicate register number" - confirm against the spec.
//
// Returns the ABIArgInfo describing how the argument is passed (direct,
// extended, ignored, coerce-and-expand or indirect).
371ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
372 bool IsNamedArg,
373 unsigned CallingConvention,
374 unsigned &NSRN,
375 unsigned &NPRN) const {
376 Ty = useFirstFieldIfTransparentUnion(Ty);
377
378 if (IsVariadicFn && getTarget().getTriple().isWindowsArm64EC()) {
379 // Arm64EC varargs functions use the x86_64 classification rules,
380 // not the AArch64 ABI rules.
 // This path returns before NSRN/NPRN are touched.
381 return WinX86_64CodegenInfo->getABIInfo().classifyArgForArm64ECVarArg(Ty);
382 }
383
384 // Handle illegal vector types here.
385 if (isIllegalVectorType(Ty))
386 return coerceIllegalVector(Ty, NSRN, NPRN);
387
 // Everything that is not passed as an aggregate: only the register counters
 // are updated here, and the value is passed direct (or extended on Darwin).
388 if (!passAsAggregateType(Ty)) {
389 // Treat an enum type as its underlying type.
390 if (const auto *ED = Ty->getAsEnumDecl())
391 Ty = ED->getIntegerType();
392
 // _BitInt wider than 128 bits is passed indirectly.
393 if (const auto *EIT = Ty->getAs<BitIntType>())
394 if (EIT->getNumBits() > 128)
395 return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
396 ByVal: false);
397
398 if (Ty->isVectorType())
399 NSRN = std::min(a: NSRN + 1, b: 8u);
400 else if (const auto *BT = Ty->getAs<BuiltinType>()) {
401 if (BT->isFloatingPoint())
402 NSRN = std::min(a: NSRN + 1, b: 8u);
403 else {
 // SVE predicate types advance NPRN by the number of predicates they
 // hold; MFloat8 and the remaining SVE sizeless types advance NSRN.
404 switch (BT->getKind()) {
405 case BuiltinType::SveBool:
406 case BuiltinType::SveCount:
407 NPRN = std::min(a: NPRN + 1, b: 4u);
408 break;
409 case BuiltinType::SveBoolx2:
410 NPRN = std::min(a: NPRN + 2, b: 4u);
411 break;
412 case BuiltinType::SveBoolx4:
413 NPRN = std::min(a: NPRN + 4, b: 4u);
414 break;
415 case BuiltinType::MFloat8:
416 NSRN = std::min(a: NSRN + 1, b: 8u);
417 break;
418 default:
419 if (BT->isSVESizelessBuiltinType())
420 NSRN = std::min(
421 a: NSRN + getContext().getBuiltinVectorTypeInfo(VecTy: BT).NumVectors,
422 b: 8u);
423 }
424 }
425 }
426
 // Promotable integers are only extended for the Darwin PCS; every other
 // ABI kind passes them direct.
427 return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
428 ? ABIArgInfo::getExtend(Ty, T: CGT.ConvertType(T: Ty))
429 : ABIArgInfo::getDirect());
430 }
431
432 // Structures with either a non-trivial destructor or a non-trivial
433 // copy constructor are always indirect.
434 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(T: Ty, CXXABI&: getCXXABI())) {
435 return getNaturalAlignIndirect(
436 Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
437 /*ByVal=*/RAA == CGCXXABI::RAA_DirectInMemory);
438 }
439
440 // Empty records:
441 // AAPCS64 does not say that empty records are ignored as arguments,
442 // but other compilers do so in certain situations, and we copy that behavior.
443 // Those situations are in fact language-mode-specific, which seems really
444 // unfortunate, but it's something we just have to accept. If this doesn't
445 // apply, just fall through to the standard argument-handling path.
446 // Darwin overrides the psABI here to ignore all empty records in all modes.
 // Size is in bits (it is compared with 128 for the "<= 16 bytes" rule below).
447 uint64_t Size = getContext().getTypeSize(T: Ty);
448 bool IsEmpty = isEmptyRecord(Context&: getContext(), T: Ty, AllowArrays: true);
449 if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) {
450 // Empty records are ignored in C mode, and in C++ on Darwin.
451 if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
452 return ABIArgInfo::getIgnore();
453
454 // In C++ mode, arguments which have sizeof() == 0 (which are non-standard
455 // C++) are ignored. This isn't defined by any standard, so we copy GCC's
456 // behaviour here.
457 if (Size == 0)
458 return ABIArgInfo::getIgnore();
459 }
460
461 // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
462 const Type *Base = nullptr;
463 uint64_t Members = 0;
 // Win64 behaviour is selected either by the ABI kind or by the calling
 // convention of this particular function.
464 bool IsWin64 = Kind == AArch64ABIKind::Win64 ||
465 CallingConvention == llvm::CallingConv::Win64;
466 bool IsWinVariadic = IsWin64 && IsVariadicFn;
467 // In variadic functions on Windows, all composite types are treated alike,
468 // no special handling of HFAs/HVAs.
469 if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
470 NSRN = std::min(a: NSRN + Members, b: uint64_t(8));
 // ABI kinds other than AAPCS pass the aggregate as a plain array of its
 // base type, with no explicit alignment.
471 if (Kind != AArch64ABIKind::AAPCS)
472 return ABIArgInfo::getDirect(
473 T: llvm::ArrayType::get(ElementType: CGT.ConvertType(T: QualType(Base, 0)), NumElements: Members));
474
475 // For HFAs/HVAs, cap the argument alignment to 16, otherwise
476 // set it to 8 according to the AAPCS64 document.
477 unsigned Align =
478 getContext().getTypeUnadjustedAlignInChars(T: Ty).getQuantity();
479 Align = (Align >= 16) ? 16 : 8;
480 return ABIArgInfo::getDirect(
481 T: llvm::ArrayType::get(ElementType: CGT.ConvertType(T: QualType(Base, 0)), NumElements: Members), Offset: 0,
482 Padding: nullptr, CanBeFlattened: true, Align);
483 }
484
485 // In AAPCS named arguments of a Pure Scalable Type are passed expanded in
486 // registers, or indirectly if there are not enough registers.
487 if (Kind == AArch64ABIKind::AAPCS) {
488 unsigned NVec = 0, NPred = 0;
489 SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
 // Only types that actually contain at least one vector or predicate take
 // this path.
490 if (passAsPureScalableType(Ty, NV&: NVec, NP&: NPred, CoerceToSeq&: UnpaddedCoerceToSeq) &&
491 (NVec + NPred) > 0)
492 return coerceAndExpandPureScalableAggregate(
493 Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
494 }
495
496 // Aggregates <= 16 bytes are passed directly in registers or on the stack.
497 if (Size <= 128) {
 // Alignment is in bits. AAPCS snaps it to 64 or 128 based on the unadjusted
 // type alignment; other ABI kinds use at least the pointer width.
498 unsigned Alignment;
499 if (Kind == AArch64ABIKind::AAPCS) {
500 Alignment = getContext().getTypeUnadjustedAlign(T: Ty);
501 Alignment = Alignment < 128 ? 64 : 128;
502 } else {
503 Alignment =
504 std::max(a: getContext().getTypeAlign(T: Ty),
505 b: (unsigned)getTarget().getPointerWidth(AddrSpace: LangAS::Default));
506 }
 // Round the size up to a whole number of Alignment-sized units.
507 Size = llvm::alignTo(Value: Size, Align: Alignment);
508
509 // If the Aggregate is made up of pointers, use an array of pointers for the
510 // coerced type. This prevents having to convert ptr2int->int2ptr through
511 // the call, allowing alias analysis to produce better code.
 // The lambda receives itself as `Self` so that it can recurse into base
 // classes and nested record fields. It accepts only records whose fields
 // (or array element types) are 64-bit pointers/references without an
 // address space, or records that themselves satisfy the predicate.
512 auto ContainsOnlyPointers = [&](const auto &Self, QualType Ty) {
513 if (isEmptyRecord(Context&: getContext(), T: Ty, AllowArrays: true))
514 return false;
515 const auto *RD = Ty->getAsRecordDecl();
516 if (!RD)
517 return false;
518 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
519 for (const auto &I : CXXRD->bases())
520 if (!Self(Self, I.getType()))
521 return false;
522 }
523 return all_of(RD->fields(), [&](FieldDecl *FD) {
524 QualType FDTy = FD->getType();
525 if (FDTy->isArrayType())
526 FDTy = getContext().getBaseElementType(QT: FDTy);
527 return (FDTy->isPointerOrReferenceType() &&
528 getContext().getTypeSize(T: FDTy) == 64 &&
529 !FDTy->getPointeeType().hasAddressSpace()) ||
530 Self(Self, FDTy);
531 });
532 };
533
534 // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
535 // For aggregates with 16-byte alignment, we use i128.
536 llvm::Type *BaseTy = llvm::Type::getIntNTy(C&: getVMContext(), N: Alignment);
537 if ((Size == 64 || Size == 128) && Alignment == 64 &&
538 ContainsOnlyPointers(ContainsOnlyPointers, Ty))
539 BaseTy = llvm::PointerType::getUnqual(C&: getVMContext());
 // A single unit is passed as BaseTy itself, several units as an array.
540 return ABIArgInfo::getDirect(
541 T: Size == Alignment ? BaseTy
542 : llvm::ArrayType::get(ElementType: BaseTy, NumElements: Size / Alignment));
543 }
544
 // Anything larger than 16 bytes is passed indirectly.
545 return getNaturalAlignIndirect(Ty, AddrSpace: getDataLayout().getAllocaAddrSpace(),
546 /*ByVal=*/false);
547}
548
549ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
550 bool IsVariadicFn) const {
551 if (RetTy->isVoidType())
552 return ABIArgInfo::getIgnore();
553
554 if (const auto *VT = RetTy->getAs<VectorType>()) {
555 if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
556 VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
557 unsigned NSRN = 0, NPRN = 0;
558 return coerceIllegalVector(Ty: RetTy, NSRN, NPRN);
559 }
560 }
561
562 // Large vector types should be returned via memory.
563 if (RetTy->isVectorType() && getContext().getTypeSize(T: RetTy) > 128)
564 return getNaturalAlignIndirect(Ty: RetTy, AddrSpace: getDataLayout().getAllocaAddrSpace());
565
566 if (!passAsAggregateType(Ty: RetTy)) {
567 // Treat an enum type as its underlying type.
568 if (const auto *ED = RetTy->getAsEnumDecl())
569 RetTy = ED->getIntegerType();
570
571 if (const auto *EIT = RetTy->getAs<BitIntType>())
572 if (EIT->getNumBits() > 128)
573 return getNaturalAlignIndirect(Ty: RetTy,
574 AddrSpace: getDataLayout().getAllocaAddrSpace());
575
576 return (isPromotableIntegerTypeForABI(Ty: RetTy) && isDarwinPCS()
577 ? ABIArgInfo::getExtend(Ty: RetTy)
578 : ABIArgInfo::getDirect());
579 }
580
581 uint64_t Size = getContext().getTypeSize(T: RetTy);
582 if (!RetTy->isSVESizelessBuiltinType() &&
583 (isEmptyRecord(Context&: getContext(), T: RetTy, AllowArrays: true) || Size == 0))
584 return ABIArgInfo::getIgnore();
585
586 const Type *Base = nullptr;
587 uint64_t Members = 0;
588 if (isHomogeneousAggregate(Ty: RetTy, Base, Members) &&
589 !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
590 IsVariadicFn))
591 // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
592 return ABIArgInfo::getDirect();
593
594 // In AAPCS return values of a Pure Scalable type are treated as a single
595 // named argument and passed expanded in registers, or indirectly if there are
596 // not enough registers.
597 if (Kind == AArch64ABIKind::AAPCS) {
598 unsigned NSRN = 0, NPRN = 0;
599 unsigned NVec = 0, NPred = 0;
600 SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
601 if (passAsPureScalableType(Ty: RetTy, NV&: NVec, NP&: NPred, CoerceToSeq&: UnpaddedCoerceToSeq) &&
602 (NVec + NPred) > 0)
603 return coerceAndExpandPureScalableAggregate(
604 Ty: RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN,
605 NPRN);
606 }
607
608 // Aggregates <= 16 bytes are returned directly in registers or on the stack.
609 if (Size <= 128) {
610 if (Size <= 64 && getDataLayout().isLittleEndian()) {
611 // Composite types are returned in lower bits of a 64-bit register for LE,
612 // and in higher bits for BE. However, integer types are always returned
613 // in lower bits for both LE and BE, and they are not rounded up to
614 // 64-bits. We can skip rounding up of composite types for LE, but not for
615 // BE, otherwise composite types will be indistinguishable from integer
616 // types.
617 return ABIArgInfo::getDirect(
618 T: llvm::IntegerType::get(C&: getVMContext(), NumBits: Size));
619 }
620
621 unsigned Alignment = getContext().getTypeAlign(T: RetTy);
622 Size = llvm::alignTo(Value: Size, Align: 64); // round up to multiple of 8 bytes
623
624 // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
625 // For aggregates with 16-byte alignment, we use i128.
626 if (Alignment < 128 && Size == 128) {
627 llvm::Type *BaseTy = llvm::Type::getInt64Ty(C&: getVMContext());
628 return ABIArgInfo::getDirect(T: llvm::ArrayType::get(ElementType: BaseTy, NumElements: Size / 64));
629 }
630 return ABIArgInfo::getDirect(T: llvm::IntegerType::get(C&: getVMContext(), NumBits: Size));
631 }
632
633 return getNaturalAlignIndirect(Ty: RetTy, AddrSpace: getDataLayout().getAllocaAddrSpace());
634}
635
636/// isIllegalVectorType - check whether the vector type is legal for AArch64.
637bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
638 if (const VectorType *VT = Ty->getAs<VectorType>()) {
639 // Check whether VT is a fixed-length SVE vector. These types are
640 // represented as scalable vectors in function args/return and must be
641 // coerced from fixed vectors.
642 if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
643 VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
644 return true;
645
646 // Check whether VT is legal.
647 unsigned NumElements = VT->getNumElements();
648 uint64_t Size = getContext().getTypeSize(T: VT);
649 // NumElements should be power of 2.
650 if (!llvm::isPowerOf2_32(Value: NumElements))
651 return true;
652
653 // arm64_32 has to be compatible with the ARM logic here, which allows huge
654 // vectors for some reason.
655 llvm::Triple Triple = getTarget().getTriple();
656 if (Triple.getArch() == llvm::Triple::aarch64_32 &&
657 Triple.isOSBinFormatMachO())
658 return Size <= 32;
659
660 return Size != 64 && (Size != 128 || NumElements == 1);
661 }
662 return false;
663}
664
665bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
666 llvm::Type *EltTy,
667 unsigned NumElts) const {
668 if (!llvm::isPowerOf2_32(Value: NumElts))
669 return false;
670 if (VectorSize.getQuantity() != 8 &&
671 (VectorSize.getQuantity() != 16 || NumElts == 1))
672 return false;
673 return true;
674}
675
676bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
677 // For the soft-float ABI variant, no types are considered to be homogeneous
678 // aggregates.
679 if (isSoftFloat())
680 return false;
681
682 // Homogeneous aggregates for AAPCS64 must have base types of a floating
683 // point type or a short-vector type. This is the same as the 32-bit ABI,
684 // but with the difference that any floating-point type is allowed,
685 // including __fp16.
686 if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
687 if (BT->isFloatingPoint())
688 return true;
689 } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
690 if (auto Kind = VT->getVectorKind();
691 Kind == VectorKind::SveFixedLengthData ||
692 Kind == VectorKind::SveFixedLengthPredicate)
693 return false;
694
695 unsigned VecSize = getContext().getTypeSize(T: VT);
696 if (VecSize == 64 || VecSize == 128)
697 return true;
698 }
699 return false;
700}
701
702bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
703 uint64_t Members) const {
704 return Members <= 4;
705}
706
707bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
708 const {
709 // AAPCS64 says that the rule for whether something is a homogeneous
710 // aggregate is applied to the output of the data layout decision. So
711 // anything that doesn't affect the data layout also does not affect
712 // homogeneity. In particular, zero-length bitfields don't stop a struct
713 // being homogeneous.
714 return true;
715}
716
717bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
718 if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
719 const auto *BT = Ty->castAs<BuiltinType>();
720 return !BT->isSVECount() &&
721 getContext().getBuiltinVectorTypeInfo(VecTy: BT).NumVectors > 1;
722 }
723 return isAggregateTypeForABI(T: Ty);
724}
725
726// Check if a type needs to be passed in registers as a Pure Scalable Type (as
727// defined by AAPCS64). Return the number of data vectors and the number of
728// predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon
729// return `CoerceToSeq` contains an expanded sequence of LLVM IR types, one
730// element for each non-composite member. For practical purposes, limit the
731// length of `CoerceToSeq` to about 12 (the maximum that could possibly fit
732// in registers) and return false, the effect of which will be to pass the
733// argument under the rules for a large (> 128 bytes) composite.
//
// Notes for callers:
//  - `NVec`, `NPred` and `CoerceToSeq` are accumulated into (never reset), so
//    callers are expected to pass zero counters and an empty sequence for a
//    fresh query.
//  - When the function returns false, the outputs may already have been
//    partially updated and should not be relied upon.
//  - The function recurses through constant arrays, record bases and record
//    fields; empty bases and empty fields are skipped.
734bool AArch64ABIInfo::passAsPureScalableType(
735 QualType Ty, unsigned &NVec, unsigned &NPred,
736 SmallVectorImpl<llvm::Type *> &CoerceToSeq) const {
 // Constant-size arrays: classify the element type once into scratch
 // counters/sequence, then replicate the result NElt times.
737 if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(T: Ty)) {
738 uint64_t NElt = AT->getZExtSize();
739 if (NElt == 0)
740 return false;
741
742 unsigned NV = 0, NP = 0;
743 SmallVector<llvm::Type *> EltCoerceToSeq;
744 if (!passAsPureScalableType(Ty: AT->getElementType(), NVec&: NV, NPred&: NP, CoerceToSeq&: EltCoerceToSeq))
745 return false;
746
 // Reject before appending if the expanded sequence would exceed 12 entries.
747 if (CoerceToSeq.size() + NElt * EltCoerceToSeq.size() > 12)
748 return false;
749
750 for (uint64_t I = 0; I < NElt; ++I)
751 llvm::append_range(C&: CoerceToSeq, R&: EltCoerceToSeq);
752
753 NVec += NElt * NV;
754 NPred += NElt * NP;
755 return true;
756 }
757
758 if (const RecordType *RT = Ty->getAsCanonical<RecordType>()) {
759 // If the record cannot be passed in registers, then it's not a PST.
760 if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, CXXABI&: getCXXABI());
761 RAA != CGCXXABI::RAA_Default)
762 return false;
763
764 // Pure scalable types are never unions and never contain unions.
765 const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf();
766 if (RD->isUnion())
767 return false;
768
769 // If this is a C++ record, check the bases.
770 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD)) {
771 for (const auto &I : CXXRD->bases()) {
772 if (isEmptyRecord(Context&: getContext(), T: I.getType(), AllowArrays: true))
773 continue;
774 if (!passAsPureScalableType(Ty: I.getType(), NVec, NPred, CoerceToSeq))
775 return false;
776 }
777 }
778
779 // Check members.
780 for (const auto *FD : RD->fields()) {
781 QualType FT = FD->getType();
782 if (isEmptyField(Context&: getContext(), FD, /* AllowArrays */ true))
783 continue;
784 if (!passAsPureScalableType(Ty: FT, NVec, NPred, CoerceToSeq))
785 return false;
786 }
787
 // Note: a record with no non-empty bases or fields also returns true here
 // with the counters unchanged; the callers in this file additionally
 // require NVec + NPred > 0.
788 return true;
789 }
790
 // Vector types: only fixed-length SVE predicate/data vectors qualify, each
 // contributing one entry.
791 if (const auto *VT = Ty->getAs<VectorType>()) {
792 if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
 // The counter is incremented before the length check, so it is already
 // bumped when false is returned below.
793 ++NPred;
794 if (CoerceToSeq.size() + 1 > 12)
795 return false;
796 CoerceToSeq.push_back(Elt: convertFixedToScalableVectorType(VT));
797 return true;
798 }
799
800 if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
801 ++NVec;
802 if (CoerceToSeq.size() + 1 > 12)
803 return false;
804 CoerceToSeq.push_back(Elt: convertFixedToScalableVectorType(VT));
805 return true;
806 }
807
808 return false;
809 }
810
811 if (!Ty->isBuiltinType())
812 return false;
813
 // Builtin types: the X-macros below generate one case per SVE vector type
 // (isPredicate = false) and per SVE predicate type (isPredicate = true);
 // every other builtin kind is not a Pure Scalable Type.
814 bool isPredicate;
815 switch (Ty->castAs<BuiltinType>()->getKind()) {
816#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
817 case BuiltinType::Id: \
818 isPredicate = false; \
819 break;
820#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \
821 case BuiltinType::Id: \
822 isPredicate = true; \
823 break;
824#include "clang/Basic/AArch64ACLETypes.def"
825 default:
826 return false;
827 }
828
829 ASTContext::BuiltinVectorTypeInfo Info =
830 getContext().getBuiltinVectorTypeInfo(VecTy: cast<BuiltinType>(Val&: Ty));
831 assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
832 "Expected 1, 2, 3 or 4 vectors!");
833 if (isPredicate)
834 NPred += Info.NumVectors;
835 else
836 NVec += Info.NumVectors;
 // MFloat8 elements are represented as i8 in the coerced vector type; all
 // other element types use their converted LLVM type.
837 llvm::Type *EltTy = Info.ElementType->isMFloat8Type()
838 ? llvm::Type::getInt8Ty(C&: getVMContext())
839 : CGT.ConvertType(T: Info.ElementType);
840 auto *VTy = llvm::ScalableVectorType::get(ElementType: EltTy, MinNumElts: Info.EC.getKnownMinValue());
841
842 if (CoerceToSeq.size() + Info.NumVectors > 12)
843 return false;
 // A tuple of N vectors contributes N copies of the same scalable type.
844 std::fill_n(first: std::back_inserter(x&: CoerceToSeq), n: Info.NumVectors, value: VTy);
845
846 return true;
847}
848
849// Expand an LLVM IR type into a sequence with a element for each non-struct,
850// non-array member of the type, with the exception of the padding types, which
851// are retained.
852void AArch64ABIInfo::flattenType(
853 llvm::Type *Ty, SmallVectorImpl<llvm::Type *> &Flattened) const {
854
855 if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType: Ty)) {
856 Flattened.push_back(Elt: Ty);
857 return;
858 }
859
860 if (const auto *AT = dyn_cast<llvm::ArrayType>(Val: Ty)) {
861 uint64_t NElt = AT->getNumElements();
862 if (NElt == 0)
863 return;
864
865 SmallVector<llvm::Type *> EltFlattened;
866 flattenType(Ty: AT->getElementType(), Flattened&: EltFlattened);
867
868 for (uint64_t I = 0; I < NElt; ++I)
869 llvm::append_range(C&: Flattened, R&: EltFlattened);
870 return;
871 }
872
873 if (const auto *ST = dyn_cast<llvm::StructType>(Val: Ty)) {
874 for (auto *ET : ST->elements())
875 flattenType(Ty: ET, Flattened);
876 return;
877 }
878
879 Flattened.push_back(Elt: Ty);
880}
881
882RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
883 CodeGenFunction &CGF, AArch64ABIKind Kind,
884 AggValueSlot Slot) const {
885 // These numbers are not used for variadic arguments, hence it doesn't matter
886 // they don't retain their values across multiple calls to
887 // `classifyArgumentType` here.
888 unsigned NSRN = 0, NPRN = 0;
889 ABIArgInfo AI =
890 classifyArgumentType(Ty, /*IsVariadicFn=*/true, /* IsNamedArg */ false,
891 CallingConvention: CGF.CurFnInfo->getCallingConvention(), NSRN, NPRN);
892 // Empty records are ignored for parameter passing purposes.
893 if (AI.isIgnore())
894 return Slot.asRValue();
895
896 bool IsIndirect = AI.isIndirect();
897
898 llvm::Type *BaseTy = CGF.ConvertType(T: Ty);
899 if (IsIndirect)
900 BaseTy = llvm::PointerType::getUnqual(C&: BaseTy->getContext());
901 else if (AI.getCoerceToType())
902 BaseTy = AI.getCoerceToType();
903
904 unsigned NumRegs = 1;
905 if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(Val: BaseTy)) {
906 BaseTy = ArrTy->getElementType();
907 NumRegs = ArrTy->getNumElements();
908 }
909 bool IsFPR =
910 !isSoftFloat() && (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
911
912 // The AArch64 va_list type and handling is specified in the Procedure Call
913 // Standard, section B.4:
914 //
915 // struct {
916 // void *__stack;
917 // void *__gr_top;
918 // void *__vr_top;
919 // int __gr_offs;
920 // int __vr_offs;
921 // };
922
923 llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock(name: "vaarg.maybe_reg");
924 llvm::BasicBlock *InRegBlock = CGF.createBasicBlock(name: "vaarg.in_reg");
925 llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock(name: "vaarg.on_stack");
926 llvm::BasicBlock *ContBlock = CGF.createBasicBlock(name: "vaarg.end");
927
928 CharUnits TySize = getContext().getTypeSizeInChars(T: Ty);
929 CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(T: Ty);
930
931 Address reg_offs_p = Address::invalid();
932 llvm::Value *reg_offs = nullptr;
933 int reg_top_index;
934 int RegSize = IsIndirect ? 8 : TySize.getQuantity();
935 if (!IsFPR) {
936 // 3 is the field number of __gr_offs
937 reg_offs_p = CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: 3, Name: "gr_offs_p");
938 reg_offs = CGF.Builder.CreateLoad(Addr: reg_offs_p, Name: "gr_offs");
939 reg_top_index = 1; // field number for __gr_top
940 RegSize = llvm::alignTo(Value: RegSize, Align: 8);
941 } else {
942 // 4 is the field number of __vr_offs.
943 reg_offs_p = CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: 4, Name: "vr_offs_p");
944 reg_offs = CGF.Builder.CreateLoad(Addr: reg_offs_p, Name: "vr_offs");
945 reg_top_index = 2; // field number for __vr_top
946 RegSize = 16 * NumRegs;
947 }
948
949 //=======================================
950 // Find out where argument was passed
951 //=======================================
952
953 // If reg_offs >= 0 we're already using the stack for this type of
954 // argument. We don't want to keep updating reg_offs (in case it overflows,
955 // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
956 // whatever they get).
957 llvm::Value *UsingStack = nullptr;
958 UsingStack = CGF.Builder.CreateICmpSGE(
959 LHS: reg_offs, RHS: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: 0));
960
961 CGF.Builder.CreateCondBr(Cond: UsingStack, True: OnStackBlock, False: MaybeRegBlock);
962
963 // Otherwise, at least some kind of argument could go in these registers, the
964 // question is whether this particular type is too big.
965 CGF.EmitBlock(BB: MaybeRegBlock);
966
967 // Integer arguments may need to correct register alignment (for example a
968 // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
969 // align __gr_offs to calculate the potential address.
970 if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
971 int Align = TyAlign.getQuantity();
972
973 reg_offs = CGF.Builder.CreateAdd(
974 LHS: reg_offs, RHS: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Align - 1),
975 Name: "align_regoffs");
976 reg_offs = CGF.Builder.CreateAnd(
977 LHS: reg_offs, RHS: llvm::ConstantInt::getSigned(Ty: CGF.Int32Ty, V: -Align),
978 Name: "aligned_regoffs");
979 }
980
981 // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
982 // The fact that this is done unconditionally reflects the fact that
983 // allocating an argument to the stack also uses up all the remaining
984 // registers of the appropriate kind.
985 llvm::Value *NewOffset = nullptr;
986 NewOffset = CGF.Builder.CreateAdd(
987 LHS: reg_offs, RHS: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: RegSize), Name: "new_reg_offs");
988 CGF.Builder.CreateStore(Val: NewOffset, Addr: reg_offs_p);
989
990 // Now we're in a position to decide whether this argument really was in
991 // registers or not.
992 llvm::Value *InRegs = nullptr;
993 InRegs = CGF.Builder.CreateICmpSLE(
994 LHS: NewOffset, RHS: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: 0), Name: "inreg");
995
996 CGF.Builder.CreateCondBr(Cond: InRegs, True: InRegBlock, False: OnStackBlock);
997
998 //=======================================
999 // Argument was in registers
1000 //=======================================
1001
1002 // Now we emit the code for if the argument was originally passed in
1003 // registers. First start the appropriate block:
1004 CGF.EmitBlock(BB: InRegBlock);
1005
1006 llvm::Value *reg_top = nullptr;
1007 Address reg_top_p =
1008 CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: reg_top_index, Name: "reg_top_p");
1009 reg_top = CGF.Builder.CreateLoad(Addr: reg_top_p, Name: "reg_top");
1010 Address BaseAddr(CGF.Builder.CreateInBoundsGEP(Ty: CGF.Int8Ty, Ptr: reg_top, IdxList: reg_offs),
1011 CGF.Int8Ty, CharUnits::fromQuantity(Quantity: IsFPR ? 16 : 8));
1012 Address RegAddr = Address::invalid();
1013 llvm::Type *MemTy = CGF.ConvertTypeForMem(T: Ty), *ElementTy = MemTy;
1014
1015 if (IsIndirect) {
1016 // If it's been passed indirectly (actually a struct), whatever we find from
1017 // stored registers or on the stack will actually be a struct **.
1018 MemTy = llvm::PointerType::getUnqual(C&: MemTy->getContext());
1019 }
1020
1021 const Type *Base = nullptr;
1022 uint64_t NumMembers = 0;
1023 bool IsHFA = isHomogeneousAggregate(Ty, Base, Members&: NumMembers);
1024 if (IsHFA && NumMembers > 1) {
1025 // Homogeneous aggregates passed in registers will have their elements split
1026 // and stored 16-bytes apart regardless of size (they're notionally in qN,
1027 // qN+1, ...). We reload and store into a temporary local variable
1028 // contiguously.
1029 assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
1030 auto BaseTyInfo = getContext().getTypeInfoInChars(T: QualType(Base, 0));
1031 llvm::Type *BaseTy = CGF.ConvertType(T: QualType(Base, 0));
1032 llvm::Type *HFATy = llvm::ArrayType::get(ElementType: BaseTy, NumElements: NumMembers);
1033 Address Tmp = CGF.CreateTempAlloca(Ty: HFATy,
1034 align: std::max(a: TyAlign, b: BaseTyInfo.Align));
1035
1036 // On big-endian platforms, the value will be right-aligned in its slot.
1037 int Offset = 0;
1038 if (CGF.CGM.getDataLayout().isBigEndian() &&
1039 BaseTyInfo.Width.getQuantity() < 16)
1040 Offset = 16 - BaseTyInfo.Width.getQuantity();
1041
1042 for (unsigned i = 0; i < NumMembers; ++i) {
1043 CharUnits BaseOffset = CharUnits::fromQuantity(Quantity: 16 * i + Offset);
1044 Address LoadAddr =
1045 CGF.Builder.CreateConstInBoundsByteGEP(Addr: BaseAddr, Offset: BaseOffset);
1046 LoadAddr = LoadAddr.withElementType(ElemTy: BaseTy);
1047
1048 Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Addr: Tmp, Index: i);
1049
1050 llvm::Value *Elem = CGF.Builder.CreateLoad(Addr: LoadAddr);
1051 CGF.Builder.CreateStore(Val: Elem, Addr: StoreAddr);
1052 }
1053
1054 RegAddr = Tmp.withElementType(ElemTy: MemTy);
1055 } else {
1056 // Otherwise the object is contiguous in memory.
1057
1058 // It might be right-aligned in its slot.
1059 CharUnits SlotSize = BaseAddr.getAlignment();
1060 if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
1061 (IsHFA || !isAggregateTypeForABI(T: Ty)) &&
1062 TySize < SlotSize) {
1063 CharUnits Offset = SlotSize - TySize;
1064 BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(Addr: BaseAddr, Offset);
1065 }
1066
1067 RegAddr = BaseAddr.withElementType(ElemTy: MemTy);
1068 }
1069
1070 CGF.EmitBranch(Block: ContBlock);
1071
1072 //=======================================
1073 // Argument was on the stack
1074 //=======================================
1075 CGF.EmitBlock(BB: OnStackBlock);
1076
1077 Address stack_p = CGF.Builder.CreateStructGEP(Addr: VAListAddr, Index: 0, Name: "stack_p");
1078 llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(Addr: stack_p, Name: "stack");
1079
1080 // Again, stack arguments may need realignment. In this case both integer and
1081 // floating-point ones might be affected.
1082 if (!IsIndirect && TyAlign.getQuantity() > 8) {
1083 OnStackPtr = emitRoundPointerUpToAlignment(CGF, Ptr: OnStackPtr, Align: TyAlign);
1084 }
1085 Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
1086 std::max(a: CharUnits::fromQuantity(Quantity: 8), b: TyAlign));
1087
1088 // All stack slots are multiples of 8 bytes.
1089 CharUnits StackSlotSize = CharUnits::fromQuantity(Quantity: 8);
1090 CharUnits StackSize;
1091 if (IsIndirect)
1092 StackSize = StackSlotSize;
1093 else
1094 StackSize = TySize.alignTo(Align: StackSlotSize);
1095
1096 llvm::Value *StackSizeC = CGF.Builder.getSize(N: StackSize);
1097 llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
1098 Ty: CGF.Int8Ty, Ptr: OnStackPtr, IdxList: StackSizeC, Name: "new_stack");
1099
1100 // Write the new value of __stack for the next call to va_arg
1101 CGF.Builder.CreateStore(Val: NewStack, Addr: stack_p);
1102
1103 if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(T: Ty) &&
1104 TySize < StackSlotSize) {
1105 CharUnits Offset = StackSlotSize - TySize;
1106 OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(Addr: OnStackAddr, Offset);
1107 }
1108
1109 OnStackAddr = OnStackAddr.withElementType(ElemTy: MemTy);
1110
1111 CGF.EmitBranch(Block: ContBlock);
1112
1113 //=======================================
1114 // Tidy up
1115 //=======================================
1116 CGF.EmitBlock(BB: ContBlock);
1117
1118 Address ResAddr = emitMergePHI(CGF, Addr1: RegAddr, Block1: InRegBlock, Addr2: OnStackAddr,
1119 Block2: OnStackBlock, Name: "vaargs.addr");
1120
1121 if (IsIndirect)
1122 return CGF.EmitLoadOfAnyValue(
1123 V: CGF.MakeAddrLValue(
1124 Addr: Address(CGF.Builder.CreateLoad(Addr: ResAddr, Name: "vaarg.addr"), ElementTy,
1125 TyAlign),
1126 T: Ty),
1127 Slot);
1128
1129 return CGF.EmitLoadOfAnyValue(V: CGF.MakeAddrLValue(Addr: ResAddr, T: Ty), Slot);
1130}
1131
1132RValue AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
1133 CodeGenFunction &CGF,
1134 AggValueSlot Slot) const {
1135 // The backend's lowering doesn't support va_arg for aggregates or
1136 // illegal vector types. Lower VAArg here for these cases and use
1137 // the LLVM va_arg instruction for everything else.
1138 if (!isAggregateTypeForABI(T: Ty) && !isIllegalVectorType(Ty))
1139 return CGF.EmitLoadOfAnyValue(
1140 V: CGF.MakeAddrLValue(
1141 Addr: EmitVAArgInstr(CGF, VAListAddr, Ty, AI: ABIArgInfo::getDirect()), T: Ty),
1142 Slot);
1143
1144 uint64_t PointerSize = getTarget().getPointerWidth(AddrSpace: LangAS::Default) / 8;
1145 CharUnits SlotSize = CharUnits::fromQuantity(Quantity: PointerSize);
1146
1147 // Empty records are ignored for parameter passing purposes.
1148 if (isEmptyRecord(Context&: getContext(), T: Ty, AllowArrays: true))
1149 return Slot.asRValue();
1150
1151 // The size of the actual thing passed, which might end up just
1152 // being a pointer for indirect types.
1153 auto TyInfo = getContext().getTypeInfoInChars(T: Ty);
1154
1155 // Arguments bigger than 16 bytes which aren't homogeneous
1156 // aggregates should be passed indirectly.
1157 bool IsIndirect = false;
1158 if (TyInfo.Width.getQuantity() > 16) {
1159 const Type *Base = nullptr;
1160 uint64_t Members = 0;
1161 IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
1162 }
1163
1164 return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, IsIndirect, ValueInfo: TyInfo, SlotSizeAndAlign: SlotSize,
1165 /*AllowHigherAlign*/ true, Slot);
1166}
1167
1168RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
1169 QualType Ty, AggValueSlot Slot) const {
1170 bool IsIndirect = false;
1171
1172 if (getTarget().getTriple().isWindowsArm64EC()) {
1173 // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
1174 // not 1, 2, 4, or 8 bytes, must be passed by reference."
1175 uint64_t Width = getContext().getTypeSize(T: Ty);
1176 IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Value: Width);
1177 } else {
1178 // Composites larger than 16 bytes are passed by reference.
1179 if (isAggregateTypeForABI(T: Ty) && getContext().getTypeSize(T: Ty) > 128)
1180 IsIndirect = true;
1181 }
1182
1183 return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, IsIndirect,
1184 ValueInfo: CGF.getContext().getTypeInfoInChars(T: Ty),
1185 SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: 8),
1186 /*allowHigherAlign*/ AllowHigherAlign: false, Slot);
1187}
1188
1189static bool isStreamingCompatible(const FunctionDecl *F) {
1190 if (const auto *T = F->getType()->getAs<FunctionProtoType>())
1191 return T->getAArch64SMEAttributes() &
1192 FunctionType::SME_PStateSMCompatibleMask;
1193 return false;
1194}
1195
1196// Report an error if an argument or return value of type Ty would need to be
1197// passed in a floating-point register.
1198static void diagnoseIfNeedsFPReg(DiagnosticsEngine &Diags,
1199 const StringRef ABIName,
1200 const AArch64ABIInfo &ABIInfo,
1201 const QualType &Ty, const NamedDecl *D,
1202 SourceLocation loc) {
1203 const Type *HABase = nullptr;
1204 uint64_t HAMembers = 0;
1205 if (Ty->isFloatingType() || Ty->isVectorType() ||
1206 ABIInfo.isHomogeneousAggregate(Ty, Base&: HABase, Members&: HAMembers)) {
1207 Diags.Report(Loc: loc, DiagID: diag::err_target_unsupported_type_for_abi)
1208 << D->getDeclName() << Ty << ABIName;
1209 }
1210}
1211
1212// If we are using a hard-float ABI, but do not have floating point registers,
1213// then report an error for any function arguments or returns which would be
1214// passed in floating-pint registers.
1215void AArch64TargetCodeGenInfo::checkFunctionABI(
1216 CodeGenModule &CGM, const FunctionDecl *FuncDecl) const {
1217 const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
1218 const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
1219
1220 if (!TI.hasFeature(Feature: "fp") && !ABIInfo.isSoftFloat()) {
1221 diagnoseIfNeedsFPReg(Diags&: CGM.getDiags(), ABIName: TI.getABI(), ABIInfo,
1222 Ty: FuncDecl->getReturnType(), D: FuncDecl,
1223 loc: FuncDecl->getLocation());
1224 for (ParmVarDecl *PVD : FuncDecl->parameters()) {
1225 diagnoseIfNeedsFPReg(Diags&: CGM.getDiags(), ABIName: TI.getABI(), ABIInfo, Ty: PVD->getType(),
1226 D: PVD, loc: FuncDecl->getLocation());
1227 }
1228 }
1229}
1230
1231enum class ArmSMEInlinability : uint8_t {
1232 Ok = 0,
1233 ErrorCalleeRequiresNewZA = 1 << 0,
1234 ErrorCalleeRequiresNewZT0 = 1 << 1,
1235 WarnIncompatibleStreamingModes = 1 << 2,
1236 ErrorIncompatibleStreamingModes = 1 << 3,
1237
1238 IncompatibleStreamingModes =
1239 WarnIncompatibleStreamingModes | ErrorIncompatibleStreamingModes,
1240
1241 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/ErrorIncompatibleStreamingModes),
1242};
1243
1244/// Determines if there are any Arm SME ABI issues with inlining \p Callee into
1245/// \p Caller. Returns the issue (if any) in the ArmSMEInlinability bit enum.
1246static ArmSMEInlinability GetArmSMEInlinability(const FunctionDecl *Caller,
1247 const FunctionDecl *Callee) {
1248 bool CallerIsStreaming =
1249 IsArmStreamingFunction(FD: Caller, /*IncludeLocallyStreaming=*/true);
1250 bool CalleeIsStreaming =
1251 IsArmStreamingFunction(FD: Callee, /*IncludeLocallyStreaming=*/true);
1252 bool CallerIsStreamingCompatible = isStreamingCompatible(F: Caller);
1253 bool CalleeIsStreamingCompatible = isStreamingCompatible(F: Callee);
1254
1255 ArmSMEInlinability Inlinability = ArmSMEInlinability::Ok;
1256
1257 if (!CalleeIsStreamingCompatible &&
1258 (CallerIsStreaming != CalleeIsStreaming || CallerIsStreamingCompatible)) {
1259 if (CalleeIsStreaming)
1260 Inlinability |= ArmSMEInlinability::ErrorIncompatibleStreamingModes;
1261 else
1262 Inlinability |= ArmSMEInlinability::WarnIncompatibleStreamingModes;
1263 }
1264 if (auto *NewAttr = Callee->getAttr<ArmNewAttr>()) {
1265 if (NewAttr->isNewZA())
1266 Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZA;
1267 if (NewAttr->isNewZT0())
1268 Inlinability |= ArmSMEInlinability::ErrorCalleeRequiresNewZT0;
1269 }
1270
1271 return Inlinability;
1272}
1273
1274void AArch64TargetCodeGenInfo::checkFunctionCallABIStreaming(
1275 CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1276 const FunctionDecl *Callee) const {
1277 if (!Caller || !Callee || !Callee->hasAttr<AlwaysInlineAttr>())
1278 return;
1279
1280 ArmSMEInlinability Inlinability = GetArmSMEInlinability(Caller, Callee);
1281
1282 if ((Inlinability & ArmSMEInlinability::IncompatibleStreamingModes) !=
1283 ArmSMEInlinability::Ok)
1284 CGM.getDiags().Report(
1285 Loc: CallLoc,
1286 DiagID: (Inlinability & ArmSMEInlinability::ErrorIncompatibleStreamingModes) ==
1287 ArmSMEInlinability::ErrorIncompatibleStreamingModes
1288 ? diag::err_function_always_inline_attribute_mismatch
1289 : diag::warn_function_always_inline_attribute_mismatch)
1290 << Caller->getDeclName() << Callee->getDeclName() << "streaming";
1291
1292 if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZA) ==
1293 ArmSMEInlinability::ErrorCalleeRequiresNewZA)
1294 CGM.getDiags().Report(Loc: CallLoc, DiagID: diag::err_function_always_inline_new_za)
1295 << Callee->getDeclName();
1296
1297 if ((Inlinability & ArmSMEInlinability::ErrorCalleeRequiresNewZT0) ==
1298 ArmSMEInlinability::ErrorCalleeRequiresNewZT0)
1299 CGM.getDiags().Report(Loc: CallLoc, DiagID: diag::err_function_always_inline_new_zt0)
1300 << Callee->getDeclName();
1301}
1302
1303// If the target does not have floating-point registers, but we are using a
1304// hard-float ABI, there is no way to pass floating-point, vector or HFA values
1305// to functions, so we report an error.
1306void AArch64TargetCodeGenInfo::checkFunctionCallABISoftFloat(
1307 CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
1308 const FunctionDecl *Callee, const CallArgList &Args,
1309 QualType ReturnType) const {
1310 const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
1311 const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
1312
1313 if (!Caller || TI.hasFeature(Feature: "fp") || ABIInfo.isSoftFloat())
1314 return;
1315
1316 diagnoseIfNeedsFPReg(Diags&: CGM.getDiags(), ABIName: TI.getABI(), ABIInfo, Ty: ReturnType,
1317 D: Callee ? Callee : Caller, loc: CallLoc);
1318
1319 for (const CallArg &Arg : Args)
1320 diagnoseIfNeedsFPReg(Diags&: CGM.getDiags(), ABIName: TI.getABI(), ABIInfo, Ty: Arg.getType(),
1321 D: Callee ? Callee : Caller, loc: CallLoc);
1322}
1323
1324void AArch64TargetCodeGenInfo::checkFunctionCallABI(CodeGenModule &CGM,
1325 SourceLocation CallLoc,
1326 const FunctionDecl *Caller,
1327 const FunctionDecl *Callee,
1328 const CallArgList &Args,
1329 QualType ReturnType) const {
1330 checkFunctionCallABIStreaming(CGM, CallLoc, Caller, Callee);
1331 checkFunctionCallABISoftFloat(CGM, CallLoc, Caller, Callee, Args, ReturnType);
1332}
1333
1334bool AArch64TargetCodeGenInfo::wouldInliningViolateFunctionCallABI(
1335 const FunctionDecl *Caller, const FunctionDecl *Callee) const {
1336 return Caller && Callee &&
1337 GetArmSMEInlinability(Caller, Callee) != ArmSMEInlinability::Ok;
1338}
1339
1340void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
1341 unsigned Index,
1342 raw_ostream &Out) const {
1343 appendAttributeMangling(AttrStr: Attr->getFeatureStr(Index), Out);
1344}
1345
1346void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
1347 raw_ostream &Out) const {
1348 if (AttrStr == "default") {
1349 Out << ".default";
1350 return;
1351 }
1352
1353 Out << "._";
1354 SmallVector<StringRef, 8> Features;
1355 AttrStr.split(A&: Features, Separator: "+");
1356 for (auto &Feat : Features)
1357 Feat = Feat.trim();
1358
1359 llvm::sort(C&: Features, Comp: [](const StringRef LHS, const StringRef RHS) {
1360 return LHS.compare(RHS) < 0;
1361 });
1362
1363 llvm::SmallDenseSet<StringRef, 8> UniqueFeats;
1364 for (auto &Feat : Features)
1365 if (getTarget().doesFeatureAffectCodeGen(Feature: Feat))
1366 if (auto Ext = llvm::AArch64::parseFMVExtension(Extension: Feat))
1367 if (UniqueFeats.insert(V: Ext->Name).second)
1368 Out << 'M' << Ext->Name;
1369}
1370
1371std::unique_ptr<TargetCodeGenInfo>
1372CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
1373 AArch64ABIKind Kind) {
1374 return std::make_unique<AArch64TargetCodeGenInfo>(args&: CGM, args&: Kind);
1375}
1376
1377std::unique_ptr<TargetCodeGenInfo>
1378CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
1379 AArch64ABIKind K) {
1380 return std::make_unique<WindowsAArch64TargetCodeGenInfo>(args&: CGM, args&: K);
1381}
1382