1//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the table-generated and custom routines for the AArch64
10// Calling Convention.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64CallingConvention.h"
15#include "AArch64.h"
16#include "AArch64InstrInfo.h"
17#include "AArch64Subtarget.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/TargetInstrInfo.h"
20using namespace llvm;
21
22static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
23 AArch64::X3, AArch64::X4, AArch64::X5,
24 AArch64::X6, AArch64::X7};
25static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
26 AArch64::H3, AArch64::H4, AArch64::H5,
27 AArch64::H6, AArch64::H7};
28static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
29 AArch64::S3, AArch64::S4, AArch64::S5,
30 AArch64::S6, AArch64::S7};
31static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
32 AArch64::D3, AArch64::D4, AArch64::D5,
33 AArch64::D6, AArch64::D7};
34static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
35 AArch64::Q3, AArch64::Q4, AArch64::Q5,
36 AArch64::Q6, AArch64::Q7};
37static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
38 AArch64::Z3, AArch64::Z4, AArch64::Z5,
39 AArch64::Z6, AArch64::Z7};
40static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2,
41 AArch64::P3};
42
43static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
44 MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
45 CCState &State, Align SlotAlign) {
46 if (LocVT.isScalableVector()) {
47 const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
48 State.getMachineFunction().getSubtarget());
49 const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
50
51 // We are about to reinvoke the CCAssignFn auto-generated handler. If we
52 // don't unset these flags we will get stuck in an infinite loop forever
53 // invoking the custom handler.
54 ArgFlags.setInConsecutiveRegs(false);
55 ArgFlags.setInConsecutiveRegsLast(false);
56
57 // The calling convention for passing SVE tuples states that in the event
58 // we cannot allocate enough registers for the tuple we should still leave
59 // any remaining registers unallocated. However, when we call the
60 // CCAssignFn again we want it to behave as if all remaining registers are
61 // allocated. This will force the code to pass the tuple indirectly in
62 // accordance with the PCS.
63 bool ZRegsAllocated[8];
64 for (int I = 0; I < 8; I++) {
65 ZRegsAllocated[I] = State.isAllocated(Reg: ZRegList[I]);
66 State.AllocateReg(Reg: ZRegList[I]);
67 }
68 // The same applies to P registers.
69 bool PRegsAllocated[4];
70 for (int I = 0; I < 4; I++) {
71 PRegsAllocated[I] = State.isAllocated(Reg: PRegList[I]);
72 State.AllocateReg(Reg: PRegList[I]);
73 }
74
75 auto &It = PendingMembers[0];
76 CCAssignFn *AssignFn =
77 TLI->CCAssignFnForCall(CC: State.getCallingConv(), /*IsVarArg=*/false);
78 // FIXME: Get the correct original type.
79 Type *OrigTy = EVT(It.getValVT()).getTypeForEVT(Context&: State.getContext());
80 if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
81 ArgFlags, OrigTy, State))
82 llvm_unreachable("Call operand has unhandled type");
83
84 // Return the flags to how they were before.
85 ArgFlags.setInConsecutiveRegs(true);
86 ArgFlags.setInConsecutiveRegsLast(true);
87
88 // Return the register state back to how it was before, leaving any
89 // unallocated registers available for other smaller types.
90 for (int I = 0; I < 8; I++)
91 if (!ZRegsAllocated[I])
92 State.DeallocateReg(Reg: ZRegList[I]);
93 for (int I = 0; I < 4; I++)
94 if (!PRegsAllocated[I])
95 State.DeallocateReg(Reg: PRegList[I]);
96
97 // All pending members have now been allocated
98 PendingMembers.clear();
99 return true;
100 }
101
102 unsigned Size = LocVT.getSizeInBits() / 8;
103 for (auto &It : PendingMembers) {
104 It.convertToMem(Offset: State.AllocateStack(Size, Alignment: SlotAlign));
105 State.addLoc(V: It);
106 SlotAlign = Align(1);
107 }
108
109 // All pending members have now been allocated
110 PendingMembers.clear();
111 return true;
112}
113
114/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
115/// [N x Ty] type must still be contiguous in memory though.
116static bool CC_AArch64_Custom_Stack_Block(
117 unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
118 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
119 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
120
121 // Add the argument to the list to be allocated once we know the size of the
122 // block.
123 PendingMembers.push_back(
124 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
125
126 if (!ArgFlags.isInConsecutiveRegsLast())
127 return true;
128
129 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign: Align(8));
130}
131
132/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
133/// registers. If no such sequence is available, mark the rest of the registers
134/// of that type as used and place the argument on the stack.
135static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
136 CCValAssign::LocInfo &LocInfo,
137 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
138 const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
139 State.getMachineFunction().getSubtarget());
140 bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
141
142 // Try to allocate a contiguous block of registers, each of the correct
143 // size to hold one member.
144 ArrayRef<MCPhysReg> RegList;
145 if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
146 RegList = XRegList;
147 else if (LocVT.SimpleTy == MVT::f16 || LocVT.SimpleTy == MVT::bf16)
148 RegList = HRegList;
149 else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
150 RegList = SRegList;
151 else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
152 RegList = DRegList;
153 else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
154 RegList = QRegList;
155 else if (LocVT.isScalableVector()) {
156 // Scalable masks should be pass by Predicate registers.
157 if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 ||
158 LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 ||
159 LocVT == MVT::aarch64svcount)
160 RegList = PRegList;
161 else
162 RegList = ZRegList;
163 } else {
164 // Not an array we want to split up after all.
165 return false;
166 }
167
168 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
169
170 // Add the argument to the list to be allocated once we know the size of the
171 // block.
172 PendingMembers.push_back(
173 Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo));
174
175 if (!ArgFlags.isInConsecutiveRegsLast())
176 return true;
177
178 // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
179 // because that's how the armv7k Clang front-end emits small structs.
180 unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
181 ArrayRef<MCPhysReg> RegResult = State.AllocateRegBlock(
182 Regs: RegList, RegsRequired: alignTo(Value: PendingMembers.size(), Align: EltsPerReg) / EltsPerReg);
183 if (!RegResult.empty() && EltsPerReg == 1) {
184 for (const auto &[It, Reg] : zip(t&: PendingMembers, u&: RegResult)) {
185 It.convertToReg(Reg);
186 State.addLoc(V: It);
187 }
188 PendingMembers.clear();
189 return true;
190 } else if (!RegResult.empty()) {
191 assert(EltsPerReg == 2 && "unexpected ABI");
192 bool UseHigh = false;
193 CCValAssign::LocInfo Info;
194 unsigned RegIdx = 0;
195 for (auto &It : PendingMembers) {
196 Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
197 State.addLoc(V: CCValAssign::getReg(ValNo: It.getValNo(), ValVT: MVT::i32,
198 Reg: RegResult[RegIdx], LocVT: MVT::i64, HTP: Info));
199 UseHigh = !UseHigh;
200 if (!UseHigh)
201 ++RegIdx;
202 }
203 PendingMembers.clear();
204 return true;
205 }
206
207 if (!LocVT.isScalableVector()) {
208 // Mark all regs in the class as unavailable
209 for (auto Reg : RegList)
210 State.AllocateReg(Reg);
211 }
212
213 const MaybeAlign StackAlign =
214 State.getMachineFunction().getDataLayout().getStackAlignment();
215 assert(StackAlign && "data layout string is missing stack alignment");
216 const Align MemAlign = ArgFlags.getNonZeroMemAlign();
217 Align SlotAlign = std::min(a: MemAlign, b: *StackAlign);
218 if (!Subtarget.isTargetDarwin())
219 SlotAlign = std::max(a: SlotAlign, b: Align(8));
220
221 return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
222}
223
224// TableGen provides definitions of the calling convention analysis entry
225// points.
226#include "AArch64GenCallingConv.inc"
227