//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the table-generated and custom routines for the AArch64
// Calling Convention.
//
//===----------------------------------------------------------------------===//

#include "AArch64CallingConvention.h"
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/CallingConv.h"
using namespace llvm;

23 | static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, |
24 | AArch64::X3, AArch64::X4, AArch64::X5, |
25 | AArch64::X6, AArch64::X7}; |
26 | static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, |
27 | AArch64::H3, AArch64::H4, AArch64::H5, |
28 | AArch64::H6, AArch64::H7}; |
29 | static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, |
30 | AArch64::S3, AArch64::S4, AArch64::S5, |
31 | AArch64::S6, AArch64::S7}; |
32 | static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, |
33 | AArch64::D3, AArch64::D4, AArch64::D5, |
34 | AArch64::D6, AArch64::D7}; |
35 | static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, |
36 | AArch64::Q3, AArch64::Q4, AArch64::Q5, |
37 | AArch64::Q6, AArch64::Q7}; |
38 | static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2, |
39 | AArch64::Z3, AArch64::Z4, AArch64::Z5, |
40 | AArch64::Z6, AArch64::Z7}; |
41 | static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2, |
42 | AArch64::P3}; |
43 | |
44 | static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers, |
45 | MVT LocVT, ISD::ArgFlagsTy &ArgFlags, |
46 | CCState &State, Align SlotAlign) { |
47 | if (LocVT.isScalableVector()) { |
48 | const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( |
49 | State.getMachineFunction().getSubtarget()); |
50 | const AArch64TargetLowering *TLI = Subtarget.getTargetLowering(); |
51 | |
52 | // We are about to reinvoke the CCAssignFn auto-generated handler. If we |
53 | // don't unset these flags we will get stuck in an infinite loop forever |
54 | // invoking the custom handler. |
55 | ArgFlags.setInConsecutiveRegs(false); |
56 | ArgFlags.setInConsecutiveRegsLast(false); |
57 | |
58 | // The calling convention for passing SVE tuples states that in the event |
59 | // we cannot allocate enough registers for the tuple we should still leave |
60 | // any remaining registers unallocated. However, when we call the |
61 | // CCAssignFn again we want it to behave as if all remaining registers are |
62 | // allocated. This will force the code to pass the tuple indirectly in |
63 | // accordance with the PCS. |
64 | bool ZRegsAllocated[8]; |
65 | for (int I = 0; I < 8; I++) { |
66 | ZRegsAllocated[I] = State.isAllocated(Reg: ZRegList[I]); |
67 | State.AllocateReg(Reg: ZRegList[I]); |
68 | } |
69 | // The same applies to P registers. |
70 | bool PRegsAllocated[4]; |
71 | for (int I = 0; I < 4; I++) { |
72 | PRegsAllocated[I] = State.isAllocated(Reg: PRegList[I]); |
73 | State.AllocateReg(Reg: PRegList[I]); |
74 | } |
75 | |
76 | auto &It = PendingMembers[0]; |
77 | CCAssignFn *AssignFn = |
78 | TLI->CCAssignFnForCall(CC: State.getCallingConv(), /*IsVarArg=*/false); |
79 | if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full, |
80 | ArgFlags, State)) |
81 | llvm_unreachable("Call operand has unhandled type" ); |
82 | |
83 | // Return the flags to how they were before. |
84 | ArgFlags.setInConsecutiveRegs(true); |
85 | ArgFlags.setInConsecutiveRegsLast(true); |
86 | |
87 | // Return the register state back to how it was before, leaving any |
88 | // unallocated registers available for other smaller types. |
89 | for (int I = 0; I < 8; I++) |
90 | if (!ZRegsAllocated[I]) |
91 | State.DeallocateReg(Reg: ZRegList[I]); |
92 | for (int I = 0; I < 4; I++) |
93 | if (!PRegsAllocated[I]) |
94 | State.DeallocateReg(Reg: PRegList[I]); |
95 | |
96 | // All pending members have now been allocated |
97 | PendingMembers.clear(); |
98 | return true; |
99 | } |
100 | |
101 | unsigned Size = LocVT.getSizeInBits() / 8; |
102 | for (auto &It : PendingMembers) { |
103 | It.convertToMem(Offset: State.AllocateStack(Size, Alignment: SlotAlign)); |
104 | State.addLoc(V: It); |
105 | SlotAlign = Align(1); |
106 | } |
107 | |
108 | // All pending members have now been allocated |
109 | PendingMembers.clear(); |
110 | return true; |
111 | } |
112 | |
113 | /// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An |
114 | /// [N x Ty] type must still be contiguous in memory though. |
115 | static bool CC_AArch64_Custom_Stack_Block( |
116 | unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, |
117 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
118 | SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); |
119 | |
120 | // Add the argument to the list to be allocated once we know the size of the |
121 | // block. |
122 | PendingMembers.push_back( |
123 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
124 | |
125 | if (!ArgFlags.isInConsecutiveRegsLast()) |
126 | return true; |
127 | |
128 | return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign: Align(8)); |
129 | } |
130 | |
131 | /// Given an [N x Ty] block, it should be passed in a consecutive sequence of |
132 | /// registers. If no such sequence is available, mark the rest of the registers |
133 | /// of that type as used and place the argument on the stack. |
134 | static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, |
135 | CCValAssign::LocInfo &LocInfo, |
136 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
137 | const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( |
138 | State.getMachineFunction().getSubtarget()); |
139 | bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO(); |
140 | |
141 | // Try to allocate a contiguous block of registers, each of the correct |
142 | // size to hold one member. |
143 | ArrayRef<MCPhysReg> RegList; |
144 | if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32)) |
145 | RegList = XRegList; |
146 | else if (LocVT.SimpleTy == MVT::f16) |
147 | RegList = HRegList; |
148 | else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector()) |
149 | RegList = SRegList; |
150 | else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector()) |
151 | RegList = DRegList; |
152 | else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector()) |
153 | RegList = QRegList; |
154 | else if (LocVT.isScalableVector()) { |
155 | // Scalable masks should be pass by Predicate registers. |
156 | if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 || |
157 | LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 || |
158 | LocVT == MVT::aarch64svcount) |
159 | RegList = PRegList; |
160 | else |
161 | RegList = ZRegList; |
162 | } else { |
163 | // Not an array we want to split up after all. |
164 | return false; |
165 | } |
166 | |
167 | SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); |
168 | |
169 | // Add the argument to the list to be allocated once we know the size of the |
170 | // block. |
171 | PendingMembers.push_back( |
172 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
173 | |
174 | if (!ArgFlags.isInConsecutiveRegsLast()) |
175 | return true; |
176 | |
177 | // [N x i32] arguments get packed into x-registers on Darwin's arm64_32 |
178 | // because that's how the armv7k Clang front-end emits small structs. |
179 | unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1; |
180 | unsigned RegResult = State.AllocateRegBlock( |
181 | Regs: RegList, RegsRequired: alignTo(Value: PendingMembers.size(), Align: EltsPerReg) / EltsPerReg); |
182 | if (RegResult && EltsPerReg == 1) { |
183 | for (auto &It : PendingMembers) { |
184 | It.convertToReg(RegNo: RegResult); |
185 | State.addLoc(V: It); |
186 | ++RegResult; |
187 | } |
188 | PendingMembers.clear(); |
189 | return true; |
190 | } else if (RegResult) { |
191 | assert(EltsPerReg == 2 && "unexpected ABI" ); |
192 | bool UseHigh = false; |
193 | CCValAssign::LocInfo Info; |
194 | for (auto &It : PendingMembers) { |
195 | Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt; |
196 | State.addLoc(V: CCValAssign::getReg(ValNo: It.getValNo(), ValVT: MVT::i32, RegNo: RegResult, |
197 | LocVT: MVT::i64, HTP: Info)); |
198 | UseHigh = !UseHigh; |
199 | if (!UseHigh) |
200 | ++RegResult; |
201 | } |
202 | PendingMembers.clear(); |
203 | return true; |
204 | } |
205 | |
206 | if (!LocVT.isScalableVector()) { |
207 | // Mark all regs in the class as unavailable |
208 | for (auto Reg : RegList) |
209 | State.AllocateReg(Reg); |
210 | } |
211 | |
212 | const Align StackAlign = |
213 | State.getMachineFunction().getDataLayout().getStackAlignment(); |
214 | const Align MemAlign = ArgFlags.getNonZeroMemAlign(); |
215 | Align SlotAlign = std::min(a: MemAlign, b: StackAlign); |
216 | if (!Subtarget.isTargetDarwin()) |
217 | SlotAlign = std::max(a: SlotAlign, b: Align(8)); |
218 | |
219 | return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign); |
220 | } |
// TableGen provides definitions of the calling convention analysis entry
// points.
#include "AArch64GenCallingConv.inc"