//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the table-generated and custom routines for the AArch64
// Calling Convention.
//
//===----------------------------------------------------------------------===//
| 13 | |
| 14 | #include "AArch64CallingConvention.h" |
| 15 | #include "AArch64.h" |
| 16 | #include "AArch64InstrInfo.h" |
| 17 | #include "AArch64Subtarget.h" |
| 18 | #include "llvm/CodeGen/CallingConvLower.h" |
| 19 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 20 | using namespace llvm; |
| 21 | |
| 22 | static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, |
| 23 | AArch64::X3, AArch64::X4, AArch64::X5, |
| 24 | AArch64::X6, AArch64::X7}; |
| 25 | static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, |
| 26 | AArch64::H3, AArch64::H4, AArch64::H5, |
| 27 | AArch64::H6, AArch64::H7}; |
| 28 | static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, |
| 29 | AArch64::S3, AArch64::S4, AArch64::S5, |
| 30 | AArch64::S6, AArch64::S7}; |
| 31 | static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, |
| 32 | AArch64::D3, AArch64::D4, AArch64::D5, |
| 33 | AArch64::D6, AArch64::D7}; |
| 34 | static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, |
| 35 | AArch64::Q3, AArch64::Q4, AArch64::Q5, |
| 36 | AArch64::Q6, AArch64::Q7}; |
| 37 | static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2, |
| 38 | AArch64::Z3, AArch64::Z4, AArch64::Z5, |
| 39 | AArch64::Z6, AArch64::Z7}; |
| 40 | static const MCPhysReg PRegList[] = {AArch64::P0, AArch64::P1, AArch64::P2, |
| 41 | AArch64::P3}; |
| 42 | |
| 43 | static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers, |
| 44 | MVT LocVT, ISD::ArgFlagsTy &ArgFlags, |
| 45 | CCState &State, Align SlotAlign) { |
| 46 | if (LocVT.isScalableVector()) { |
| 47 | const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( |
| 48 | State.getMachineFunction().getSubtarget()); |
| 49 | const AArch64TargetLowering *TLI = Subtarget.getTargetLowering(); |
| 50 | |
| 51 | // We are about to reinvoke the CCAssignFn auto-generated handler. If we |
| 52 | // don't unset these flags we will get stuck in an infinite loop forever |
| 53 | // invoking the custom handler. |
| 54 | ArgFlags.setInConsecutiveRegs(false); |
| 55 | ArgFlags.setInConsecutiveRegsLast(false); |
| 56 | |
| 57 | // The calling convention for passing SVE tuples states that in the event |
| 58 | // we cannot allocate enough registers for the tuple we should still leave |
| 59 | // any remaining registers unallocated. However, when we call the |
| 60 | // CCAssignFn again we want it to behave as if all remaining registers are |
| 61 | // allocated. This will force the code to pass the tuple indirectly in |
| 62 | // accordance with the PCS. |
| 63 | bool ZRegsAllocated[8]; |
| 64 | for (int I = 0; I < 8; I++) { |
| 65 | ZRegsAllocated[I] = State.isAllocated(Reg: ZRegList[I]); |
| 66 | State.AllocateReg(Reg: ZRegList[I]); |
| 67 | } |
| 68 | // The same applies to P registers. |
| 69 | bool PRegsAllocated[4]; |
| 70 | for (int I = 0; I < 4; I++) { |
| 71 | PRegsAllocated[I] = State.isAllocated(Reg: PRegList[I]); |
| 72 | State.AllocateReg(Reg: PRegList[I]); |
| 73 | } |
| 74 | |
| 75 | auto &It = PendingMembers[0]; |
| 76 | CCAssignFn *AssignFn = |
| 77 | TLI->CCAssignFnForCall(CC: State.getCallingConv(), /*IsVarArg=*/false); |
| 78 | if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full, |
| 79 | ArgFlags, State)) |
| 80 | llvm_unreachable("Call operand has unhandled type" ); |
| 81 | |
| 82 | // Return the flags to how they were before. |
| 83 | ArgFlags.setInConsecutiveRegs(true); |
| 84 | ArgFlags.setInConsecutiveRegsLast(true); |
| 85 | |
| 86 | // Return the register state back to how it was before, leaving any |
| 87 | // unallocated registers available for other smaller types. |
| 88 | for (int I = 0; I < 8; I++) |
| 89 | if (!ZRegsAllocated[I]) |
| 90 | State.DeallocateReg(Reg: ZRegList[I]); |
| 91 | for (int I = 0; I < 4; I++) |
| 92 | if (!PRegsAllocated[I]) |
| 93 | State.DeallocateReg(Reg: PRegList[I]); |
| 94 | |
| 95 | // All pending members have now been allocated |
| 96 | PendingMembers.clear(); |
| 97 | return true; |
| 98 | } |
| 99 | |
| 100 | unsigned Size = LocVT.getSizeInBits() / 8; |
| 101 | for (auto &It : PendingMembers) { |
| 102 | It.convertToMem(Offset: State.AllocateStack(Size, Alignment: SlotAlign)); |
| 103 | State.addLoc(V: It); |
| 104 | SlotAlign = Align(1); |
| 105 | } |
| 106 | |
| 107 | // All pending members have now been allocated |
| 108 | PendingMembers.clear(); |
| 109 | return true; |
| 110 | } |
| 111 | |
| 112 | /// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An |
| 113 | /// [N x Ty] type must still be contiguous in memory though. |
| 114 | static bool CC_AArch64_Custom_Stack_Block( |
| 115 | unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, |
| 116 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
| 117 | SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); |
| 118 | |
| 119 | // Add the argument to the list to be allocated once we know the size of the |
| 120 | // block. |
| 121 | PendingMembers.push_back( |
| 122 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
| 123 | |
| 124 | if (!ArgFlags.isInConsecutiveRegsLast()) |
| 125 | return true; |
| 126 | |
| 127 | return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign: Align(8)); |
| 128 | } |
| 129 | |
| 130 | /// Given an [N x Ty] block, it should be passed in a consecutive sequence of |
| 131 | /// registers. If no such sequence is available, mark the rest of the registers |
| 132 | /// of that type as used and place the argument on the stack. |
| 133 | static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, |
| 134 | CCValAssign::LocInfo &LocInfo, |
| 135 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
| 136 | const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( |
| 137 | State.getMachineFunction().getSubtarget()); |
| 138 | bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO(); |
| 139 | |
| 140 | // Try to allocate a contiguous block of registers, each of the correct |
| 141 | // size to hold one member. |
| 142 | ArrayRef<MCPhysReg> RegList; |
| 143 | if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32)) |
| 144 | RegList = XRegList; |
| 145 | else if (LocVT.SimpleTy == MVT::f16 || LocVT.SimpleTy == MVT::bf16) |
| 146 | RegList = HRegList; |
| 147 | else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector()) |
| 148 | RegList = SRegList; |
| 149 | else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector()) |
| 150 | RegList = DRegList; |
| 151 | else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector()) |
| 152 | RegList = QRegList; |
| 153 | else if (LocVT.isScalableVector()) { |
| 154 | // Scalable masks should be pass by Predicate registers. |
| 155 | if (LocVT == MVT::nxv1i1 || LocVT == MVT::nxv2i1 || LocVT == MVT::nxv4i1 || |
| 156 | LocVT == MVT::nxv8i1 || LocVT == MVT::nxv16i1 || |
| 157 | LocVT == MVT::aarch64svcount) |
| 158 | RegList = PRegList; |
| 159 | else |
| 160 | RegList = ZRegList; |
| 161 | } else { |
| 162 | // Not an array we want to split up after all. |
| 163 | return false; |
| 164 | } |
| 165 | |
| 166 | SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); |
| 167 | |
| 168 | // Add the argument to the list to be allocated once we know the size of the |
| 169 | // block. |
| 170 | PendingMembers.push_back( |
| 171 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
| 172 | |
| 173 | if (!ArgFlags.isInConsecutiveRegsLast()) |
| 174 | return true; |
| 175 | |
| 176 | // [N x i32] arguments get packed into x-registers on Darwin's arm64_32 |
| 177 | // because that's how the armv7k Clang front-end emits small structs. |
| 178 | unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1; |
| 179 | ArrayRef<MCPhysReg> RegResult = State.AllocateRegBlock( |
| 180 | Regs: RegList, RegsRequired: alignTo(Value: PendingMembers.size(), Align: EltsPerReg) / EltsPerReg); |
| 181 | if (!RegResult.empty() && EltsPerReg == 1) { |
| 182 | for (const auto &[It, Reg] : zip(t&: PendingMembers, u&: RegResult)) { |
| 183 | It.convertToReg(Reg); |
| 184 | State.addLoc(V: It); |
| 185 | } |
| 186 | PendingMembers.clear(); |
| 187 | return true; |
| 188 | } else if (!RegResult.empty()) { |
| 189 | assert(EltsPerReg == 2 && "unexpected ABI" ); |
| 190 | bool UseHigh = false; |
| 191 | CCValAssign::LocInfo Info; |
| 192 | unsigned RegIdx = 0; |
| 193 | for (auto &It : PendingMembers) { |
| 194 | Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt; |
| 195 | State.addLoc(V: CCValAssign::getReg(ValNo: It.getValNo(), ValVT: MVT::i32, |
| 196 | Reg: RegResult[RegIdx], LocVT: MVT::i64, HTP: Info)); |
| 197 | UseHigh = !UseHigh; |
| 198 | if (!UseHigh) |
| 199 | ++RegIdx; |
| 200 | } |
| 201 | PendingMembers.clear(); |
| 202 | return true; |
| 203 | } |
| 204 | |
| 205 | if (!LocVT.isScalableVector()) { |
| 206 | // Mark all regs in the class as unavailable |
| 207 | for (auto Reg : RegList) |
| 208 | State.AllocateReg(Reg); |
| 209 | } |
| 210 | |
| 211 | const MaybeAlign StackAlign = |
| 212 | State.getMachineFunction().getDataLayout().getStackAlignment(); |
| 213 | assert(StackAlign && "data layout string is missing stack alignment" ); |
| 214 | const Align MemAlign = ArgFlags.getNonZeroMemAlign(); |
| 215 | Align SlotAlign = std::min(a: MemAlign, b: *StackAlign); |
| 216 | if (!Subtarget.isTargetDarwin()) |
| 217 | SlotAlign = std::max(a: SlotAlign, b: Align(8)); |
| 218 | |
| 219 | return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign); |
| 220 | } |
| 221 | |
// TableGen provides definitions of the calling convention analysis entry
// points.
| 224 | #include "AArch64GenCallingConv.inc" |
| 225 | |