1 | //=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the implementation of custom routines for the X86 |
10 | // Calling Convention that aren't done by tablegen. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "X86CallingConv.h" |
15 | #include "X86Subtarget.h" |
16 | #include "llvm/ADT/SmallVector.h" |
17 | #include "llvm/CodeGen/CallingConvLower.h" |
18 | #include "llvm/IR/CallingConv.h" |
19 | #include "llvm/IR/Module.h" |
20 | |
21 | using namespace llvm; |
22 | |
23 | /// When regcall calling convention compiled to 32 bit arch, special treatment |
24 | /// is required for 64 bit masks. |
25 | /// The value should be assigned to two GPRs. |
26 | /// \return true if registers were allocated and false otherwise. |
27 | static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, |
28 | MVT &LocVT, |
29 | CCValAssign::LocInfo &LocInfo, |
30 | ISD::ArgFlagsTy &ArgFlags, |
31 | CCState &State) { |
32 | // List of GPR registers that are available to store values in regcall |
33 | // calling convention. |
34 | static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI, |
35 | X86::ESI}; |
36 | |
37 | // The vector will save all the available registers for allocation. |
38 | SmallVector<unsigned, 5> AvailableRegs; |
39 | |
40 | // searching for the available registers. |
41 | for (auto Reg : RegList) { |
42 | if (!State.isAllocated(Reg)) |
43 | AvailableRegs.push_back(Elt: Reg); |
44 | } |
45 | |
46 | const size_t RequiredGprsUponSplit = 2; |
47 | if (AvailableRegs.size() < RequiredGprsUponSplit) |
48 | return false; // Not enough free registers - continue the search. |
49 | |
50 | // Allocating the available registers. |
51 | for (unsigned I = 0; I < RequiredGprsUponSplit; I++) { |
52 | |
53 | // Marking the register as located. |
54 | unsigned Reg = State.AllocateReg(Reg: AvailableRegs[I]); |
55 | |
56 | // Since we previously made sure that 2 registers are available |
57 | // we expect that a real register number will be returned. |
58 | assert(Reg && "Expecting a register will be available" ); |
59 | |
60 | // Assign the value to the allocated register |
61 | State.addLoc(V: CCValAssign::getCustomReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
62 | } |
63 | |
64 | // Successful in allocating registers - stop scanning next rules. |
65 | return true; |
66 | } |
67 | |
68 | static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) { |
69 | if (ValVT.is512BitVector()) { |
70 | static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2, |
71 | X86::ZMM3, X86::ZMM4, X86::ZMM5}; |
72 | return ArrayRef(std::begin(arr: RegListZMM), std::end(arr: RegListZMM)); |
73 | } |
74 | |
75 | if (ValVT.is256BitVector()) { |
76 | static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2, |
77 | X86::YMM3, X86::YMM4, X86::YMM5}; |
78 | return ArrayRef(std::begin(arr: RegListYMM), std::end(arr: RegListYMM)); |
79 | } |
80 | |
81 | static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2, |
82 | X86::XMM3, X86::XMM4, X86::XMM5}; |
83 | return ArrayRef(std::begin(arr: RegListXMM), std::end(arr: RegListXMM)); |
84 | } |
85 | |
86 | static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() { |
87 | static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9}; |
88 | return ArrayRef(std::begin(arr: RegListGPR), std::end(arr: RegListGPR)); |
89 | } |
90 | |
91 | static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT, |
92 | MVT &LocVT, |
93 | CCValAssign::LocInfo &LocInfo, |
94 | ISD::ArgFlagsTy &ArgFlags, |
95 | CCState &State) { |
96 | |
97 | ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT); |
98 | bool Is64bit = static_cast<const X86Subtarget &>( |
99 | State.getMachineFunction().getSubtarget()) |
100 | .is64Bit(); |
101 | |
102 | for (auto Reg : RegList) { |
103 | // If the register is not marked as allocated - assign to it. |
104 | if (!State.isAllocated(Reg)) { |
105 | unsigned AssigedReg = State.AllocateReg(Reg); |
106 | assert(AssigedReg == Reg && "Expecting a valid register allocation" ); |
107 | State.addLoc( |
108 | V: CCValAssign::getReg(ValNo, ValVT, RegNo: AssigedReg, LocVT, HTP: LocInfo)); |
109 | return true; |
110 | } |
111 | // If the register is marked as shadow allocated - assign to it. |
112 | if (Is64bit && State.IsShadowAllocatedReg(Reg)) { |
113 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
114 | return true; |
115 | } |
116 | } |
117 | |
118 | llvm_unreachable("Clang should ensure that hva marked vectors will have " |
119 | "an available register." ); |
120 | return false; |
121 | } |
122 | |
123 | /// Vectorcall calling convention has special handling for vector types or |
124 | /// HVA for 64 bit arch. |
125 | /// For HVAs shadow registers might be allocated on the first pass |
126 | /// and actual XMM registers are allocated on the second pass. |
127 | /// For vector types, actual XMM registers are allocated on the first pass. |
128 | /// \return true if registers were allocated and false otherwise. |
129 | static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, |
130 | CCValAssign::LocInfo &LocInfo, |
131 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
132 | // On the second pass, go through the HVAs only. |
133 | if (ArgFlags.isSecArgPass()) { |
134 | if (ArgFlags.isHva()) |
135 | return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo, |
136 | ArgFlags, State); |
137 | return true; |
138 | } |
139 | |
140 | // Process only vector types as defined by vectorcall spec: |
141 | // "A vector type is either a floating-point type, for example, |
142 | // a float or double, or an SIMD vector type, for example, __m128 or __m256". |
143 | if (!(ValVT.isFloatingPoint() || |
144 | (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) { |
145 | // If R9 was already assigned it means that we are after the fourth element |
146 | // and because this is not an HVA / Vector type, we need to allocate |
147 | // shadow XMM register. |
148 | if (State.isAllocated(Reg: X86::R9)) { |
149 | // Assign shadow XMM register. |
150 | (void)State.AllocateReg(Regs: CC_X86_VectorCallGetSSEs(ValVT)); |
151 | } |
152 | |
153 | return false; |
154 | } |
155 | |
156 | if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) { |
157 | // Assign shadow GPR register. |
158 | (void)State.AllocateReg(Regs: CC_X86_64_VectorCallGetGPRs()); |
159 | |
160 | // Assign XMM register - (shadow for HVA and non-shadow for non HVA). |
161 | if (unsigned Reg = State.AllocateReg(Regs: CC_X86_VectorCallGetSSEs(ValVT))) { |
162 | // In Vectorcall Calling convention, additional shadow stack can be |
163 | // created on top of the basic 32 bytes of win64. |
164 | // It can happen if the fifth or sixth argument is vector type or HVA. |
165 | // At that case for each argument a shadow stack of 8 bytes is allocated. |
166 | const TargetRegisterInfo *TRI = |
167 | State.getMachineFunction().getSubtarget().getRegisterInfo(); |
168 | if (TRI->regsOverlap(RegA: Reg, RegB: X86::XMM4) || |
169 | TRI->regsOverlap(RegA: Reg, RegB: X86::XMM5)) |
170 | State.AllocateStack(Size: 8, Alignment: Align(8)); |
171 | |
172 | if (!ArgFlags.isHva()) { |
173 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
174 | return true; // Allocated a register - Stop the search. |
175 | } |
176 | } |
177 | } |
178 | |
179 | // If this is an HVA - Stop the search, |
180 | // otherwise continue the search. |
181 | return ArgFlags.isHva(); |
182 | } |
183 | |
184 | /// Vectorcall calling convention has special handling for vector types or |
185 | /// HVA for 32 bit arch. |
186 | /// For HVAs actual XMM registers are allocated on the second pass. |
187 | /// For vector types, actual XMM registers are allocated on the first pass. |
188 | /// \return true if registers were allocated and false otherwise. |
189 | static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, |
190 | CCValAssign::LocInfo &LocInfo, |
191 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
192 | // On the second pass, go through the HVAs only. |
193 | if (ArgFlags.isSecArgPass()) { |
194 | if (ArgFlags.isHva()) |
195 | return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo, |
196 | ArgFlags, State); |
197 | return true; |
198 | } |
199 | |
200 | // Process only vector types as defined by vectorcall spec: |
201 | // "A vector type is either a floating point type, for example, |
202 | // a float or double, or an SIMD vector type, for example, __m128 or __m256". |
203 | if (!(ValVT.isFloatingPoint() || |
204 | (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) { |
205 | return false; |
206 | } |
207 | |
208 | if (ArgFlags.isHva()) |
209 | return true; // If this is an HVA - Stop the search. |
210 | |
211 | // Assign XMM register. |
212 | if (unsigned Reg = State.AllocateReg(Regs: CC_X86_VectorCallGetSSEs(ValVT))) { |
213 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
214 | return true; |
215 | } |
216 | |
217 | // In case we did not find an available XMM register for a vector - |
218 | // pass it indirectly. |
219 | // It is similar to CCPassIndirect, with the addition of inreg. |
220 | if (!ValVT.isFloatingPoint()) { |
221 | LocVT = MVT::i32; |
222 | LocInfo = CCValAssign::Indirect; |
223 | ArgFlags.setInReg(); |
224 | } |
225 | |
226 | return false; // No register was assigned - Continue the search. |
227 | } |
228 | |
229 | static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &, |
230 | CCValAssign::LocInfo &, ISD::ArgFlagsTy &, |
231 | CCState &) { |
232 | llvm_unreachable("The AnyReg calling convention is only supported by the " |
233 | "stackmap and patchpoint intrinsics." ); |
234 | // gracefully fallback to X86 C calling convention on Release builds. |
235 | return false; |
236 | } |
237 | |
238 | static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, |
239 | CCValAssign::LocInfo &LocInfo, |
240 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
241 | // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure |
242 | // not to split i64 and double between a register and stack |
243 | static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; |
244 | static const unsigned NumRegs = std::size(RegList); |
245 | |
246 | SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); |
247 | |
248 | // If this is the first part of an double/i64/i128, or if we're already |
249 | // in the middle of a split, add to the pending list. If this is not |
250 | // the end of the split, return, otherwise go on to process the pending |
251 | // list |
252 | if (ArgFlags.isSplit() || !PendingMembers.empty()) { |
253 | PendingMembers.push_back( |
254 | Elt: CCValAssign::getPending(ValNo, ValVT, LocVT, HTP: LocInfo)); |
255 | if (!ArgFlags.isSplitEnd()) |
256 | return true; |
257 | } |
258 | |
259 | // If there are no pending members, we are not in the middle of a split, |
260 | // so do the usual inreg stuff. |
261 | if (PendingMembers.empty()) { |
262 | if (unsigned Reg = State.AllocateReg(Regs: RegList)) { |
263 | State.addLoc(V: CCValAssign::getReg(ValNo, ValVT, RegNo: Reg, LocVT, HTP: LocInfo)); |
264 | return true; |
265 | } |
266 | return false; |
267 | } |
268 | |
269 | assert(ArgFlags.isSplitEnd()); |
270 | |
271 | // We now have the entire original argument in PendingMembers, so decide |
272 | // whether to use registers or the stack. |
273 | // Per the MCU ABI: |
274 | // a) To use registers, we need to have enough of them free to contain |
275 | // the entire argument. |
276 | // b) We never want to use more than 2 registers for a single argument. |
277 | |
278 | unsigned FirstFree = State.getFirstUnallocated(Regs: RegList); |
279 | bool UseRegs = PendingMembers.size() <= std::min(a: 2U, b: NumRegs - FirstFree); |
280 | |
281 | for (auto &It : PendingMembers) { |
282 | if (UseRegs) |
283 | It.convertToReg(RegNo: State.AllocateReg(Reg: RegList[FirstFree++])); |
284 | else |
285 | It.convertToMem(Offset: State.AllocateStack(Size: 4, Alignment: Align(4))); |
286 | State.addLoc(V: It); |
287 | } |
288 | |
289 | PendingMembers.clear(); |
290 | |
291 | return true; |
292 | } |
293 | |
294 | /// X86 interrupt handlers can only take one or two stack arguments, but if |
295 | /// there are two arguments, they are in the opposite order from the standard |
296 | /// convention. Therefore, we have to look at the argument count up front before |
297 | /// allocating stack for each argument. |
298 | static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT, |
299 | CCValAssign::LocInfo &LocInfo, |
300 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
301 | const MachineFunction &MF = State.getMachineFunction(); |
302 | size_t ArgCount = State.getMachineFunction().getFunction().arg_size(); |
303 | bool Is64Bit = MF.getSubtarget<X86Subtarget>().is64Bit(); |
304 | unsigned SlotSize = Is64Bit ? 8 : 4; |
305 | unsigned Offset; |
306 | if (ArgCount == 1 && ValNo == 0) { |
307 | // If we have one argument, the argument is five stack slots big, at fixed |
308 | // offset zero. |
309 | Offset = State.AllocateStack(Size: 5 * SlotSize, Alignment: Align(4)); |
310 | } else if (ArgCount == 2 && ValNo == 0) { |
311 | // If we have two arguments, the stack slot is *after* the error code |
312 | // argument. Pretend it doesn't consume stack space, and account for it when |
313 | // we assign the second argument. |
314 | Offset = SlotSize; |
315 | } else if (ArgCount == 2 && ValNo == 1) { |
316 | // If this is the second of two arguments, it must be the error code. It |
317 | // appears first on the stack, and is then followed by the five slot |
318 | // interrupt struct. |
319 | Offset = 0; |
320 | (void)State.AllocateStack(Size: 6 * SlotSize, Alignment: Align(4)); |
321 | } else { |
322 | report_fatal_error(reason: "unsupported x86 interrupt prototype" ); |
323 | } |
324 | |
325 | // FIXME: This should be accounted for in |
326 | // X86FrameLowering::getFrameIndexReference, not here. |
327 | if (Is64Bit && ArgCount == 2) |
328 | Offset += SlotSize; |
329 | |
330 | State.addLoc(V: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, HTP: LocInfo)); |
331 | return true; |
332 | } |
333 | |
334 | static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT, |
335 | CCValAssign::LocInfo &LocInfo, |
336 | ISD::ArgFlagsTy &ArgFlags, CCState &State) { |
337 | if (LocVT != MVT::i64) { |
338 | LocVT = MVT::i64; |
339 | LocInfo = CCValAssign::ZExt; |
340 | } |
341 | return false; |
342 | } |
343 | |
344 | // Provides entry points of CC_X86 and RetCC_X86. |
345 | #include "X86GenCallingConv.inc" |
346 | |