//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of custom routines for the X86
// Calling Convention that aren't done by tablegen.
//
//===----------------------------------------------------------------------===//

#include "X86CallingConv.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/Module.h"

using namespace llvm;

/// When the regcall calling convention is compiled for a 32-bit arch, 64-bit
/// mask values require special treatment: the value is split and assigned to
/// two GPRs.
/// \return true if registers were allocated and false otherwise.
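///
/// For illustration only (hypothetical prototype, not from this file): on a
/// 32-bit target, a call such as
/// \code
///   __attribute__((regcall)) void f(__mmask64 M); // i64 mask under -m32
/// \endcode
/// legalizes M into two i32 halves, and this routine places each half in its
/// own GPR (e.g. EAX and ECX) instead of splitting it across a register and
/// the stack.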
static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
                                          MVT &LocVT,
                                          CCValAssign::LocInfo &LocInfo,
                                          ISD::ArgFlagsTy &ArgFlags,
                                          CCState &State) {
  // List of GPR registers that are available to store values in regcall
  // calling convention.
  static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
                                      X86::ESI};

  // This vector holds all the registers that are still available for
  // allocation.
  SmallVector<unsigned, 5> AvailableRegs;

  // Search for the available registers.
  for (auto Reg : RegList) {
    if (!State.isAllocated(Reg))
      AvailableRegs.push_back(Reg);
  }

  const size_t RequiredGprsUponSplit = 2;
  if (AvailableRegs.size() < RequiredGprsUponSplit)
    return false; // Not enough free registers - continue the search.

  // Allocate the available registers.
  for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {

    // Mark the register as allocated.
    MCRegister Reg = State.AllocateReg(AvailableRegs[I]);

    // Since we previously made sure that 2 registers are available, we expect
    // that a real register number will be returned.
    assert(Reg && "Expecting a register to be available");

    // Assign the value to the allocated register.
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  }

  // Successfully allocated registers - stop scanning further rules.
  return true;
}

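/// Return the six vector registers that vectorcall hands out for a value of
/// the given width: ZMM0-ZMM5, YMM0-YMM5, or XMM0-XMM5, matching the register
/// class to the value type.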
static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
  if (ValVT.is512BitVector()) {
    static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
                                           X86::ZMM3, X86::ZMM4, X86::ZMM5};
    return RegListZMM;
  }

  if (ValVT.is256BitVector()) {
    static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
                                           X86::YMM3, X86::YMM4, X86::YMM5};
    return RegListYMM;
  }

  static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
                                         X86::XMM3, X86::XMM4, X86::XMM5};
  return RegListXMM;
}

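/// Return the four Win64 integer argument registers; the 64-bit vectorcall
/// logic below walks this list to account for shadow GPR slots.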
static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
  static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
  return RegListGPR;
}

static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State) {

  ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
  bool Is64bit = static_cast<const X86Subtarget &>(
                     State.getMachineFunction().getSubtarget())
                     .is64Bit();

  for (auto Reg : RegList) {
    // If the register is not marked as allocated - assign to it.
    if (!State.isAllocated(Reg)) {
      MCRegister AssignedReg = State.AllocateReg(Reg);
      assert(AssignedReg == Reg && "Expecting a valid register allocation");
      State.addLoc(
          CCValAssign::getReg(ValNo, ValVT, AssignedReg, LocVT, LocInfo));
      return true;
    }
    // If the register is marked as shadow allocated - assign to it.
    if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return true;
    }
  }

  llvm_unreachable("Clang should ensure that hva marked vectors will have "
                   "an available register.");
  return false;
}

/// The vectorcall calling convention has special handling for vector types
/// and HVAs on 64-bit arches.
/// For HVAs, shadow registers may be allocated on the first pass,
/// and the actual XMM registers are allocated on the second pass.
/// For vector types, the actual XMM registers are allocated on the first pass.
/// \return true if registers were allocated and false otherwise.
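///
/// For illustration only (hypothetical types, not from this file):
/// \code
///   struct HVA2 { __m128 X, Y; };       // homogeneous vector aggregate
///   void __vectorcall F(int I, HVA2 H); // H's parts end up in XMM registers
/// \endcode
/// The first pass only reserves shadow registers for H; the second pass
/// (isSecArgPass) assigns its elements to real XMM registers via
/// CC_X86_VectorCallAssignRegister.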
static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by vectorcall spec:
  // "A vector type is either a floating-point type, for example,
  // a float or double, or a SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    // If R9 was already assigned, we are past the fourth argument. Because
    // this is not an HVA / vector type, we need to allocate a shadow XMM
    // register.
    if (State.isAllocated(X86::R9)) {
      // Assign shadow XMM register.
      (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
    }

    return false;
  }

  if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
    // Assign shadow GPR register.
    (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());

    // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
    if (MCRegister Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
      // In the vectorcall calling convention, additional shadow stack space
      // can be created on top of the basic 32 bytes of Win64. This happens
      // when the fifth or sixth argument is a vector type or HVA; in that
      // case a shadow stack slot of 8 bytes is allocated for each such
      // argument.
      const TargetRegisterInfo *TRI =
          State.getMachineFunction().getSubtarget().getRegisterInfo();
      if (TRI->regsOverlap(Reg, X86::XMM4) || TRI->regsOverlap(Reg, X86::XMM5))
        State.AllocateStack(8, Align(8));

      if (!ArgFlags.isHva()) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
        return true; // Allocated a register - stop the search.
      }
    }
  }

  // If this is an HVA - stop the search; otherwise continue it.
  return ArgFlags.isHva();
}

/// The vectorcall calling convention has special handling for vector types
/// and HVAs on 32-bit arches.
/// For HVAs, the actual XMM registers are allocated on the second pass.
/// For vector types, the actual XMM registers are allocated on the first pass.
/// \return true if registers were allocated and false otherwise.
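///
/// For illustration only (hypothetical prototype): given seven __m128
/// arguments, the first six receive XMM0-XMM5; the seventh finds no free
/// register and is rewritten below to be passed indirectly, by address,
/// with the inreg flag set.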
static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // On the second pass, go through the HVAs only.
  if (ArgFlags.isSecArgPass()) {
    if (ArgFlags.isHva())
      return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
                                             ArgFlags, State);
    return true;
  }

  // Process only vector types as defined by vectorcall spec:
  // "A vector type is either a floating-point type, for example,
  // a float or double, or a SIMD vector type, for example, __m128 or __m256".
  if (!(ValVT.isFloatingPoint() ||
        (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
    return false;
  }

  if (ArgFlags.isHva())
    return true; // If this is an HVA - stop the search.

  // Assign XMM register.
  if (MCRegister Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return true;
  }

  // If we did not find an available XMM register for a vector, pass it
  // indirectly. This is similar to CCPassIndirect, with the addition of
  // inreg.
  if (!ValVT.isFloatingPoint()) {
    LocVT = MVT::i32;
    LocInfo = CCValAssign::Indirect;
    ArgFlags.setInReg();
  }

  return false; // No register was assigned - continue the search.
}

static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
                                CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
                                CCState &) {
  llvm_unreachable("The AnyReg calling convention is only supported by the "
                   "stackmap and patchpoint intrinsics.");
  // Gracefully fall back to the X86 C calling convention in Release builds.
  return false;
}

static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                               CCValAssign::LocInfo &LocInfo,
                               ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
  // not to split an i64 or double between a register and the stack.
  static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
  static const unsigned NumRegs = std::size(RegList);

  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();

  // If this is the first part of a double/i64/i128, or if we're already in
  // the middle of a split, add to the pending list. If this is not the end
  // of the split, return; otherwise go on to process the pending list.
  if (ArgFlags.isSplit() || !PendingMembers.empty()) {
    PendingMembers.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    if (!ArgFlags.isSplitEnd())
      return true;
  }

  // If there are no pending members, we are not in the middle of a split,
  // so do the usual inreg stuff.
  if (PendingMembers.empty()) {
    if (MCRegister Reg = State.AllocateReg(RegList)) {
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
      return true;
    }
    return false;
  }

  assert(ArgFlags.isSplitEnd());

  // We now have the entire original argument in PendingMembers, so decide
  // whether to use registers or the stack.
  // Per the MCU ABI:
  // a) To use registers, we need to have enough of them free to contain
  //    the entire argument.
  // b) We never want to use more than 2 registers for a single argument.
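  // For example, an i64 split into two i32 parts is placed in a register
  // pair (say EAX:EDX) only when both registers are free; otherwise both
  // halves go to the stack, never one of each.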

  unsigned FirstFree = State.getFirstUnallocated(RegList);
  bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);

  for (auto &It : PendingMembers) {
    if (UseRegs)
      It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
    else
      It.convertToMem(State.AllocateStack(4, Align(4)));
    State.addLoc(It);
  }

  PendingMembers.clear();

  return true;
}

/// X86 interrupt handlers can only take one or two stack arguments, but if
/// there are two arguments, they are in the opposite order from the standard
/// convention. Therefore, we have to look at the argument count up front
/// before allocating stack for each argument.
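///
/// For illustration only (names are hypothetical), the two supported
/// prototypes look roughly like:
/// \code
///   __attribute__((interrupt)) void isr(struct frame *F);            // one arg
///   __attribute__((interrupt)) void isr(struct frame *F, uword Err); // + error code
/// \endcode
/// When the error code is present, the CPU pushes it last, so it appears
/// first on the stack, ahead of the interrupt frame.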
static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                        CCValAssign::LocInfo &LocInfo,
                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  const MachineFunction &MF = State.getMachineFunction();
  size_t ArgCount = MF.getFunction().arg_size();
  bool Is64Bit = MF.getSubtarget<X86Subtarget>().is64Bit();
  unsigned SlotSize = Is64Bit ? 8 : 4;
  unsigned Offset;
  if (ArgCount == 1 && ValNo == 0) {
    // If we have one argument, the argument is five stack slots big, at fixed
    // offset zero.
    Offset = State.AllocateStack(5 * SlotSize, Align(4));
  } else if (ArgCount == 2 && ValNo == 0) {
    // If we have two arguments, the stack slot is *after* the error code
    // argument. Pretend it doesn't consume stack space, and account for it
    // when we assign the second argument.
    Offset = SlotSize;
  } else if (ArgCount == 2 && ValNo == 1) {
    // If this is the second of two arguments, it must be the error code. It
    // appears first on the stack, and is then followed by the five slot
    // interrupt struct.
    Offset = 0;
    (void)State.AllocateStack(6 * SlotSize, Align(4));
  } else {
    report_fatal_error("unsupported x86 interrupt prototype");
  }

  // FIXME: This should be accounted for in
  // X86FrameLowering::getFrameIndexReference, not here.
  if (Is64Bit && ArgCount == 2)
    Offset += SlotSize;

  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return true;
}

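/// Promote a pointer argument to a full, zero-extended 64-bit location, then
/// return false so the normal tablegen rules assign the actual register or
/// stack slot. (This matters e.g. on ILP32 targets such as x32, where
/// pointers are only 32 bits wide.)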
static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                              CCValAssign::LocInfo &LocInfo,
                              ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  if (LocVT != MVT::i64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::ZExt;
  }
  return false;
}

/// Special handling for i128: Either allocate the value to two consecutive
/// i64 registers, or to the stack. Do not partially allocate in registers,
/// and do not reserve any registers when allocating to the stack.
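///
/// For example, an i128 argument reaches this routine as two i64 parts
/// marked InConsecutiveRegs; both parts land either in a consecutive
/// register pair (such as RDI:RSI) or together in one 16-byte aligned
/// stack slot.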
static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  assert(ValVT == MVT::i64 && "Should have i64 parts");
  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
  PendingMembers.push_back(
      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));

  if (!ArgFlags.isInConsecutiveRegsLast())
    return true;

  unsigned NumRegs = PendingMembers.size();
  assert(NumRegs == 2 && "Should have two parts");

  static const MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX,
                                   X86::RCX, X86::R8,  X86::R9};
  ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
  if (!Allocated.empty()) {
    PendingMembers[0].convertToReg(Allocated[0]);
    PendingMembers[1].convertToReg(Allocated[1]);
  } else {
    int64_t Offset = State.AllocateStack(16, Align(16));
    PendingMembers[0].convertToMem(Offset);
    PendingMembers[1].convertToMem(Offset + 8);
  }
  State.addLoc(PendingMembers[0]);
  State.addLoc(PendingMembers[1]);
  PendingMembers.clear();
  return true;
}

// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"