1 | //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the X86 implementation of TargetFrameLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "X86FrameLowering.h" |
14 | #include "MCTargetDesc/X86MCTargetDesc.h" |
15 | #include "X86InstrBuilder.h" |
16 | #include "X86InstrInfo.h" |
17 | #include "X86MachineFunctionInfo.h" |
18 | #include "X86Subtarget.h" |
19 | #include "X86TargetMachine.h" |
20 | #include "llvm/ADT/Statistic.h" |
21 | #include "llvm/CodeGen/LivePhysRegs.h" |
22 | #include "llvm/CodeGen/MachineFrameInfo.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | #include "llvm/CodeGen/MachineModuleInfo.h" |
26 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
27 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
28 | #include "llvm/IR/DataLayout.h" |
29 | #include "llvm/IR/EHPersonalities.h" |
30 | #include "llvm/IR/Function.h" |
31 | #include "llvm/IR/Module.h" |
32 | #include "llvm/MC/MCAsmInfo.h" |
33 | #include "llvm/MC/MCObjectFileInfo.h" |
34 | #include "llvm/MC/MCSymbol.h" |
35 | #include "llvm/Support/Debug.h" |
36 | #include "llvm/Support/LEB128.h" |
37 | #include "llvm/Target/TargetOptions.h" |
38 | #include <cstdlib> |
39 | |
40 | #define DEBUG_TYPE "x86-fl" |
41 | |
STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
46 | |
47 | using namespace llvm; |
48 | |
49 | X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, |
50 | MaybeAlign StackAlignOverride) |
51 | : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), |
52 | STI.is64Bit() ? -8 : -4), |
53 | STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { |
54 | // Cache a bunch of frame-related predicates for this subtarget. |
55 | SlotSize = TRI->getSlotSize(); |
56 | Is64Bit = STI.is64Bit(); |
57 | IsLP64 = STI.isTarget64BitLP64(); |
58 | // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. |
59 | Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); |
60 | StackPtr = TRI->getStackRegister(); |
61 | } |
62 | |
63 | bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { |
64 | return !MF.getFrameInfo().hasVarSizedObjects() && |
65 | !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() && |
66 | !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall(); |
67 | } |
68 | |
69 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the |
70 | /// call frame pseudos can be simplified. Having a FP, as in the default |
71 | /// implementation, is not sufficient here since we can't always use it. |
72 | /// Use a more nuanced condition. |
73 | bool X86FrameLowering::canSimplifyCallFramePseudos( |
74 | const MachineFunction &MF) const { |
75 | return hasReservedCallFrame(MF) || |
76 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || |
77 | (hasFP(MF) && !TRI->hasStackRealignment(MF)) || |
78 | TRI->hasBasePointer(MF); |
79 | } |
80 | |
// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function? Normally, this is required only when the function
83 | // has any stack objects. However, FI resolution actually has another job, |
84 | // not apparent from the title - it resolves callframesetup/destroy |
85 | // that were not simplified earlier. |
86 | // So, this is required for x86 functions that have push sequences even |
87 | // when there are no stack objects. |
88 | bool X86FrameLowering::needsFrameIndexResolution( |
89 | const MachineFunction &MF) const { |
90 | return MF.getFrameInfo().hasStackObjects() || |
91 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); |
92 | } |
93 | |
94 | /// hasFP - Return true if the specified function should have a dedicated frame |
95 | /// pointer register. This is true if the function has variable sized allocas |
96 | /// or if frame pointer elimination is disabled. |
97 | bool X86FrameLowering::hasFP(const MachineFunction &MF) const { |
98 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
99 | return (MF.getTarget().Options.DisableFramePointerElim(MF) || |
100 | TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || |
101 | MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() || |
102 | MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || |
103 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || |
104 | MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || |
105 | MFI.hasStackMap() || MFI.hasPatchPoint() || |
106 | (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment())); |
107 | } |
108 | |
109 | static unsigned getSUBriOpcode(bool IsLP64) { |
110 | return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri; |
111 | } |
112 | |
113 | static unsigned getADDriOpcode(bool IsLP64) { |
114 | return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri; |
115 | } |
116 | |
117 | static unsigned getSUBrrOpcode(bool IsLP64) { |
118 | return IsLP64 ? X86::SUB64rr : X86::SUB32rr; |
119 | } |
120 | |
121 | static unsigned getADDrrOpcode(bool IsLP64) { |
122 | return IsLP64 ? X86::ADD64rr : X86::ADD32rr; |
123 | } |
124 | |
125 | static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { |
126 | return IsLP64 ? X86::AND64ri32 : X86::AND32ri; |
127 | } |
128 | |
129 | static unsigned getLEArOpcode(bool IsLP64) { |
130 | return IsLP64 ? X86::LEA64r : X86::LEA32r; |
131 | } |
132 | |
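// Illustrative mapping of immediate ranges to the opcode chosen below
// (assuming a 64-bit destination register):
//   isUInt<32>(Imm) -> MOV32ri64 (32-bit immediate, implicitly zero-extended)
//   isInt<32>(Imm)  -> MOV64ri32 (32-bit immediate, sign-extended)
//   otherwise       -> MOV64ri   (full 64-bit movabs immediate)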
133 | static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) { |
134 | if (Use64BitReg) { |
135 | if (isUInt<32>(x: Imm)) |
136 | return X86::MOV32ri64; |
137 | if (isInt<32>(x: Imm)) |
138 | return X86::MOV64ri32; |
139 | return X86::MOV64ri; |
140 | } |
141 | return X86::MOV32ri; |
142 | } |
143 | |
144 | // Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the |
145 | // value written by the PUSH from the stack. The processor tracks these marked |
146 | // instructions internally and fast-forwards register data between matching PUSH |
147 | // and POP instructions, without going through memory or through the training |
148 | // loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient |
149 | // memory-renaming optimization can be used. |
150 | // |
151 | // The PPX hint is purely a performance hint. Instructions with this hint have |
152 | // the same functional semantics as those without. PPX hints set by the |
153 | // compiler that violate the balancing rule may turn off the PPX optimization, |
154 | // but they will not affect program semantics. |
155 | // |
156 | // Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp |
157 | // are not considered). |
158 | // |
159 | // PUSH2 and POP2 are instructions for (respectively) pushing/popping 2 |
160 | // GPRs at a time to/from the stack. |
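//
// For example, a prologue/epilogue pair spilling two CSRs on a target with
// PUSH2/POP2 and PPX might be emitted roughly as (illustrative only; the
// exact register order depends on the chosen spill order):
//   push2p %rbx, %rbp     ; prologue: one balanced two-register push
//   ...
//   pop2p  %rbp, %rbx     ; epilogue: the matching two-register pop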
161 | static unsigned getPUSHOpcode(const X86Subtarget &ST) { |
162 | return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r) |
163 | : X86::PUSH32r; |
164 | } |
165 | static unsigned getPOPOpcode(const X86Subtarget &ST) { |
166 | return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r) |
167 | : X86::POP32r; |
168 | } |
169 | static unsigned getPUSH2Opcode(const X86Subtarget &ST) { |
170 | return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2; |
171 | } |
172 | static unsigned getPOP2Opcode(const X86Subtarget &ST) { |
173 | return ST.hasPPX() ? X86::POP2P : X86::POP2; |
174 | } |
175 | |
176 | static bool isEAXLiveIn(MachineBasicBlock &MBB) { |
177 | for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { |
178 | unsigned Reg = RegMask.PhysReg; |
179 | |
180 | if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || |
181 | Reg == X86::AH || Reg == X86::AL) |
182 | return true; |
183 | } |
184 | |
185 | return false; |
186 | } |
187 | |
188 | /// Check if the flags need to be preserved before the terminators. |
189 | /// This would be the case, if the eflags is live-in of the region |
190 | /// composed by the terminators or live-out of that region, without |
191 | /// being defined by a terminator. |
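/// For example (illustrative), in a block ending with
///   CMP64ri32 $rax, 0, implicit-def $eflags   ; not a terminator
///   JCC_1 %bb.1, 4, implicit $eflags          ; terminator reading EFLAGS
/// the flags are live into the terminator sequence and must be preserved.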
192 | static bool |
193 | flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { |
194 | for (const MachineInstr &MI : MBB.terminators()) { |
195 | bool BreakNext = false; |
196 | for (const MachineOperand &MO : MI.operands()) { |
197 | if (!MO.isReg()) |
198 | continue; |
199 | Register Reg = MO.getReg(); |
200 | if (Reg != X86::EFLAGS) |
201 | continue; |
202 | |
      // This terminator needs an eflags value that is not defined
      // by a previous terminator:
205 | // EFLAGS is live-in of the region composed by the terminators. |
206 | if (!MO.isDef()) |
207 | return true; |
208 | // This terminator defines the eflags, i.e., we don't need to preserve it. |
209 | // However, we still need to check this specific terminator does not |
210 | // read a live-in value. |
211 | BreakNext = true; |
212 | } |
213 | // We found a definition of the eflags, no need to preserve them. |
214 | if (BreakNext) |
215 | return false; |
216 | } |
217 | |
218 | // None of the terminators use or define the eflags. |
219 | // Check if they are live-out, that would imply we need to preserve them. |
220 | for (const MachineBasicBlock *Succ : MBB.successors()) |
221 | if (Succ->isLiveIn(Reg: X86::EFLAGS)) |
222 | return true; |
223 | |
224 | return false; |
225 | } |
226 | |
227 | /// emitSPUpdate - Emit a series of instructions to increment / decrement the |
228 | /// stack pointer by a constant value. |
229 | void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, |
230 | MachineBasicBlock::iterator &MBBI, |
231 | const DebugLoc &DL, int64_t NumBytes, |
232 | bool InEpilogue) const { |
233 | bool isSub = NumBytes < 0; |
234 | uint64_t Offset = isSub ? -NumBytes : NumBytes; |
235 | MachineInstr::MIFlag Flag = |
236 | isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy; |
237 | |
238 | uint64_t Chunk = (1LL << 31) - 1; |
239 | |
240 | MachineFunction &MF = *MBB.getParent(); |
241 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
242 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
243 | const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); |
244 | |
245 | // It's ok to not take into account large chunks when probing, as the |
246 | // allocation is split in smaller chunks anyway. |
247 | if (EmitInlineStackProbe && !InEpilogue) { |
248 | |
249 | // This pseudo-instruction is going to be expanded, potentially using a |
250 | // loop, by inlineStackProbe(). |
251 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::STACKALLOC_W_PROBING)).addImm(Val: Offset); |
252 | return; |
253 | } else if (Offset > Chunk) { |
254 | // Rather than emit a long series of instructions for large offsets, |
255 | // load the offset into a register and do one sub/add |
256 | unsigned Reg = 0; |
257 | unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX); |
258 | |
259 | if (isSub && !isEAXLiveIn(MBB)) |
260 | Reg = Rax; |
261 | else |
262 | Reg = TRI->findDeadCallerSavedReg(MBB, MBBI); |
263 | |
264 | unsigned AddSubRROpc = |
265 | isSub ? getSUBrrOpcode(IsLP64: Is64Bit) : getADDrrOpcode(IsLP64: Is64Bit); |
266 | if (Reg) { |
267 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: Is64Bit, Imm: Offset)), DestReg: Reg) |
268 | .addImm(Val: Offset) |
269 | .setMIFlag(Flag); |
270 | MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: AddSubRROpc), DestReg: StackPtr) |
271 | .addReg(RegNo: StackPtr) |
272 | .addReg(RegNo: Reg); |
273 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
274 | return; |
275 | } else if (Offset > 8 * Chunk) { |
276 | // If we would need more than 8 add or sub instructions (a >16GB stack |
277 | // frame), it's worth spilling RAX to materialize this immediate. |
278 | // pushq %rax |
279 | // movabsq +-$Offset+-SlotSize, %rax |
280 | // addq %rsp, %rax |
281 | // xchg %rax, (%rsp) |
282 | // movq (%rsp), %rsp |
283 | assert(Is64Bit && "can't have 32-bit 16GB stack frame" ); |
284 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
285 | .addReg(RegNo: Rax, flags: RegState::Kill) |
286 | .setMIFlag(Flag); |
287 | // Subtract is not commutative, so negate the offset and always use add. |
288 | // Subtract 8 less and add 8 more to account for the PUSH we just did. |
289 | if (isSub) |
290 | Offset = -(Offset - SlotSize); |
291 | else |
292 | Offset = Offset + SlotSize; |
293 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: Is64Bit, Imm: Offset)), DestReg: Rax) |
294 | .addImm(Val: Offset) |
295 | .setMIFlag(Flag); |
296 | MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::ADD64rr), DestReg: Rax) |
297 | .addReg(RegNo: Rax) |
298 | .addReg(RegNo: StackPtr); |
299 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
300 | // Exchange the new SP in RAX with the top of the stack. |
301 | addRegOffset( |
302 | MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::XCHG64rm), DestReg: Rax).addReg(RegNo: Rax), |
303 | Reg: StackPtr, isKill: false, Offset: 0); |
304 | // Load new SP from the top of the stack into RSP. |
305 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: StackPtr), |
306 | Reg: StackPtr, isKill: false, Offset: 0); |
307 | return; |
308 | } |
309 | } |
310 | |
311 | while (Offset) { |
312 | uint64_t ThisVal = std::min(a: Offset, b: Chunk); |
313 | if (ThisVal == SlotSize) { |
314 | // Use push / pop for slot sized adjustments as a size optimization. We |
315 | // need to find a dead register when using pop. |
316 | unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) |
317 | : TRI->findDeadCallerSavedReg(MBB, MBBI); |
318 | if (Reg) { |
319 | unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) |
320 | : (Is64Bit ? X86::POP64r : X86::POP32r); |
321 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc)) |
322 | .addReg(RegNo: Reg, flags: getDefRegState(B: !isSub) | getUndefRegState(B: isSub)) |
323 | .setMIFlag(Flag); |
324 | Offset -= ThisVal; |
325 | continue; |
326 | } |
327 | } |
328 | |
329 | BuildStackAdjustment(MBB, MBBI, DL, Offset: isSub ? -ThisVal : ThisVal, InEpilogue) |
330 | .setMIFlag(Flag); |
331 | |
332 | Offset -= ThisVal; |
333 | } |
334 | } |
335 | |
336 | MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( |
337 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
338 | const DebugLoc &DL, int64_t Offset, bool InEpilogue) const { |
339 | assert(Offset != 0 && "zero offset stack adjustment requested" ); |
340 | |
341 | // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue |
342 | // is tricky. |
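  // For example (illustrative), "leaq -16(%rsp), %rsp" adjusts the stack
  // pointer without touching EFLAGS, whereas "subq $16, %rsp" clobbers them.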
343 | bool UseLEA; |
344 | if (!InEpilogue) { |
345 | // Check if inserting the prologue at the beginning |
346 | // of MBB would require to use LEA operations. |
347 | // We need to use LEA operations if EFLAGS is live in, because |
348 | // it means an instruction will read it before it gets defined. |
349 | UseLEA = STI.useLeaForSP() || MBB.isLiveIn(Reg: X86::EFLAGS); |
350 | } else { |
351 | // If we can use LEA for SP but we shouldn't, check that none |
352 | // of the terminators uses the eflags. Otherwise we will insert |
353 | // a ADD that will redefine the eflags and break the condition. |
354 | // Alternatively, we could move the ADD, but this may not be possible |
355 | // and is an optimization anyway. |
356 | UseLEA = canUseLEAForSPInEpilogue(MF: *MBB.getParent()); |
357 | if (UseLEA && !STI.useLeaForSP()) |
358 | UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB); |
359 | // If that assert breaks, that means we do not do the right thing |
360 | // in canUseAsEpilogue. |
361 | assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) && |
362 | "We shouldn't have allowed this insertion point" ); |
363 | } |
364 | |
365 | MachineInstrBuilder MI; |
366 | if (UseLEA) { |
367 | MI = addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, |
368 | MCID: TII.get(Opcode: getLEArOpcode(IsLP64: Uses64BitFramePtr)), |
369 | DestReg: StackPtr), |
370 | Reg: StackPtr, isKill: false, Offset); |
371 | } else { |
372 | bool IsSub = Offset < 0; |
373 | uint64_t AbsOffset = IsSub ? -Offset : Offset; |
374 | const unsigned Opc = IsSub ? getSUBriOpcode(IsLP64: Uses64BitFramePtr) |
375 | : getADDriOpcode(IsLP64: Uses64BitFramePtr); |
376 | MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr) |
377 | .addReg(RegNo: StackPtr) |
378 | .addImm(Val: AbsOffset); |
379 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
380 | } |
381 | return MI; |
382 | } |
383 | |
384 | int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, |
385 | MachineBasicBlock::iterator &MBBI, |
386 | bool doMergeWithPrevious) const { |
387 | if ((doMergeWithPrevious && MBBI == MBB.begin()) || |
388 | (!doMergeWithPrevious && MBBI == MBB.end())) |
389 | return 0; |
390 | |
391 | MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(x: MBBI) : MBBI; |
392 | |
393 | PI = skipDebugInstructionsBackward(It: PI, Begin: MBB.begin()); |
  // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
395 | // instruction, and that there are no DBG_VALUE or other instructions between |
396 | // ADD/SUB/LEA and its corresponding CFI instruction. |
397 | /* TODO: Add support for the case where there are multiple CFI instructions |
398 | below the ADD/SUB/LEA, e.g.: |
399 | ... |
400 | add |
401 | cfi_def_cfa_offset |
402 | cfi_offset |
403 | ... |
404 | */ |
405 | if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction()) |
406 | PI = std::prev(x: PI); |
407 | |
408 | unsigned Opc = PI->getOpcode(); |
409 | int Offset = 0; |
410 | |
411 | if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) && |
412 | PI->getOperand(i: 0).getReg() == StackPtr) { |
413 | assert(PI->getOperand(1).getReg() == StackPtr); |
414 | Offset = PI->getOperand(i: 2).getImm(); |
415 | } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) && |
416 | PI->getOperand(i: 0).getReg() == StackPtr && |
417 | PI->getOperand(i: 1).getReg() == StackPtr && |
418 | PI->getOperand(i: 2).getImm() == 1 && |
419 | PI->getOperand(i: 3).getReg() == X86::NoRegister && |
420 | PI->getOperand(i: 5).getReg() == X86::NoRegister) { |
421 | // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg. |
422 | Offset = PI->getOperand(i: 4).getImm(); |
423 | } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) && |
424 | PI->getOperand(i: 0).getReg() == StackPtr) { |
425 | assert(PI->getOperand(1).getReg() == StackPtr); |
426 | Offset = -PI->getOperand(i: 2).getImm(); |
427 | } else |
428 | return 0; |
429 | |
430 | PI = MBB.erase(I: PI); |
431 | if (PI != MBB.end() && PI->isCFIInstruction()) { |
432 | auto CIs = MBB.getParent()->getFrameInstructions(); |
433 | MCCFIInstruction CI = CIs[PI->getOperand(i: 0).getCFIIndex()]; |
434 | if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset || |
435 | CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) |
436 | PI = MBB.erase(I: PI); |
437 | } |
438 | if (!doMergeWithPrevious) |
439 | MBBI = skipDebugInstructionsForward(It: PI, End: MBB.end()); |
440 | |
441 | return Offset; |
442 | } |
443 | |
444 | void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, |
445 | MachineBasicBlock::iterator MBBI, |
446 | const DebugLoc &DL, |
447 | const MCCFIInstruction &CFIInst, |
448 | MachineInstr::MIFlag Flag) const { |
449 | MachineFunction &MF = *MBB.getParent(); |
450 | unsigned CFIIndex = MF.addFrameInst(Inst: CFIInst); |
451 | |
452 | if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) |
453 | MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); |
454 | |
455 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
456 | .addCFIIndex(CFIIndex) |
457 | .setMIFlag(Flag); |
458 | } |
459 | |
460 | /// Emits Dwarf Info specifying offsets of callee saved registers and |
461 | /// frame pointer. This is called only when basic block sections are enabled. |
462 | void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA( |
463 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { |
464 | MachineFunction &MF = *MBB.getParent(); |
465 | if (!hasFP(MF)) { |
466 | emitCalleeSavedFrameMoves(MBB, MBBI, DL: DebugLoc{}, IsPrologue: true); |
467 | return; |
468 | } |
469 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
470 | const Register FramePtr = TRI->getFrameRegister(MF); |
471 | const Register MachineFramePtr = |
472 | STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
473 | : FramePtr; |
474 | unsigned DwarfReg = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
475 | // Offset = space for return address + size of the frame pointer itself. |
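  // For example, on x86-64 this is 8 + 8 == 16, so the saved frame pointer is
  // described at CFA-16 (illustrative).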
476 | int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4); |
477 | BuildCFI(MBB, MBBI, DL: DebugLoc{}, |
478 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset: -Offset)); |
479 | emitCalleeSavedFrameMoves(MBB, MBBI, DL: DebugLoc{}, IsPrologue: true); |
480 | } |
481 | |
482 | void X86FrameLowering::emitCalleeSavedFrameMoves( |
483 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
484 | const DebugLoc &DL, bool IsPrologue) const { |
485 | MachineFunction &MF = *MBB.getParent(); |
486 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
487 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
488 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
489 | |
490 | // Add callee saved registers to move list. |
491 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
492 | |
493 | // Calculate offsets. |
494 | for (const CalleeSavedInfo &I : CSI) { |
495 | int64_t Offset = MFI.getObjectOffset(ObjectIdx: I.getFrameIdx()); |
496 | Register Reg = I.getReg(); |
497 | unsigned DwarfReg = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
498 | |
499 | if (IsPrologue) { |
500 | if (X86FI->getStackPtrSaveMI()) { |
501 | // +2*SlotSize because there is return address and ebp at the bottom |
502 | // of the stack. |
503 | // | retaddr | |
504 | // | ebp | |
505 | // | |<--ebp |
506 | Offset += 2 * SlotSize; |
507 | SmallString<64> CfaExpr; |
508 | CfaExpr.push_back(Elt: dwarf::DW_CFA_expression); |
509 | uint8_t buffer[16]; |
510 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer)); |
511 | CfaExpr.push_back(Elt: 2); |
512 | Register FramePtr = TRI->getFrameRegister(MF); |
513 | const Register MachineFramePtr = |
514 | STI.isTarget64BitILP32() |
515 | ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
516 | : FramePtr; |
517 | unsigned DwarfFramePtr = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
518 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr)); |
519 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer)); |
520 | BuildCFI(MBB, MBBI, DL, |
521 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str()), |
522 | Flag: MachineInstr::FrameSetup); |
523 | } else { |
524 | BuildCFI(MBB, MBBI, DL, |
525 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset)); |
526 | } |
527 | } else { |
528 | BuildCFI(MBB, MBBI, DL, |
529 | CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfReg)); |
530 | } |
531 | } |
532 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
533 | int FI = MI->getOperand(i: 1).getIndex(); |
534 | int64_t Offset = MFI.getObjectOffset(ObjectIdx: FI) + 2 * SlotSize; |
535 | SmallString<64> CfaExpr; |
536 | Register FramePtr = TRI->getFrameRegister(MF); |
537 | const Register MachineFramePtr = |
538 | STI.isTarget64BitILP32() |
539 | ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
540 | : FramePtr; |
541 | unsigned DwarfFramePtr = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
542 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr)); |
543 | uint8_t buffer[16]; |
544 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer)); |
545 | CfaExpr.push_back(Elt: dwarf::DW_OP_deref); |
546 | |
547 | SmallString<64> DefCfaExpr; |
548 | DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression); |
549 | DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: CfaExpr.size(), p: buffer)); |
550 | DefCfaExpr.append(RHS: CfaExpr.str()); |
551 | // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref |
552 | BuildCFI(MBB, MBBI, DL, |
553 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str()), |
554 | Flag: MachineInstr::FrameSetup); |
555 | } |
556 | } |
557 | |
558 | void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, |
559 | MachineBasicBlock &MBB) const { |
560 | const MachineFunction &MF = *MBB.getParent(); |
561 | |
562 | // Insertion point. |
563 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
564 | |
565 | // Fake a debug loc. |
566 | DebugLoc DL; |
567 | if (MBBI != MBB.end()) |
568 | DL = MBBI->getDebugLoc(); |
569 | |
570 | // Zero out FP stack if referenced. Do this outside of the loop below so that |
571 | // it's done only once. |
572 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
573 | for (MCRegister Reg : RegsToZero.set_bits()) { |
574 | if (!X86::RFP80RegClass.contains(Reg)) |
575 | continue; |
576 | |
577 | unsigned NumFPRegs = ST.is64Bit() ? 8 : 7; |
578 | for (unsigned i = 0; i != NumFPRegs; ++i) |
579 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LD_F0)); |
580 | |
581 | for (unsigned i = 0; i != NumFPRegs; ++i) |
582 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::ST_FPrr)).addReg(RegNo: X86::ST0); |
583 | break; |
584 | } |
585 | |
586 | // For GPRs, we only care to clear out the 32-bit register. |
587 | BitVector GPRsToZero(TRI->getNumRegs()); |
588 | for (MCRegister Reg : RegsToZero.set_bits()) |
589 | if (TRI->isGeneralPurposeRegister(MF, Reg)) { |
590 | GPRsToZero.set(getX86SubSuperRegister(Reg, Size: 32)); |
591 | RegsToZero.reset(Idx: Reg); |
592 | } |
593 | |
594 | // Zero out the GPRs first. |
595 | for (MCRegister Reg : GPRsToZero.set_bits()) |
596 | TII.buildClearRegister(Reg, MBB, Iter: MBBI, DL); |
597 | |
598 | // Zero out the remaining registers. |
599 | for (MCRegister Reg : RegsToZero.set_bits()) |
600 | TII.buildClearRegister(Reg, MBB, Iter: MBBI, DL); |
601 | } |
602 | |
603 | void X86FrameLowering::emitStackProbe( |
604 | MachineFunction &MF, MachineBasicBlock &MBB, |
605 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, |
606 | std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const { |
607 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
608 | if (STI.isTargetWindowsCoreCLR()) { |
609 | if (InProlog) { |
610 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::STACKALLOC_W_PROBING)) |
611 | .addImm(Val: 0 /* no explicit stack size */); |
612 | } else { |
613 | emitStackProbeInline(MF, MBB, MBBI, DL, InProlog: false); |
614 | } |
615 | } else { |
616 | emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum); |
617 | } |
618 | } |
619 | |
620 | bool X86FrameLowering::stackProbeFunctionModifiesSP() const { |
621 | return STI.isOSWindows() && !STI.isTargetWin64(); |
622 | } |
623 | |
624 | void X86FrameLowering::inlineStackProbe(MachineFunction &MF, |
625 | MachineBasicBlock &PrologMBB) const { |
626 | auto Where = llvm::find_if(Range&: PrologMBB, P: [](MachineInstr &MI) { |
627 | return MI.getOpcode() == X86::STACKALLOC_W_PROBING; |
628 | }); |
629 | if (Where != PrologMBB.end()) { |
630 | DebugLoc DL = PrologMBB.findDebugLoc(MBBI: Where); |
631 | emitStackProbeInline(MF, MBB&: PrologMBB, MBBI: Where, DL, InProlog: true); |
632 | Where->eraseFromParent(); |
633 | } |
634 | } |
635 | |
636 | void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, |
637 | MachineBasicBlock &MBB, |
638 | MachineBasicBlock::iterator MBBI, |
639 | const DebugLoc &DL, |
640 | bool InProlog) const { |
641 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
642 | if (STI.isTargetWindowsCoreCLR() && STI.is64Bit()) |
643 | emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog); |
644 | else |
645 | emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog); |
646 | } |
647 | |
648 | void X86FrameLowering::emitStackProbeInlineGeneric( |
649 | MachineFunction &MF, MachineBasicBlock &MBB, |
650 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { |
651 | MachineInstr &AllocWithProbe = *MBBI; |
652 | uint64_t Offset = AllocWithProbe.getOperand(i: 0).getImm(); |
653 | |
654 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
655 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
656 | assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) && |
657 | "different expansion expected for CoreCLR 64 bit" ); |
658 | |
659 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
660 | uint64_t ProbeChunk = StackProbeSize * 8; |
661 | |
662 | uint64_t MaxAlign = |
663 | TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0; |
664 | |
665 | // Synthesize a loop or unroll it, depending on the number of iterations. |
  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bytes
  // are left between the unaligned rsp and current rsp.
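  // For example, with the default 4096-byte probe interval, ProbeChunk is
  // 32768 bytes, so allocations larger than 32 KiB use the loop form
  // (illustrative; the probe size can be overridden per function).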
668 | if (Offset > ProbeChunk) { |
669 | emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset, |
670 | Align: MaxAlign % StackProbeSize); |
671 | } else { |
672 | emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset, |
673 | Align: MaxAlign % StackProbeSize); |
674 | } |
675 | } |
676 | |
677 | void X86FrameLowering::emitStackProbeInlineGenericBlock( |
678 | MachineFunction &MF, MachineBasicBlock &MBB, |
679 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, |
680 | uint64_t AlignOffset) const { |
681 | |
682 | const bool NeedsDwarfCFI = needsDwarfCFI(MF); |
683 | const bool HasFP = hasFP(MF); |
684 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
685 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
686 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
687 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
688 | |
689 | uint64_t CurrentOffset = 0; |
690 | |
691 | assert(AlignOffset < StackProbeSize); |
692 | |
693 | // If the offset is so small it fits within a page, there's nothing to do. |
694 | if (StackProbeSize < Offset + AlignOffset) { |
695 | |
696 | uint64_t StackAdjustment = StackProbeSize - AlignOffset; |
697 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -StackAdjustment, /*InEpilogue=*/false) |
698 | .setMIFlag(MachineInstr::FrameSetup); |
699 | if (!HasFP && NeedsDwarfCFI) { |
700 | BuildCFI( |
701 | MBB, MBBI, DL, |
702 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: StackAdjustment)); |
703 | } |
704 | |
705 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
706 | .setMIFlag(MachineInstr::FrameSetup), |
707 | Reg: StackPtr, isKill: false, Offset: 0) |
708 | .addImm(Val: 0) |
709 | .setMIFlag(MachineInstr::FrameSetup); |
710 | NumFrameExtraProbe++; |
711 | CurrentOffset = StackProbeSize - AlignOffset; |
712 | } |
713 | |
  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes, but it implies much more logic and there were very few
  // interesting natural probes to interleave.
717 | while (CurrentOffset + StackProbeSize < Offset) { |
718 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -StackProbeSize, /*InEpilogue=*/false) |
719 | .setMIFlag(MachineInstr::FrameSetup); |
720 | |
721 | if (!HasFP && NeedsDwarfCFI) { |
722 | BuildCFI( |
723 | MBB, MBBI, DL, |
724 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: StackProbeSize)); |
725 | } |
726 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
727 | .setMIFlag(MachineInstr::FrameSetup), |
728 | Reg: StackPtr, isKill: false, Offset: 0) |
729 | .addImm(Val: 0) |
730 | .setMIFlag(MachineInstr::FrameSetup); |
731 | NumFrameExtraProbe++; |
732 | CurrentOffset += StackProbeSize; |
733 | } |
734 | |
735 | // No need to probe the tail, it is smaller than a Page. |
736 | uint64_t ChunkSize = Offset - CurrentOffset; |
737 | if (ChunkSize == SlotSize) { |
738 | // Use push for slot sized adjustments as a size optimization, |
739 | // like emitSPUpdate does when not probing. |
740 | unsigned Reg = Is64Bit ? X86::RAX : X86::EAX; |
741 | unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r; |
742 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc)) |
743 | .addReg(RegNo: Reg, flags: RegState::Undef) |
744 | .setMIFlag(MachineInstr::FrameSetup); |
745 | } else { |
746 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -ChunkSize, /*InEpilogue=*/false) |
747 | .setMIFlag(MachineInstr::FrameSetup); |
748 | } |
  // No need to adjust the Dwarf CFA offset here, the last position of the
  // stack has already been defined.
751 | } |
752 | |
753 | void X86FrameLowering::emitStackProbeInlineGenericLoop( |
754 | MachineFunction &MF, MachineBasicBlock &MBB, |
755 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, |
756 | uint64_t AlignOffset) const { |
757 | assert(Offset && "null offset" ); |
758 | |
759 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
760 | MachineBasicBlock::LQR_Live && |
761 | "Inline stack probe loop will clobber live EFLAGS." ); |
762 | |
763 | const bool NeedsDwarfCFI = needsDwarfCFI(MF); |
764 | const bool HasFP = hasFP(MF); |
765 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
766 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
767 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
768 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
769 | |
770 | if (AlignOffset) { |
771 | if (AlignOffset < StackProbeSize) { |
772 | // Perform a first smaller allocation followed by a probe. |
773 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -AlignOffset, /*InEpilogue=*/false) |
774 | .setMIFlag(MachineInstr::FrameSetup); |
775 | |
776 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
777 | .setMIFlag(MachineInstr::FrameSetup), |
778 | Reg: StackPtr, isKill: false, Offset: 0) |
779 | .addImm(Val: 0) |
780 | .setMIFlag(MachineInstr::FrameSetup); |
781 | NumFrameExtraProbe++; |
782 | Offset -= AlignOffset; |
783 | } |
784 | } |
785 | |
786 | // Synthesize a loop |
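  // The synthesized code looks roughly like this (illustrative, AT&T syntax,
  // assuming r11 is used as the scratch register):
  //   movq %rsp, %r11
  //   subq $BoundOffset, %r11
  // .LtestMBB:
  //   subq $StackProbeSize, %rsp
  //   movl $0, (%rsp)
  //   cmpq %r11, %rsp
  //   jne  .LtestMBB
  //   subq $TailOffset, %rsp        ; tailMBB, only if TailOffset != 0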
787 | NumFrameLoopProbe++; |
788 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); |
789 | |
790 | MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
791 | MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
792 | |
793 | MachineFunction::iterator MBBIter = ++MBB.getIterator(); |
794 | MF.insert(MBBI: MBBIter, MBB: testMBB); |
795 | MF.insert(MBBI: MBBIter, MBB: tailMBB); |
796 | |
797 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 |
798 | : Is64Bit ? X86::R11D |
799 | : X86::EAX; |
800 | |
801 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: FinalStackProbed) |
802 | .addReg(RegNo: StackPtr) |
803 | .setMIFlag(MachineInstr::FrameSetup); |
804 | |
805 | // save loop bound |
806 | { |
807 | const unsigned BoundOffset = alignDown(Value: Offset, Align: StackProbeSize); |
808 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
809 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: FinalStackProbed) |
810 | .addReg(RegNo: FinalStackProbed) |
811 | .addImm(Val: BoundOffset) |
812 | .setMIFlag(MachineInstr::FrameSetup); |
813 | |
814 | // while in the loop, use loop-invariant reg for CFI, |
815 | // instead of the stack pointer, which changes during the loop |
816 | if (!HasFP && NeedsDwarfCFI) { |
817 | // x32 uses the same DWARF register numbers as x86-64, |
818 | // so there isn't a register number for r11d, we must use r11 instead |
819 | const Register DwarfFinalStackProbed = |
820 | STI.isTarget64BitILP32() |
821 | ? Register(getX86SubSuperRegister(Reg: FinalStackProbed, Size: 64)) |
822 | : FinalStackProbed; |
823 | |
824 | BuildCFI(MBB, MBBI, DL, |
825 | CFIInst: MCCFIInstruction::createDefCfaRegister( |
826 | L: nullptr, Register: TRI->getDwarfRegNum(RegNum: DwarfFinalStackProbed, isEH: true))); |
827 | BuildCFI(MBB, MBBI, DL, |
828 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: BoundOffset)); |
829 | } |
830 | } |
831 | |
832 | // allocate a page |
833 | BuildStackAdjustment(MBB&: *testMBB, MBBI: testMBB->end(), DL, Offset: -StackProbeSize, |
834 | /*InEpilogue=*/false) |
835 | .setMIFlag(MachineInstr::FrameSetup); |
836 | |
837 | // touch the page |
838 | addRegOffset(MIB: BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
839 | .setMIFlag(MachineInstr::FrameSetup), |
840 | Reg: StackPtr, isKill: false, Offset: 0) |
841 | .addImm(Val: 0) |
842 | .setMIFlag(MachineInstr::FrameSetup); |
843 | |
844 | // cmp with stack pointer bound |
845 | BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
846 | .addReg(RegNo: StackPtr) |
847 | .addReg(RegNo: FinalStackProbed) |
848 | .setMIFlag(MachineInstr::FrameSetup); |
849 | |
850 | // jump |
851 | BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
852 | .addMBB(MBB: testMBB) |
853 | .addImm(Val: X86::COND_NE) |
854 | .setMIFlag(MachineInstr::FrameSetup); |
855 | testMBB->addSuccessor(Succ: testMBB); |
856 | testMBB->addSuccessor(Succ: tailMBB); |
857 | |
858 | // BB management |
859 | tailMBB->splice(Where: tailMBB->end(), Other: &MBB, From: MBBI, To: MBB.end()); |
860 | tailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
861 | MBB.addSuccessor(Succ: testMBB); |
862 | |
863 | // handle tail |
864 | const uint64_t TailOffset = Offset % StackProbeSize; |
865 | MachineBasicBlock::iterator TailMBBIter = tailMBB->begin(); |
866 | if (TailOffset) { |
867 | BuildStackAdjustment(MBB&: *tailMBB, MBBI: TailMBBIter, DL, Offset: -TailOffset, |
868 | /*InEpilogue=*/false) |
869 | .setMIFlag(MachineInstr::FrameSetup); |
870 | } |
871 | |
872 | // after the loop, switch back to stack pointer for CFI |
873 | if (!HasFP && NeedsDwarfCFI) { |
874 | // x32 uses the same DWARF register numbers as x86-64, |
875 | // so there isn't a register number for esp, we must use rsp instead |
876 | const Register DwarfStackPtr = |
877 | STI.isTarget64BitILP32() |
878 | ? Register(getX86SubSuperRegister(Reg: StackPtr, Size: 64)) |
879 | : Register(StackPtr); |
880 | |
881 | BuildCFI(MBB&: *tailMBB, MBBI: TailMBBIter, DL, |
882 | CFIInst: MCCFIInstruction::createDefCfaRegister( |
883 | L: nullptr, Register: TRI->getDwarfRegNum(RegNum: DwarfStackPtr, isEH: true))); |
884 | } |
885 | |
886 | // Update Live In information |
887 | fullyRecomputeLiveIns(MBBs: {tailMBB, testMBB}); |
888 | } |
889 | |
890 | void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64( |
891 | MachineFunction &MF, MachineBasicBlock &MBB, |
892 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { |
893 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
894 | assert(STI.is64Bit() && "different expansion needed for 32 bit" ); |
895 | assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR" ); |
896 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
897 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); |
898 | |
899 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
900 | MachineBasicBlock::LQR_Live && |
901 | "Inline stack probe loop will clobber live EFLAGS." ); |
902 | |
903 | // RAX contains the number of bytes of desired stack adjustment. |
904 | // The handling here assumes this value has already been updated so as to |
905 | // maintain stack alignment. |
906 | // |
907 | // We need to exit with RSP modified by this amount and execute suitable |
908 | // page touches to notify the OS that we're growing the stack responsibly. |
909 | // All stack probing must be done without modifying RSP. |
910 | // |
911 | // MBB: |
912 | // SizeReg = RAX; |
913 | // ZeroReg = 0 |
914 | // CopyReg = RSP |
915 | // Flags, TestReg = CopyReg - SizeReg |
916 | // FinalReg = !Flags.Ovf ? TestReg : ZeroReg |
917 | // LimitReg = gs magic thread env access |
918 | // if FinalReg >= LimitReg goto ContinueMBB |
919 | // RoundBB: |
920 | // RoundReg = page address of FinalReg |
921 | // LoopMBB: |
922 | // LoopReg = PHI(LimitReg,ProbeReg) |
923 | // ProbeReg = LoopReg - PageSize |
924 | // [ProbeReg] = 0 |
925 | // if (ProbeReg > RoundReg) goto LoopMBB |
926 | // ContinueMBB: |
927 | // RSP = RSP - RAX |
928 | // [rest of original MBB] |
929 | |
930 | // Set up the new basic blocks |
931 | MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
932 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
933 | MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
934 | |
935 | MachineFunction::iterator MBBIter = std::next(x: MBB.getIterator()); |
936 | MF.insert(MBBI: MBBIter, MBB: RoundMBB); |
937 | MF.insert(MBBI: MBBIter, MBB: LoopMBB); |
938 | MF.insert(MBBI: MBBIter, MBB: ContinueMBB); |
939 | |
940 | // Split MBB and move the tail portion down to ContinueMBB. |
941 | MachineBasicBlock::iterator BeforeMBBI = std::prev(x: MBBI); |
942 | ContinueMBB->splice(Where: ContinueMBB->begin(), Other: &MBB, From: MBBI, To: MBB.end()); |
943 | ContinueMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
944 | |
945 | // Some useful constants |
946 | const int64_t ThreadEnvironmentStackLimit = 0x10; |
947 | const int64_t PageSize = 0x1000; |
948 | const int64_t PageMask = ~(PageSize - 1); |
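  // For example, with PageSize == 0x1000, PageMask is ~0xFFF, so ANDing an
  // address with it rounds that address down to its 4 KiB page boundary
  // (illustrative).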
949 | |
950 | // Registers we need. For the normal case we use virtual |
951 | // registers. For the prolog expansion we use RAX, RCX and RDX. |
952 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
953 | const TargetRegisterClass *RegClass = &X86::GR64RegClass; |
954 | const Register |
955 | SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass), |
956 | ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
957 | CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
958 | TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
959 | FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
960 | RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
961 | LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
962 | JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
963 | ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass); |
964 | |
965 | // SP-relative offsets where we can save RCX and RDX. |
966 | int64_t RCXShadowSlot = 0; |
967 | int64_t RDXShadowSlot = 0; |
968 | |
969 | // If inlining in the prolog, save RCX and RDX. |
970 | if (InProlog) { |
971 | // Compute the offsets. We need to account for things already |
972 | // pushed onto the stack at this point: return address, frame |
973 | // pointer (if used), and callee saves. |
974 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
975 | const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); |
976 | const bool HasFP = hasFP(MF); |
977 | |
978 | // Check if we need to spill RCX and/or RDX. |
979 | // Here we assume that no earlier prologue instruction changes RCX and/or |
980 | // RDX, so checking the block live-ins is enough. |
981 | const bool IsRCXLiveIn = MBB.isLiveIn(Reg: X86::RCX); |
982 | const bool IsRDXLiveIn = MBB.isLiveIn(Reg: X86::RDX); |
983 | int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); |
984 | // Assign the initial slot to both registers, then change RDX's slot if both |
985 | // need to be spilled. |
986 | if (IsRCXLiveIn) |
987 | RCXShadowSlot = InitSlot; |
988 | if (IsRDXLiveIn) |
989 | RDXShadowSlot = InitSlot; |
990 | if (IsRDXLiveIn && IsRCXLiveIn) |
991 | RDXShadowSlot += 8; |
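    // For example (illustrative), with no frame pointer and 16 bytes of
    // callee saves, InitSlot is 24; if both RCX and RDX are live-in they are
    // saved at [RSP + 24] and [RSP + 32] respectively.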
992 | // Emit the saves if needed. |
993 | if (IsRCXLiveIn) |
994 | addRegOffset(MIB: BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: X86::RSP, isKill: false, |
995 | Offset: RCXShadowSlot) |
996 | .addReg(RegNo: X86::RCX); |
997 | if (IsRDXLiveIn) |
998 | addRegOffset(MIB: BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: X86::RSP, isKill: false, |
999 | Offset: RDXShadowSlot) |
1000 | .addReg(RegNo: X86::RDX); |
1001 | } else { |
1002 | // Not in the prolog. Copy RAX to a virtual reg. |
1003 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: SizeReg).addReg(RegNo: X86::RAX); |
1004 | } |
1005 | |
1006 | // Add code to MBB to check for overflow and set the new target stack pointer |
1007 | // to zero if so. |
1008 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::XOR64rr), DestReg: ZeroReg) |
1009 | .addReg(RegNo: ZeroReg, flags: RegState::Undef) |
1010 | .addReg(RegNo: ZeroReg, flags: RegState::Undef); |
1011 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: CopyReg).addReg(RegNo: X86::RSP); |
1012 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64rr), DestReg: TestReg) |
1013 | .addReg(RegNo: CopyReg) |
1014 | .addReg(RegNo: SizeReg); |
1015 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMOV64rr), DestReg: FinalReg) |
1016 | .addReg(RegNo: TestReg) |
1017 | .addReg(RegNo: ZeroReg) |
1018 | .addImm(Val: X86::COND_B); |
1019 | |
1020 | // FinalReg now holds final stack pointer value, or zero if |
1021 | // allocation would overflow. Compare against the current stack |
1022 | // limit from the thread environment block. Note this limit is the |
1023 | // lowest touched page on the stack, not the point at which the OS |
1024 | // will cause an overflow exception, so this is just an optimization |
1025 | // to avoid unnecessarily touching pages that are below the current |
1026 | // SP but already committed to the stack by the OS. |
1027 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: LimitReg) |
1028 | .addReg(RegNo: 0) |
1029 | .addImm(Val: 1) |
1030 | .addReg(RegNo: 0) |
1031 | .addImm(Val: ThreadEnvironmentStackLimit) |
1032 | .addReg(RegNo: X86::GS); |
1033 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP64rr)).addReg(RegNo: FinalReg).addReg(RegNo: LimitReg); |
1034 | // Jump if the desired stack pointer is at or above the stack limit. |
1035 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1036 | .addMBB(MBB: ContinueMBB) |
1037 | .addImm(Val: X86::COND_AE); |
1038 | |
1039 | // Add code to roundMBB to round the final stack pointer to a page boundary. |
1040 | RoundMBB->addLiveIn(PhysReg: FinalReg); |
1041 | BuildMI(BB: RoundMBB, MIMD: DL, MCID: TII.get(Opcode: X86::AND64ri32), DestReg: RoundedReg) |
1042 | .addReg(RegNo: FinalReg) |
1043 | .addImm(Val: PageMask); |
1044 | BuildMI(BB: RoundMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JMP_1)).addMBB(MBB: LoopMBB); |
1045 | |
1046 | // LimitReg now holds the current stack limit, RoundedReg page-rounded |
1047 | // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page |
1048 | // and probe until we reach RoundedReg. |
1049 | if (!InProlog) { |
1050 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PHI), DestReg: JoinReg) |
1051 | .addReg(RegNo: LimitReg) |
1052 | .addMBB(MBB: RoundMBB) |
1053 | .addReg(RegNo: ProbeReg) |
1054 | .addMBB(MBB: LoopMBB); |
1055 | } |
1056 | |
1057 | LoopMBB->addLiveIn(PhysReg: JoinReg); |
1058 | addRegOffset(MIB: BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: ProbeReg), Reg: JoinReg, |
1059 | isKill: false, Offset: -PageSize); |
1060 | |
1061 | // Probe by storing a byte onto the stack. |
1062 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV8mi)) |
1063 | .addReg(RegNo: ProbeReg) |
1064 | .addImm(Val: 1) |
1065 | .addReg(RegNo: 0) |
1066 | .addImm(Val: 0) |
1067 | .addReg(RegNo: 0) |
1068 | .addImm(Val: 0); |
1069 | |
1070 | LoopMBB->addLiveIn(PhysReg: RoundedReg); |
1071 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP64rr)) |
1072 | .addReg(RegNo: RoundedReg) |
1073 | .addReg(RegNo: ProbeReg); |
1074 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1075 | .addMBB(MBB: LoopMBB) |
1076 | .addImm(Val: X86::COND_NE); |
1077 | |
1078 | MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI(); |
1079 | |
1080 | // If in prolog, restore RDX and RCX. |
1081 | if (InProlog) { |
1082 | if (RCXShadowSlot) // It means we spilled RCX in the prologue. |
1083 | addRegOffset(MIB: BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL, |
1084 | MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RCX), |
1085 | Reg: X86::RSP, isKill: false, Offset: RCXShadowSlot); |
1086 | if (RDXShadowSlot) // It means we spilled RDX in the prologue. |
1087 | addRegOffset(MIB: BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL, |
1088 | MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RDX), |
1089 | Reg: X86::RSP, isKill: false, Offset: RDXShadowSlot); |
1090 | } |
1091 | |
1092 | // Now that the probing is done, add code to continueMBB to update |
1093 | // the stack pointer for real. |
1094 | ContinueMBB->addLiveIn(PhysReg: SizeReg); |
1095 | BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64rr), DestReg: X86::RSP) |
1096 | .addReg(RegNo: X86::RSP) |
1097 | .addReg(RegNo: SizeReg); |
1098 | |
1099 | // Add the control flow edges we need. |
1100 | MBB.addSuccessor(Succ: ContinueMBB); |
1101 | MBB.addSuccessor(Succ: RoundMBB); |
1102 | RoundMBB->addSuccessor(Succ: LoopMBB); |
1103 | LoopMBB->addSuccessor(Succ: ContinueMBB); |
1104 | LoopMBB->addSuccessor(Succ: LoopMBB); |
1105 | |
1106 | // Mark all the instructions added to the prolog as frame setup. |
1107 | if (InProlog) { |
1108 | for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) { |
1109 | BeforeMBBI->setFlag(MachineInstr::FrameSetup); |
1110 | } |
1111 | for (MachineInstr &MI : *RoundMBB) { |
1112 | MI.setFlag(MachineInstr::FrameSetup); |
1113 | } |
1114 | for (MachineInstr &MI : *LoopMBB) { |
1115 | MI.setFlag(MachineInstr::FrameSetup); |
1116 | } |
1117 | for (MachineInstr &MI : |
1118 | llvm::make_range(x: ContinueMBB->begin(), y: ContinueMBBI)) { |
1119 | MI.setFlag(MachineInstr::FrameSetup); |
1120 | } |
1121 | } |
1122 | } |
1123 | |
1124 | void X86FrameLowering::emitStackProbeCall( |
1125 | MachineFunction &MF, MachineBasicBlock &MBB, |
1126 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, |
1127 | std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const { |
1128 | bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; |
1129 | |
1130 | // FIXME: Add indirect thunk support and remove this. |
1131 | if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls()) |
1132 | report_fatal_error(reason: "Emitting stack probe calls on 64-bit with the large " |
1133 | "code model and indirect thunks not yet implemented." ); |
1134 | |
1135 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
1136 | MachineBasicBlock::LQR_Live && |
1137 | "Stack probe calls will clobber live EFLAGS." ); |
1138 | |
1139 | unsigned CallOp; |
1140 | if (Is64Bit) |
1141 | CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; |
1142 | else |
1143 | CallOp = X86::CALLpcrel32; |
1144 | |
1145 | StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF); |
1146 | |
1147 | MachineInstrBuilder CI; |
1148 | MachineBasicBlock::iterator ExpansionMBBI = std::prev(x: MBBI); |
1149 | |
1150 | // All current stack probes take AX and SP as input, clobber flags, and |
1151 | // preserve all registers. x86_64 probes leave RSP unmodified. |
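  // For example, on Win64 the emitted sequence typically looks like
  // (illustrative; RAX is loaded with the allocation size by the surrounding
  // prologue code):
  //   call __chkstk
  //   subq %rax, %rsp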
1152 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { |
1153 | // For the large code model, we have to call through a register. Use R11, |
1154 | // as it is scratch in all supported calling conventions. |
1155 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64ri), DestReg: X86::R11) |
1156 | .addExternalSymbol(FnName: MF.createExternalSymbolName(Name: Symbol)); |
1157 | CI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: CallOp)).addReg(RegNo: X86::R11); |
1158 | } else { |
1159 | CI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: CallOp)) |
1160 | .addExternalSymbol(FnName: MF.createExternalSymbolName(Name: Symbol)); |
1161 | } |
1162 | |
1163 | unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX; |
1164 | unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP; |
1165 | CI.addReg(RegNo: AX, flags: RegState::Implicit) |
1166 | .addReg(RegNo: SP, flags: RegState::Implicit) |
1167 | .addReg(RegNo: AX, flags: RegState::Define | RegState::Implicit) |
1168 | .addReg(RegNo: SP, flags: RegState::Define | RegState::Implicit) |
1169 | .addReg(RegNo: X86::EFLAGS, flags: RegState::Define | RegState::Implicit); |
1170 | |
1171 | MachineInstr *ModInst = CI; |
1172 | if (STI.isTargetWin64() || !STI.isOSWindows()) { |
1173 | // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. |
1174 | // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp |
    // themselves. They also do not clobber %rax so we can reuse it when
1176 | // adjusting %rsp. |
1177 | // All other platforms do not specify a particular ABI for the stack probe |
1178 | // function, so we arbitrarily define it to not adjust %esp/%rsp itself. |
1179 | ModInst = |
1180 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getSUBrrOpcode(IsLP64: Uses64BitFramePtr)), DestReg: SP) |
1181 | .addReg(RegNo: SP) |
1182 | .addReg(RegNo: AX); |
1183 | } |
1184 | |
1185 | // DebugInfo variable locations -- if there's an instruction number for the |
1186 | // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that |
1187 | // modifies SP. |
1188 | if (InstrNum) { |
1189 | if (STI.isTargetWin64() || !STI.isOSWindows()) { |
1190 | // Label destination operand of the subtract. |
1191 | MF.makeDebugValueSubstitution(*InstrNum, |
1192 | {ModInst->getDebugInstrNum(), 0}); |
1193 | } else { |
1194 | // Label the call. The operand number is the penultimate operand, zero |
1195 | // based. |
1196 | unsigned SPDefOperand = ModInst->getNumOperands() - 2; |
1197 | MF.makeDebugValueSubstitution( |
1198 | *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand}); |
1199 | } |
1200 | } |
1201 | |
1202 | if (InProlog) { |
1203 | // Apply the frame setup flag to all inserted instrs. |
1204 | for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI) |
1205 | ExpansionMBBI->setFlag(MachineInstr::FrameSetup); |
1206 | } |
1207 | } |
1208 | |
1209 | static unsigned calculateSetFPREG(uint64_t SPAdjust) { |
1210 | // Win64 ABI has a less restrictive limitation of 240; 128 works equally well |
1211 | // and might require smaller successive adjustments. |
1212 | const uint64_t Win64MaxSEHOffset = 128; |
1213 | uint64_t SEHFrameOffset = std::min(a: SPAdjust, b: Win64MaxSEHOffset); |
1214 | // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. |
1215 | return SEHFrameOffset & -16; |
1216 | } |
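// For example (illustrative): calculateSetFPREG(72) == 64 and
// calculateSetFPREG(200) == 128, keeping the UWOP_SET_FPREG offset 16-byte
// aligned and within the Win64 limit.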
1217 | |
1218 | // If we're forcing a stack realignment we can't rely on just the frame |
1219 | // info, we need to know the ABI stack alignment as well in case we |
1220 | // have a call out. Otherwise just make sure we have some alignment - we'll |
1221 | // go with the minimum SlotSize. |
1222 | uint64_t |
1223 | X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { |
1224 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1225 | Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment. |
1226 | Align StackAlign = getStackAlign(); |
1227 | bool HasRealign = MF.getFunction().hasFnAttribute(Kind: "stackrealign" ); |
1228 | if (HasRealign) { |
1229 | if (MFI.hasCalls()) |
1230 | MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; |
1231 | else if (MaxAlign < SlotSize) |
1232 | MaxAlign = Align(SlotSize); |
1233 | } |
1234 | |
1235 | if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) { |
1236 | if (HasRealign) |
1237 | MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16); |
1238 | else |
1239 | MaxAlign = Align(16); |
1240 | } |
1241 | return MaxAlign.value(); |
1242 | } |
1243 | |
1244 | void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, |
1245 | MachineBasicBlock::iterator MBBI, |
1246 | const DebugLoc &DL, unsigned Reg, |
1247 | uint64_t MaxAlign) const { |
1248 | uint64_t Val = -MaxAlign; |
1249 | unsigned AndOp = getANDriOpcode(IsLP64: Uses64BitFramePtr, Imm: Val); |
1250 | |
1251 | MachineFunction &MF = *MBB.getParent(); |
1252 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
1253 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
1254 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
1255 | const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); |
1256 | |
1257 | // We want to make sure that (in worst case) less than StackProbeSize bytes |
1258 | // are not probed after the AND. This assumption is used in |
1259 | // emitStackProbeInlineGeneric. |
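  // When that is the case, the single AND below is expanded into an aligning
  // probe loop; roughly (64-bit flavor, scratch register choice may differ):
  //   entry: mov  %rsp, %r11
  //          and  $-MaxAlign, %r11
  //          cmp  %rsp, %r11
  //          je   tail                ; already aligned, nothing to probe
  //   head:  sub  $StackProbeSize, %rsp
  //          cmp  %r11, %rsp
  //          jb   foot                ; overshot the aligned target
  //   body:  movq $0, (%rsp)          ; touch the page
  //          sub  $StackProbeSize, %rsp
  //          cmp  %rsp, %r11
  //          jb   body
  //   foot:  mov  %r11, %rsp
  //          movq $0, (%rsp)
  //   tail:  ...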
1260 | if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) { |
1261 | { |
1262 | NumFrameLoopProbe++; |
1263 | MachineBasicBlock *entryMBB = |
1264 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1265 | MachineBasicBlock *headMBB = |
1266 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1267 | MachineBasicBlock *bodyMBB = |
1268 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
      MachineBasicBlock *footMBB =
1270 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1271 | |
1272 | MachineFunction::iterator MBBIter = MBB.getIterator(); |
1273 | MF.insert(MBBI: MBBIter, MBB: entryMBB); |
1274 | MF.insert(MBBI: MBBIter, MBB: headMBB); |
1275 | MF.insert(MBBI: MBBIter, MBB: bodyMBB); |
1276 | MF.insert(MBBI: MBBIter, MBB: footMBB); |
1277 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
1278 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 |
1279 | : Is64Bit ? X86::R11D |
1280 | : X86::EAX; |
1281 | |
1282 | // Setup entry block |
1283 | { |
1284 | |
1285 | entryMBB->splice(Where: entryMBB->end(), Other: &MBB, From: MBB.begin(), To: MBBI); |
1286 | BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: FinalStackProbed) |
1287 | .addReg(RegNo: StackPtr) |
1288 | .setMIFlag(MachineInstr::FrameSetup); |
1289 | MachineInstr *MI = |
1290 | BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: AndOp), DestReg: FinalStackProbed) |
1291 | .addReg(RegNo: FinalStackProbed) |
1292 | .addImm(Val) |
1293 | .setMIFlag(MachineInstr::FrameSetup); |
1294 | |
1295 | // The EFLAGS implicit def is dead. |
1296 | MI->getOperand(i: 3).setIsDead(); |
1297 | |
1298 | BuildMI(BB: entryMBB, MIMD: DL, |
1299 | MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1300 | .addReg(RegNo: FinalStackProbed) |
1301 | .addReg(RegNo: StackPtr) |
1302 | .setMIFlag(MachineInstr::FrameSetup); |
1303 | BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1304 | .addMBB(MBB: &MBB) |
1305 | .addImm(Val: X86::COND_E) |
1306 | .setMIFlag(MachineInstr::FrameSetup); |
1307 | entryMBB->addSuccessor(Succ: headMBB); |
1308 | entryMBB->addSuccessor(Succ: &MBB); |
1309 | } |
1310 | |
1311 | // Loop entry block |
1312 | |
1313 | { |
1314 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
1315 | BuildMI(BB: headMBB, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: StackPtr) |
1316 | .addReg(RegNo: StackPtr) |
1317 | .addImm(Val: StackProbeSize) |
1318 | .setMIFlag(MachineInstr::FrameSetup); |
1319 | |
1320 | BuildMI(BB: headMBB, MIMD: DL, |
1321 | MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1322 | .addReg(RegNo: StackPtr) |
1323 | .addReg(RegNo: FinalStackProbed) |
1324 | .setMIFlag(MachineInstr::FrameSetup); |
1325 | |
1326 | // jump to the footer if StackPtr < FinalStackProbed |
1327 | BuildMI(BB: headMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1328 | .addMBB(MBB: footMBB) |
1329 | .addImm(Val: X86::COND_B) |
1330 | .setMIFlag(MachineInstr::FrameSetup); |
1331 | |
1332 | headMBB->addSuccessor(Succ: bodyMBB); |
1333 | headMBB->addSuccessor(Succ: footMBB); |
1334 | } |
1335 | |
1336 | // setup loop body |
1337 | { |
1338 | addRegOffset(MIB: BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
1339 | .setMIFlag(MachineInstr::FrameSetup), |
1340 | Reg: StackPtr, isKill: false, Offset: 0) |
1341 | .addImm(Val: 0) |
1342 | .setMIFlag(MachineInstr::FrameSetup); |
1343 | |
1344 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
1345 | BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: StackPtr) |
1346 | .addReg(RegNo: StackPtr) |
1347 | .addImm(Val: StackProbeSize) |
1348 | .setMIFlag(MachineInstr::FrameSetup); |
1349 | |
1350 | // cmp with stack pointer bound |
1351 | BuildMI(BB: bodyMBB, MIMD: DL, |
1352 | MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1353 | .addReg(RegNo: FinalStackProbed) |
1354 | .addReg(RegNo: StackPtr) |
1355 | .setMIFlag(MachineInstr::FrameSetup); |
1356 | |
1357 | // jump back while FinalStackProbed < StackPtr |
1358 | BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1359 | .addMBB(MBB: bodyMBB) |
1360 | .addImm(Val: X86::COND_B) |
1361 | .setMIFlag(MachineInstr::FrameSetup); |
1362 | bodyMBB->addSuccessor(Succ: bodyMBB); |
1363 | bodyMBB->addSuccessor(Succ: footMBB); |
1364 | } |
1365 | |
1366 | // setup loop footer |
1367 | { |
1368 | BuildMI(BB: footMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: StackPtr) |
1369 | .addReg(RegNo: FinalStackProbed) |
1370 | .setMIFlag(MachineInstr::FrameSetup); |
1371 | addRegOffset(MIB: BuildMI(BB: footMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
1372 | .setMIFlag(MachineInstr::FrameSetup), |
1373 | Reg: StackPtr, isKill: false, Offset: 0) |
1374 | .addImm(Val: 0) |
1375 | .setMIFlag(MachineInstr::FrameSetup); |
1376 | footMBB->addSuccessor(Succ: &MBB); |
1377 | } |
1378 | |
1379 | fullyRecomputeLiveIns(MBBs: {footMBB, bodyMBB, headMBB, &MBB}); |
1380 | } |
1381 | } else { |
1382 | MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: AndOp), DestReg: Reg) |
1383 | .addReg(RegNo: Reg) |
1384 | .addImm(Val) |
1385 | .setMIFlag(MachineInstr::FrameSetup); |
1386 | |
1387 | // The EFLAGS implicit def is dead. |
1388 | MI->getOperand(i: 3).setIsDead(); |
1389 | } |
1390 | } |
1391 | |
1392 | bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const { |
1393 | // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be |
1394 | // clobbered by any interrupt handler. |
1395 | assert(&STI == &MF.getSubtarget<X86Subtarget>() && |
1396 | "MF used frame lowering for wrong subtarget" ); |
1397 | const Function &Fn = MF.getFunction(); |
1398 | const bool IsWin64CC = STI.isCallingConvWin64(CC: Fn.getCallingConv()); |
1399 | return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Kind: Attribute::NoRedZone); |
1400 | } |
1401 | |
1402 | /// Return true if we need to use the restricted Windows x64 prologue and |
1403 | /// epilogue code patterns that can be described with WinCFI (.seh_* |
1404 | /// directives). |
1405 | bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { |
1406 | return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
1407 | } |
1408 | |
1409 | bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const { |
1410 | return !isWin64Prologue(MF) && MF.needsFrameMoves(); |
1411 | } |
1412 | |
1413 | /// Return true if an opcode is part of the REP group of instructions |
1414 | static bool isOpcodeRep(unsigned Opcode) { |
1415 | switch (Opcode) { |
1416 | case X86::REPNE_PREFIX: |
1417 | case X86::REP_MOVSB_32: |
1418 | case X86::REP_MOVSB_64: |
1419 | case X86::REP_MOVSD_32: |
1420 | case X86::REP_MOVSD_64: |
1421 | case X86::REP_MOVSQ_32: |
1422 | case X86::REP_MOVSQ_64: |
1423 | case X86::REP_MOVSW_32: |
1424 | case X86::REP_MOVSW_64: |
1425 | case X86::REP_PREFIX: |
1426 | case X86::REP_STOSB_32: |
1427 | case X86::REP_STOSB_64: |
1428 | case X86::REP_STOSD_32: |
1429 | case X86::REP_STOSD_64: |
1430 | case X86::REP_STOSQ_32: |
1431 | case X86::REP_STOSQ_64: |
1432 | case X86::REP_STOSW_32: |
1433 | case X86::REP_STOSW_64: |
1434 | return true; |
1435 | default: |
1436 | break; |
1437 | } |
1438 | return false; |
1439 | } |
1440 | |
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.
1445 | |
1446 | /* |
1447 | Here's a gist of what gets emitted: |
1448 | |
1449 | ; Establish frame pointer, if needed |
1450 | [if needs FP] |
1451 | push %rbp |
1452 | .cfi_def_cfa_offset 16 |
1453 | .cfi_offset %rbp, -16 |
       .seh_pushreg %rbp
1455 | mov %rsp, %rbp |
1456 | .cfi_def_cfa_register %rbp |
1457 | |
1458 | ; Spill general-purpose registers |
1459 | [for all callee-saved GPRs] |
1460 | pushq %<reg> |
1461 | [if not needs FP] |
1462 | .cfi_def_cfa_offset (offset from RETADDR) |
1463 | .seh_pushreg %<reg> |
1464 | |
1465 | ; If the required stack alignment > default stack alignment |
1466 | ; rsp needs to be re-aligned. This creates a "re-alignment gap" |
1467 | ; of unknown size in the stack frame. |
1468 | [if stack needs re-alignment] |
1469 | and $MASK, %rsp |
1470 | |
1471 | ; Allocate space for locals |
1472 | [if target is Windows and allocated space > 4096 bytes] |
1473 | ; Windows needs special care for allocations larger |
1474 | ; than one page. |
1475 | mov $NNN, %rax |
1476 | call ___chkstk_ms/___chkstk |
1477 | sub %rax, %rsp |
1478 | [else] |
1479 | sub $NNN, %rsp |
1480 | |
1481 | [if needs FP] |
1482 | .seh_stackalloc (size of XMM spill slots) |
1483 | .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots |
1484 | [else] |
1485 | .seh_stackalloc NNN |
1486 | |
1487 | ; Spill XMMs |
  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
1489 | ; they may get spilled on any platform, if the current function |
1490 | ; calls @llvm.eh.unwind.init |
1491 | [if needs FP] |
1492 | [for all callee-saved XMM registers] |
1493 | movaps %<xmm reg>, -MMM(%rbp) |
1494 | [for all callee-saved XMM registers] |
1495 | .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) |
1496 | ; i.e. the offset relative to (%rbp - SEHFrameOffset) |
1497 | [else] |
1498 | [for all callee-saved XMM registers] |
1499 | movaps %<xmm reg>, KKK(%rsp) |
1500 | [for all callee-saved XMM registers] |
1501 | .seh_savexmm %<xmm reg>, KKK |
1502 | |
1503 | .seh_endprologue |
1504 | |
1505 | [if needs base pointer] |
1506 | mov %rsp, %rbx |
1507 | [if needs to restore base pointer] |
1508 | mov %rsp, -MMM(%rbp) |
1509 | |
1510 | ; Emit CFI info |
1511 | [if needs FP] |
1512 | [for all callee-saved registers] |
1513 | .cfi_offset %<reg>, (offset from %rbp) |
1514 | [else] |
1515 | .cfi_def_cfa_offset (offset from RETADDR) |
1516 | [for all callee-saved registers] |
1517 | .cfi_offset %<reg>, (offset from %rsp) |
1518 | |
1519 | Notes: |
1520 | - .seh directives are emitted only for Windows 64 ABI |
1521 | - .cv_fpo directives are emitted on win32 when emitting CodeView |
1522 | - .cfi directives are emitted for all other ABIs |
1523 | - for 32-bit code, substitute %e?? registers for %r?? |
1524 | */ |
1525 | |
1526 | void X86FrameLowering::emitPrologue(MachineFunction &MF, |
1527 | MachineBasicBlock &MBB) const { |
1528 | assert(&STI == &MF.getSubtarget<X86Subtarget>() && |
1529 | "MF used frame lowering for wrong subtarget" ); |
1530 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
1531 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1532 | const Function &Fn = MF.getFunction(); |
1533 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
1534 | uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. |
1535 | uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. |
1536 | bool IsFunclet = MBB.isEHFuncletEntry(); |
1537 | EHPersonality Personality = EHPersonality::Unknown; |
1538 | if (Fn.hasPersonalityFn()) |
1539 | Personality = classifyEHPersonality(Pers: Fn.getPersonalityFn()); |
1540 | bool FnHasClrFunclet = |
1541 | MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; |
1542 | bool IsClrFunclet = IsFunclet && FnHasClrFunclet; |
1543 | bool HasFP = hasFP(MF); |
1544 | bool IsWin64Prologue = isWin64Prologue(MF); |
1545 | bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); |
1546 | // FIXME: Emit FPO data for EH funclets. |
1547 | bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() && |
1548 | MF.getFunction().getParent()->getCodeViewFlag(); |
1549 | bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; |
1550 | bool NeedsDwarfCFI = needsDwarfCFI(MF); |
1551 | Register FramePtr = TRI->getFrameRegister(MF); |
1552 | const Register MachineFramePtr = |
1553 | STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
1554 | : FramePtr; |
1555 | Register BasePtr = TRI->getBaseRegister(); |
1556 | bool HasWinCFI = false; |
1557 | |
1558 | // Debug location must be unknown since the first debug location is used |
1559 | // to determine the end of the prologue. |
1560 | DebugLoc DL; |
1561 | Register ArgBaseReg; |
1562 | |
1563 | // Emit extra prolog for argument stack slot reference. |
1564 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
    // MI is the LEA instruction created in X86ArgumentStackSlotPass.
    // Create an extra prolog for stack realignment.
1567 | ArgBaseReg = MI->getOperand(i: 0).getReg(); |
1568 | // leal 4(%esp), %basereg |
1569 | // .cfi_def_cfa %basereg, 0 |
1570 | // andl $-128, %esp |
1571 | // pushl -4(%basereg) |
1572 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Is64Bit ? X86::LEA64r : X86::LEA32r), |
1573 | DestReg: ArgBaseReg) |
1574 | .addUse(RegNo: StackPtr) |
1575 | .addImm(Val: 1) |
1576 | .addUse(RegNo: X86::NoRegister) |
1577 | .addImm(Val: SlotSize) |
1578 | .addUse(RegNo: X86::NoRegister) |
1579 | .setMIFlag(MachineInstr::FrameSetup); |
1580 | if (NeedsDwarfCFI) { |
1581 | // .cfi_def_cfa %basereg, 0 |
1582 | unsigned DwarfStackPtr = TRI->getDwarfRegNum(RegNum: ArgBaseReg, isEH: true); |
1583 | BuildCFI(MBB, MBBI, DL, |
1584 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: 0), |
1585 | Flag: MachineInstr::FrameSetup); |
1586 | } |
1587 | BuildStackAlignAND(MBB, MBBI, DL, Reg: StackPtr, MaxAlign); |
1588 | int64_t Offset = -(int64_t)SlotSize; |
1589 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm)) |
1590 | .addReg(RegNo: ArgBaseReg) |
1591 | .addImm(Val: 1) |
1592 | .addReg(RegNo: X86::NoRegister) |
1593 | .addImm(Val: Offset) |
1594 | .addReg(RegNo: X86::NoRegister) |
1595 | .setMIFlag(MachineInstr::FrameSetup); |
1596 | } |
1597 | |
1598 | // Space reserved for stack-based arguments when making a (ABI-guaranteed) |
1599 | // tail call. |
1600 | unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); |
1601 | if (TailCallArgReserveSize && IsWin64Prologue) |
1602 | report_fatal_error(reason: "Can't handle guaranteed tail call under win64 yet" ); |
1603 | |
1604 | const bool EmitStackProbeCall = |
1605 | STI.getTargetLowering()->hasStackProbeSymbol(MF); |
1606 | unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); |
1607 | |
1608 | if (HasFP && X86FI->hasSwiftAsyncContext()) { |
1609 | switch (MF.getTarget().Options.SwiftAsyncFramePointer) { |
1610 | case SwiftAsyncFramePointerMode::DeploymentBased: |
1611 | if (STI.swiftAsyncContextIsDynamicallySet()) { |
1612 | // The special symbol below is absolute and has a *value* suitable to be |
1613 | // combined with the frame pointer directly. |
1614 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::OR64rm), DestReg: MachineFramePtr) |
1615 | .addUse(RegNo: MachineFramePtr) |
1616 | .addUse(RegNo: X86::RIP) |
1617 | .addImm(Val: 1) |
1618 | .addUse(RegNo: X86::NoRegister) |
1619 | .addExternalSymbol(FnName: "swift_async_extendedFramePointerFlags" , |
1620 | TargetFlags: X86II::MO_GOTPCREL) |
1621 | .addUse(RegNo: X86::NoRegister); |
1622 | break; |
1623 | } |
1624 | [[fallthrough]]; |
1625 | |
1626 | case SwiftAsyncFramePointerMode::Always: |
1627 | assert( |
1628 | !IsWin64Prologue && |
1629 | "win64 prologue does not set the bit 60 in the saved frame pointer" ); |
1630 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::BTS64ri8), DestReg: MachineFramePtr) |
1631 | .addUse(RegNo: MachineFramePtr) |
1632 | .addImm(Val: 60) |
1633 | .setMIFlag(MachineInstr::FrameSetup); |
1634 | break; |
1635 | |
1636 | case SwiftAsyncFramePointerMode::Never: |
1637 | break; |
1638 | } |
1639 | } |
1640 | |
1641 | // Re-align the stack on 64-bit if the x86-interrupt calling convention is |
1642 | // used and an error code was pushed, since the x86-64 ABI requires a 16-byte |
1643 | // stack alignment. |
1644 | if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit && |
1645 | Fn.arg_size() == 2) { |
1646 | StackSize += 8; |
1647 | MFI.setStackSize(StackSize); |
1648 | |
    // Update the stack pointer by pushing a register. This is the instruction
    // that would otherwise be emitted by a call to `emitSPUpdate`.
    // Hard-coding the update to a push avoids emitting a second
    // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
    // probing isn't needed anyway for an 8-byte update.
1654 | // Pushing a register leaves us in a similar situation to a regular |
1655 | // function call where we know that the address at (rsp-8) is writeable. |
1656 | // That way we avoid any off-by-ones with stack probing for additional |
1657 | // stack pointer updates later on. |
1658 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
1659 | .addReg(RegNo: X86::RAX, flags: RegState::Undef) |
1660 | .setMIFlag(MachineInstr::FrameSetup); |
1661 | } |
1662 | |
1663 | // If this is x86-64 and the Red Zone is not disabled, if we are a leaf |
1664 | // function, and use up to 128 bytes of stack space, don't have a frame |
1665 | // pointer, calls, or dynamic alloca then we do not need to adjust the |
1666 | // stack pointer (we fit in the Red Zone). We also check that we don't |
1667 | // push and pop from the stack. |
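  // For example (hypothetical sizes): a leaf function with 40 bytes of locals,
  // no saved CSRs and no frame pointer gets MinSize = 0 and
  // StackSize = max(0, 40 - 128) = 0, so no SP adjustment is emitted and the
  // locals simply live in the red zone.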
1668 | if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) && |
1669 | !MFI.hasVarSizedObjects() && // No dynamic alloca. |
1670 | !MFI.adjustsStack() && // No calls. |
1671 | !EmitStackProbeCall && // No stack probes. |
1672 | !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. |
1673 | !MF.shouldSplitStack()) { // Regular stack |
1674 | uint64_t MinSize = |
1675 | X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta(); |
1676 | if (HasFP) |
1677 | MinSize += SlotSize; |
1678 | X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); |
1679 | StackSize = std::max(a: MinSize, b: StackSize > 128 ? StackSize - 128 : 0); |
1680 | MFI.setStackSize(StackSize); |
1681 | } |
1682 | |
1683 | // Insert stack pointer adjustment for later moving of return addr. Only |
1684 | // applies to tail call optimized functions where the callee argument stack |
1685 | // size is bigger than the callers. |
1686 | if (TailCallArgReserveSize != 0) { |
1687 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -(int)TailCallArgReserveSize, |
1688 | /*InEpilogue=*/false) |
1689 | .setMIFlag(MachineInstr::FrameSetup); |
1690 | } |
1691 | |
1692 | // Mapping for machine moves: |
1693 | // |
1694 | // DST: VirtualFP AND |
1695 | // SRC: VirtualFP => DW_CFA_def_cfa_offset |
1696 | // ELSE => DW_CFA_def_cfa |
1697 | // |
1698 | // SRC: VirtualFP AND |
1699 | // DST: Register => DW_CFA_def_cfa_register |
1700 | // |
1701 | // ELSE |
1702 | // OFFSET < 0 => DW_CFA_offset_extended_sf |
1703 | // REG < 64 => DW_CFA_offset + Reg |
1704 | // ELSE => DW_CFA_offset_extended |
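  // For example, the initial `push %rbp` of a framed function maps to
  // DW_CFA_def_cfa_offset 16 followed by DW_CFA_offset %rbp, -16, and the
  // later `mov %rsp, %rbp` maps to DW_CFA_def_cfa_register %rbp (matching the
  // .cfi_* lines in the gist above).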
1705 | |
1706 | uint64_t NumBytes = 0; |
1707 | int stackGrowth = -SlotSize; |
1708 | |
1709 | // Find the funclet establisher parameter |
1710 | Register Establisher = X86::NoRegister; |
1711 | if (IsClrFunclet) |
1712 | Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX; |
1713 | else if (IsFunclet) |
1714 | Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; |
1715 | |
1716 | if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { |
1717 | // Immediately spill establisher into the home slot. |
1718 | // The runtime cares about this. |
1719 | // MOV64mr %rdx, 16(%rsp) |
1720 | unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
1721 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVmr)), Reg: StackPtr, isKill: true, Offset: 16) |
1722 | .addReg(RegNo: Establisher) |
1723 | .setMIFlag(MachineInstr::FrameSetup); |
1724 | MBB.addLiveIn(PhysReg: Establisher); |
1725 | } |
1726 | |
1727 | if (HasFP) { |
1728 | assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved" ); |
1729 | |
1730 | // Calculate required stack adjustment. |
1731 | uint64_t FrameSize = StackSize - SlotSize; |
1732 | NumBytes = |
1733 | FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); |
1734 | |
1735 | // Callee-saved registers are pushed on stack before the stack is realigned. |
1736 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) |
1737 | NumBytes = alignTo(Value: NumBytes, Align: MaxAlign); |
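    // For example (hypothetical sizes): StackSize = 104, SlotSize = 8,
    // 16 bytes of pushed CSRs and no tail-call reserve give
    // NumBytes = (104 - 8) - 16 = 80; with MaxAlign = 32 and stack
    // realignment required, this is then rounded up to 96.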
1738 | |
1739 | // Save EBP/RBP into the appropriate stack slot. |
1740 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, |
1741 | MCID: TII.get(Opcode: getPUSHOpcode(ST: MF.getSubtarget<X86Subtarget>()))) |
1742 | .addReg(RegNo: MachineFramePtr, flags: RegState::Kill) |
1743 | .setMIFlag(MachineInstr::FrameSetup); |
1744 | |
1745 | if (NeedsDwarfCFI && !ArgBaseReg.isValid()) { |
1746 | // Mark the place where EBP/RBP was saved. |
1747 | // Define the current CFA rule to use the provided offset. |
1748 | assert(StackSize); |
1749 | BuildCFI(MBB, MBBI, DL, |
1750 | CFIInst: MCCFIInstruction::cfiDefCfaOffset( |
1751 | L: nullptr, Offset: -2 * stackGrowth + (int)TailCallArgReserveSize), |
1752 | Flag: MachineInstr::FrameSetup); |
1753 | |
1754 | // Change the rule for the FramePtr to be an "offset" rule. |
1755 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
1756 | BuildCFI(MBB, MBBI, DL, |
1757 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfFramePtr, |
1758 | Offset: 2 * stackGrowth - |
1759 | (int)TailCallArgReserveSize), |
1760 | Flag: MachineInstr::FrameSetup); |
1761 | } |
1762 | |
1763 | if (NeedsWinCFI) { |
1764 | HasWinCFI = true; |
1765 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg)) |
1766 | .addImm(Val: FramePtr) |
1767 | .setMIFlag(MachineInstr::FrameSetup); |
1768 | } |
1769 | |
1770 | if (!IsFunclet) { |
1771 | if (X86FI->hasSwiftAsyncContext()) { |
1772 | assert(!IsWin64Prologue && |
1773 | "win64 prologue does not store async context right below rbp" ); |
1774 | const auto &Attrs = MF.getFunction().getAttributes(); |
1775 | |
1776 | // Before we update the live frame pointer we have to ensure there's a |
1777 | // valid (or null) asynchronous context in its slot just before FP in |
1778 | // the frame record, so store it now. |
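        // After this block the frame record is laid out as
        //   [rbp]     saved parent frame pointer
        //   [rbp - 8] asynchronous context (or null)
        // with rbp re-pointed at the saved-FP slot by the LEA/SUB below.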
1779 | if (Attrs.hasAttrSomewhere(Kind: Attribute::SwiftAsync)) { |
1780 | // We have an initial context in r14, store it just before the frame |
1781 | // pointer. |
1782 | MBB.addLiveIn(PhysReg: X86::R14); |
1783 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
1784 | .addReg(RegNo: X86::R14) |
1785 | .setMIFlag(MachineInstr::FrameSetup); |
1786 | } else { |
1787 | // No initial context, store null so that there's no pointer that |
1788 | // could be misused. |
1789 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64i32)) |
1790 | .addImm(Val: 0) |
1791 | .setMIFlag(MachineInstr::FrameSetup); |
1792 | } |
1793 | |
1794 | if (NeedsWinCFI) { |
1795 | HasWinCFI = true; |
1796 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg)) |
1797 | .addImm(Val: X86::R14) |
1798 | .setMIFlag(MachineInstr::FrameSetup); |
1799 | } |
1800 | |
1801 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: FramePtr) |
1802 | .addUse(RegNo: X86::RSP) |
1803 | .addImm(Val: 1) |
1804 | .addUse(RegNo: X86::NoRegister) |
1805 | .addImm(Val: 8) |
1806 | .addUse(RegNo: X86::NoRegister) |
1807 | .setMIFlag(MachineInstr::FrameSetup); |
1808 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64ri32), DestReg: X86::RSP) |
1809 | .addUse(RegNo: X86::RSP) |
1810 | .addImm(Val: 8) |
1811 | .setMIFlag(MachineInstr::FrameSetup); |
1812 | } |
1813 | |
1814 | if (!IsWin64Prologue && !IsFunclet) { |
1815 | // Update EBP with the new base value. |
1816 | if (!X86FI->hasSwiftAsyncContext()) |
1817 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, |
1818 | MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), |
1819 | DestReg: FramePtr) |
1820 | .addReg(RegNo: StackPtr) |
1821 | .setMIFlag(MachineInstr::FrameSetup); |
1822 | |
1823 | if (NeedsDwarfCFI) { |
1824 | if (ArgBaseReg.isValid()) { |
1825 | SmallString<64> CfaExpr; |
1826 | CfaExpr.push_back(Elt: dwarf::DW_CFA_expression); |
1827 | uint8_t buffer[16]; |
1828 | unsigned DwarfReg = TRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
1829 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer)); |
1830 | CfaExpr.push_back(Elt: 2); |
1831 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); |
1832 | CfaExpr.push_back(Elt: 0); |
1833 | // DW_CFA_expression: reg5 DW_OP_breg5 +0 |
1834 | BuildCFI(MBB, MBBI, DL, |
1835 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str()), |
1836 | Flag: MachineInstr::FrameSetup); |
1837 | } else { |
1838 | // Mark effective beginning of when frame pointer becomes valid. |
1839 | // Define the current CFA to use the EBP/RBP register. |
1840 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
1841 | BuildCFI( |
1842 | MBB, MBBI, DL, |
1843 | CFIInst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: DwarfFramePtr), |
1844 | Flag: MachineInstr::FrameSetup); |
1845 | } |
1846 | } |
1847 | |
1848 | if (NeedsWinFPO) { |
1849 | // .cv_fpo_setframe $FramePtr |
1850 | HasWinCFI = true; |
1851 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SetFrame)) |
1852 | .addImm(Val: FramePtr) |
1853 | .addImm(Val: 0) |
1854 | .setMIFlag(MachineInstr::FrameSetup); |
1855 | } |
1856 | } |
1857 | } |
1858 | } else { |
1859 | assert(!IsFunclet && "funclets without FPs not yet implemented" ); |
1860 | NumBytes = |
1861 | StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); |
1862 | } |
1863 | |
1864 | // Update the offset adjustment, which is mainly used by codeview to translate |
1865 | // from ESP to VFRAME relative local variable offsets. |
1866 | if (!IsFunclet) { |
1867 | if (HasFP && TRI->hasStackRealignment(MF)) |
1868 | MFI.setOffsetAdjustment(-NumBytes); |
1869 | else |
1870 | MFI.setOffsetAdjustment(-StackSize); |
1871 | } |
1872 | |
1873 | // For EH funclets, only allocate enough space for outgoing calls. Save the |
1874 | // NumBytes value that we would've used for the parent frame. |
  unsigned ParentFrameNumBytes = NumBytes;
1876 | if (IsFunclet) |
1877 | NumBytes = getWinEHFuncletFrameSize(MF); |
1878 | |
1879 | // Skip the callee-saved push instructions. |
1880 | bool PushedRegs = false; |
1881 | int StackOffset = 2 * stackGrowth; |
1882 | MachineBasicBlock::const_iterator LastCSPush = MBBI; |
1883 | auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) { |
1884 | if (MBBI == MBB.end() || !MBBI->getFlag(Flag: MachineInstr::FrameSetup)) |
1885 | return false; |
1886 | unsigned Opc = MBBI->getOpcode(); |
1887 | return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r || |
1888 | Opc == X86::PUSH2 || Opc == X86::PUSH2P; |
1889 | }; |
1890 | |
1891 | while (IsCSPush(MBBI)) { |
1892 | PushedRegs = true; |
1893 | Register Reg = MBBI->getOperand(i: 0).getReg(); |
1894 | LastCSPush = MBBI; |
1895 | ++MBBI; |
1896 | unsigned Opc = LastCSPush->getOpcode(); |
1897 | |
1898 | if (!HasFP && NeedsDwarfCFI) { |
1899 | // Mark callee-saved push instruction. |
1900 | // Define the current CFA rule to use the provided offset. |
1901 | assert(StackSize); |
1902 | // Compared to push, push2 introduces more stack offset (one more |
1903 | // register). |
1904 | if (Opc == X86::PUSH2 || Opc == X86::PUSH2P) |
1905 | StackOffset += stackGrowth; |
1906 | BuildCFI(MBB, MBBI, DL, |
1907 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -StackOffset), |
1908 | Flag: MachineInstr::FrameSetup); |
1909 | StackOffset += stackGrowth; |
1910 | } |
1911 | |
1912 | if (NeedsWinCFI) { |
1913 | HasWinCFI = true; |
1914 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg)) |
1915 | .addImm(Val: Reg) |
1916 | .setMIFlag(MachineInstr::FrameSetup); |
1917 | if (Opc == X86::PUSH2 || Opc == X86::PUSH2P) |
1918 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg)) |
1919 | .addImm(Val: LastCSPush->getOperand(i: 1).getReg()) |
1920 | .setMIFlag(MachineInstr::FrameSetup); |
1921 | } |
1922 | } |
1923 | |
1924 | // Realign stack after we pushed callee-saved registers (so that we'll be |
1925 | // able to calculate their offsets from the frame pointer). |
1926 | // Don't do this for Win64, it needs to realign the stack after the prologue. |
1927 | if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) && |
1928 | !ArgBaseReg.isValid()) { |
1929 | assert(HasFP && "There should be a frame pointer if stack is realigned." ); |
1930 | BuildStackAlignAND(MBB, MBBI, DL, Reg: StackPtr, MaxAlign); |
1931 | |
1932 | if (NeedsWinCFI) { |
1933 | HasWinCFI = true; |
1934 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_StackAlign)) |
1935 | .addImm(Val: MaxAlign) |
1936 | .setMIFlag(MachineInstr::FrameSetup); |
1937 | } |
1938 | } |
1939 | |
  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
1943 | NumBytes -= mergeSPUpdates(MBB, MBBI, doMergeWithPrevious: true); |
1944 | |
1945 | // Adjust stack pointer: ESP -= numbytes. |
1946 | |
1947 | // Windows and cygwin/mingw require a prologue helper routine when allocating |
1948 | // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw |
1949 | // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the |
1950 | // stack and adjust the stack pointer in one go. The 64-bit version of |
1951 | // __chkstk is only responsible for probing the stack. The 64-bit prologue is |
1952 | // responsible for adjusting the stack pointer. Touching the stack at 4K |
1953 | // increments is necessary to ensure that the guard pages used by the OS |
1954 | // virtual memory manager are allocated in correct sequence. |
1955 | uint64_t AlignedNumBytes = NumBytes; |
1956 | if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) |
1957 | AlignedNumBytes = alignTo(Value: AlignedNumBytes, Align: MaxAlign); |
1958 | if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) { |
1959 | assert(!X86FI->getUsesRedZone() && |
1960 | "The Red Zone is not accounted for in stack probes" ); |
1961 | |
1962 | // Check whether EAX is livein for this block. |
1963 | bool isEAXAlive = isEAXLiveIn(MBB); |
1964 | |
1965 | if (isEAXAlive) { |
1966 | if (Is64Bit) { |
1967 | // Save RAX |
1968 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
1969 | .addReg(RegNo: X86::RAX, flags: RegState::Kill) |
1970 | .setMIFlag(MachineInstr::FrameSetup); |
1971 | } else { |
1972 | // Save EAX |
1973 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32r)) |
1974 | .addReg(RegNo: X86::EAX, flags: RegState::Kill) |
1975 | .setMIFlag(MachineInstr::FrameSetup); |
1976 | } |
1977 | } |
1978 | |
1979 | if (Is64Bit) { |
1980 | // Handle the 64-bit Windows ABI case where we need to call __chkstk. |
1981 | // Function prologue is responsible for adjusting the stack pointer. |
1982 | int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; |
1983 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: Is64Bit, Imm: Alloc)), DestReg: X86::RAX) |
1984 | .addImm(Val: Alloc) |
1985 | .setMIFlag(MachineInstr::FrameSetup); |
1986 | } else { |
1987 | // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. |
1988 | // We'll also use 4 already allocated bytes for EAX. |
1989 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: X86::EAX) |
1990 | .addImm(Val: isEAXAlive ? NumBytes - 4 : NumBytes) |
1991 | .setMIFlag(MachineInstr::FrameSetup); |
1992 | } |
1993 | |
1994 | // Call __chkstk, __chkstk_ms, or __alloca. |
1995 | emitStackProbe(MF, MBB, MBBI, DL, InProlog: true); |
1996 | |
1997 | if (isEAXAlive) { |
1998 | // Restore RAX/EAX |
1999 | MachineInstr *MI; |
2000 | if (Is64Bit) |
2001 | MI = addRegOffset(MIB: BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RAX), |
2002 | Reg: StackPtr, isKill: false, Offset: NumBytes - 8); |
2003 | else |
2004 | MI = addRegOffset(MIB: BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32rm), DestReg: X86::EAX), |
2005 | Reg: StackPtr, isKill: false, Offset: NumBytes - 4); |
2006 | MI->setFlag(MachineInstr::FrameSetup); |
2007 | MBB.insert(I: MBBI, MI); |
2008 | } |
2009 | } else if (NumBytes) { |
2010 | emitSPUpdate(MBB, MBBI, DL, NumBytes: -(int64_t)NumBytes, /*InEpilogue=*/false); |
2011 | } |
2012 | |
2013 | if (NeedsWinCFI && NumBytes) { |
2014 | HasWinCFI = true; |
2015 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_StackAlloc)) |
2016 | .addImm(Val: NumBytes) |
2017 | .setMIFlag(MachineInstr::FrameSetup); |
2018 | } |
2019 | |
2020 | int SEHFrameOffset = 0; |
2021 | unsigned SPOrEstablisher; |
2022 | if (IsFunclet) { |
2023 | if (IsClrFunclet) { |
2024 | // The establisher parameter passed to a CLR funclet is actually a pointer |
2025 | // to the (mostly empty) frame of its nearest enclosing funclet; we have |
2026 | // to find the root function establisher frame by loading the PSPSym from |
2027 | // the intermediate frame. |
2028 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); |
2029 | MachinePointerInfo NoInfo; |
2030 | MBB.addLiveIn(PhysReg: Establisher); |
2031 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: Establisher), |
2032 | Reg: Establisher, isKill: false, Offset: PSPSlotOffset) |
2033 | .addMemOperand(MMO: MF.getMachineMemOperand( |
2034 | PtrInfo: NoInfo, F: MachineMemOperand::MOLoad, Size: SlotSize, BaseAlignment: Align(SlotSize))); |
2035 | ; |
2036 | // Save the root establisher back into the current funclet's (mostly |
2037 | // empty) frame, in case a sub-funclet or the GC needs it. |
2038 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: StackPtr, |
2039 | isKill: false, Offset: PSPSlotOffset) |
2040 | .addReg(RegNo: Establisher) |
2041 | .addMemOperand(MMO: MF.getMachineMemOperand( |
2042 | PtrInfo: NoInfo, |
2043 | F: MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, |
2044 | Size: SlotSize, BaseAlignment: Align(SlotSize))); |
2045 | } |
2046 | SPOrEstablisher = Establisher; |
2047 | } else { |
2048 | SPOrEstablisher = StackPtr; |
2049 | } |
2050 | |
2051 | if (IsWin64Prologue && HasFP) { |
2052 | // Set RBP to a small fixed offset from RSP. In the funclet case, we base |
2053 | // this calculation on the incoming establisher, which holds the value of |
2054 | // RSP from the parent frame at the end of the prologue. |
2055 | SEHFrameOffset = calculateSetFPREG(SPAdjust: ParentFrameNumBytes); |
2056 | if (SEHFrameOffset) |
2057 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: FramePtr), |
2058 | Reg: SPOrEstablisher, isKill: false, Offset: SEHFrameOffset); |
2059 | else |
2060 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: FramePtr) |
2061 | .addReg(RegNo: SPOrEstablisher); |
2062 | |
2063 | // If this is not a funclet, emit the CFI describing our frame pointer. |
2064 | if (NeedsWinCFI && !IsFunclet) { |
2065 | assert(!NeedsWinFPO && "this setframe incompatible with FPO data" ); |
2066 | HasWinCFI = true; |
2067 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SetFrame)) |
2068 | .addImm(Val: FramePtr) |
2069 | .addImm(Val: SEHFrameOffset) |
2070 | .setMIFlag(MachineInstr::FrameSetup); |
2071 | if (isAsynchronousEHPersonality(Pers: Personality)) |
2072 | MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; |
2073 | } |
2074 | } else if (IsFunclet && STI.is32Bit()) { |
2075 | // Reset EBP / ESI to something good for funclets. |
2076 | MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); |
2077 | // If we're a catch funclet, we can be returned to via catchret. Save ESP |
2078 | // into the registration node so that the runtime will restore it for us. |
2079 | if (!MBB.isCleanupFuncletEntry()) { |
2080 | assert(Personality == EHPersonality::MSVC_CXX); |
2081 | Register FrameReg; |
2082 | int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; |
2083 | int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed(); |
2084 | // ESP is the first field, so no extra displacement is needed. |
2085 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32mr)), Reg: FrameReg, |
2086 | isKill: false, Offset: EHRegOffset) |
2087 | .addReg(RegNo: X86::ESP); |
2088 | } |
2089 | } |
2090 | |
2091 | while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup)) { |
2092 | const MachineInstr &FrameInstr = *MBBI; |
2093 | ++MBBI; |
2094 | |
2095 | if (NeedsWinCFI) { |
2096 | int FI; |
2097 | if (Register Reg = TII.isStoreToStackSlot(MI: FrameInstr, FrameIndex&: FI)) { |
2098 | if (X86::FR64RegClass.contains(Reg)) { |
2099 | int Offset; |
2100 | Register IgnoredFrameReg; |
2101 | if (IsWin64Prologue && IsFunclet) |
2102 | Offset = getWin64EHFrameIndexRef(MF, FI, SPReg&: IgnoredFrameReg); |
2103 | else |
2104 | Offset = |
2105 | getFrameIndexReference(MF, FI, FrameReg&: IgnoredFrameReg).getFixed() + |
2106 | SEHFrameOffset; |
2107 | |
2108 | HasWinCFI = true; |
2109 | assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data" ); |
2110 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SaveXMM)) |
2111 | .addImm(Val: Reg) |
2112 | .addImm(Val: Offset) |
2113 | .setMIFlag(MachineInstr::FrameSetup); |
2114 | } |
2115 | } |
2116 | } |
2117 | } |
2118 | |
2119 | if (NeedsWinCFI && HasWinCFI) |
2120 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_EndPrologue)) |
2121 | .setMIFlag(MachineInstr::FrameSetup); |
2122 | |
2123 | if (FnHasClrFunclet && !IsFunclet) { |
2124 | // Save the so-called Initial-SP (i.e. the value of the stack pointer |
2125 | // immediately after the prolog) into the PSPSlot so that funclets |
2126 | // and the GC can recover it. |
2127 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); |
2128 | auto PSPInfo = MachinePointerInfo::getFixedStack( |
2129 | MF, FI: MF.getWinEHFuncInfo()->PSPSymFrameIdx); |
2130 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: StackPtr, isKill: false, |
2131 | Offset: PSPSlotOffset) |
2132 | .addReg(RegNo: StackPtr) |
2133 | .addMemOperand(MMO: MF.getMachineMemOperand( |
2134 | PtrInfo: PSPInfo, F: MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, |
2135 | Size: SlotSize, BaseAlignment: Align(SlotSize))); |
2136 | } |
2137 | |
2138 | // Realign stack after we spilled callee-saved registers (so that we'll be |
2139 | // able to calculate their offsets from the frame pointer). |
2140 | // Win64 requires aligning the stack after the prologue. |
2141 | if (IsWin64Prologue && TRI->hasStackRealignment(MF)) { |
2142 | assert(HasFP && "There should be a frame pointer if stack is realigned." ); |
2143 | BuildStackAlignAND(MBB, MBBI, DL, Reg: SPOrEstablisher, MaxAlign); |
2144 | } |
2145 | |
2146 | // We already dealt with stack realignment and funclets above. |
2147 | if (IsFunclet && STI.is32Bit()) |
2148 | return; |
2149 | |
2150 | // If we need a base pointer, set it up here. It's whatever the value |
2151 | // of the stack pointer is at this point. Any variable size objects |
2152 | // will be allocated after this, so we can still use the base pointer |
2153 | // to reference locals. |
2154 | if (TRI->hasBasePointer(MF)) { |
2155 | // Update the base pointer with the current stack pointer. |
2156 | unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; |
2157 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: BasePtr) |
2158 | .addReg(RegNo: SPOrEstablisher) |
2159 | .setMIFlag(MachineInstr::FrameSetup); |
2160 | if (X86FI->getRestoreBasePointer()) { |
2161 | // Stash value of base pointer. Saving RSP instead of EBP shortens |
2162 | // dependence chain. Used by SjLj EH. |
2163 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
2164 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opm)), Reg: FramePtr, isKill: true, |
2165 | Offset: X86FI->getRestoreBasePointerOffset()) |
2166 | .addReg(RegNo: SPOrEstablisher) |
2167 | .setMIFlag(MachineInstr::FrameSetup); |
2168 | } |
2169 | |
2170 | if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { |
2171 | // Stash the value of the frame pointer relative to the base pointer for |
2172 | // Win32 EH. This supports Win32 EH, which does the inverse of the above: |
2173 | // it recovers the frame pointer from the base pointer rather than the |
2174 | // other way around. |
2175 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
2176 | Register UsedReg; |
2177 | int Offset = |
2178 | getFrameIndexReference(MF, FI: X86FI->getSEHFramePtrSaveIndex(), FrameReg&: UsedReg) |
2179 | .getFixed(); |
2180 | assert(UsedReg == BasePtr); |
2181 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opm)), Reg: UsedReg, isKill: true, Offset) |
2182 | .addReg(RegNo: FramePtr) |
2183 | .setMIFlag(MachineInstr::FrameSetup); |
2184 | } |
2185 | } |
2186 | if (ArgBaseReg.isValid()) { |
2187 | // Save argument base pointer. |
2188 | auto *MI = X86FI->getStackPtrSaveMI(); |
2189 | int FI = MI->getOperand(i: 1).getIndex(); |
2190 | unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr; |
2191 | // movl %basereg, offset(%ebp) |
2192 | addFrameReference(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVmr)), FI) |
2193 | .addReg(RegNo: ArgBaseReg) |
2194 | .setMIFlag(MachineInstr::FrameSetup); |
2195 | } |
2196 | |
2197 | if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { |
2198 | // Mark end of stack pointer adjustment. |
2199 | if (!HasFP && NumBytes) { |
2200 | // Define the current CFA rule to use the provided offset. |
2201 | assert(StackSize); |
2202 | BuildCFI( |
2203 | MBB, MBBI, DL, |
2204 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: StackSize - stackGrowth), |
2205 | Flag: MachineInstr::FrameSetup); |
2206 | } |
2207 | |
2208 | // Emit DWARF info specifying the offsets of the callee-saved registers. |
2209 | emitCalleeSavedFrameMoves(MBB, MBBI, DL, IsPrologue: true); |
2210 | } |
2211 | |
2212 | // X86 Interrupt handling function cannot assume anything about the direction |
2213 | // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction |
2214 | // in each prologue of interrupt handler function. |
2215 | // |
2216 | // Create "cld" instruction only in these cases: |
2217 | // 1. The interrupt handling function uses any of the "rep" instructions. |
2218 | // 2. Interrupt handling function calls another function. |
2219 | // 3. If there are any inline asm blocks, as we do not know what they do |
2220 | // |
2221 | // TODO: We should also emit cld if we detect the use of std, but as of now, |
2222 | // the compiler does not even emit that instruction or even define it, so in |
2223 | // practice, this would only happen with inline asm, which we cover anyway. |
2224 | if (Fn.getCallingConv() == CallingConv::X86_INTR) { |
2225 | bool NeedsCLD = false; |
2226 | |
2227 | for (const MachineBasicBlock &B : MF) { |
2228 | for (const MachineInstr &MI : B) { |
2229 | if (MI.isCall()) { |
2230 | NeedsCLD = true; |
2231 | break; |
2232 | } |
2233 | |
2234 | if (isOpcodeRep(Opcode: MI.getOpcode())) { |
2235 | NeedsCLD = true; |
2236 | break; |
2237 | } |
2238 | |
2239 | if (MI.isInlineAsm()) { |
2240 | // TODO: Parse asm for rep instructions or call sites? |
2241 | // For now, let's play it safe and emit a cld instruction |
2242 | // just in case. |
2243 | NeedsCLD = true; |
2244 | break; |
2245 | } |
2246 | } |
2247 | } |
2248 | |
2249 | if (NeedsCLD) { |
2250 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::CLD)) |
2251 | .setMIFlag(MachineInstr::FrameSetup); |
2252 | } |
2253 | } |
2254 | |
2255 | // At this point we know if the function has WinCFI or not. |
2256 | MF.setHasWinCFI(HasWinCFI); |
2257 | } |
2258 | |
2259 | bool X86FrameLowering::canUseLEAForSPInEpilogue( |
2260 | const MachineFunction &MF) const { |
2261 | // We can't use LEA instructions for adjusting the stack pointer if we don't |
2262 | // have a frame pointer in the Win64 ABI. Only ADD instructions may be used |
2263 | // to deallocate the stack. |
2264 | // This means that we can use LEA for SP in two situations: |
2265 | // 1. We *aren't* using the Win64 ABI which means we are free to use LEA. |
2266 | // 2. We *have* a frame pointer which means we are permitted to use LEA. |
2267 | return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); |
2268 | } |
2269 | |
2270 | static bool isFuncletReturnInstr(MachineInstr &MI) { |
2271 | switch (MI.getOpcode()) { |
2272 | case X86::CATCHRET: |
2273 | case X86::CLEANUPRET: |
2274 | return true; |
2275 | default: |
2276 | return false; |
2277 | } |
2278 | llvm_unreachable("impossible" ); |
2279 | } |
2280 | |
2281 | // CLR funclets use a special "Previous Stack Pointer Symbol" slot on the |
2282 | // stack. It holds a pointer to the bottom of the root function frame. The |
2283 | // establisher frame pointer passed to a nested funclet may point to the |
2284 | // (mostly empty) frame of its parent funclet, but it will need to find |
2285 | // the frame of the root function to access locals. To facilitate this, |
2286 | // every funclet copies the pointer to the bottom of the root function |
2287 | // frame into a PSPSym slot in its own (mostly empty) stack frame. Using the |
2288 | // same offset for the PSPSym in the root function frame that's used in the |
2289 | // funclets' frames allows each funclet to dynamically accept any ancestor |
2290 | // frame as its establisher argument (the runtime doesn't guarantee the |
2291 | // immediate parent for some reason lost to history), and also allows the GC, |
2292 | // which uses the PSPSym for some bookkeeping, to find it in any funclet's |
2293 | // frame with only a single offset reported for the entire method. |
2294 | unsigned |
2295 | X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { |
2296 | const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo(); |
2297 | Register SPReg; |
2298 | int Offset = getFrameIndexReferencePreferSP(MF, FI: Info.PSPSymFrameIdx, FrameReg&: SPReg, |
2299 | /*IgnoreSPUpdates*/ true) |
2300 | .getFixed(); |
2301 | assert(Offset >= 0 && SPReg == TRI->getStackRegister()); |
2302 | return static_cast<unsigned>(Offset); |
2303 | } |
2304 | |
2305 | unsigned |
2306 | X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { |
2307 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2308 | // This is the size of the pushed CSRs. |
2309 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2310 | // This is the size of callee saved XMMs. |
2311 | const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2312 | unsigned XMMSize = |
2313 | WinEHXMMSlotInfo.size() * TRI->getSpillSize(RC: X86::VR128RegClass); |
2314 | // This is the amount of stack a funclet needs to allocate. |
2315 | unsigned UsedSize; |
2316 | EHPersonality Personality = |
2317 | classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn()); |
2318 | if (Personality == EHPersonality::CoreCLR) { |
2319 | // CLR funclets need to hold enough space to include the PSPSym, at the |
2320 | // same offset from the stack pointer (immediately after the prolog) as it |
2321 | // resides at in the main function. |
2322 | UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize; |
2323 | } else { |
2324 | // Other funclets just need enough stack for outgoing call arguments. |
2325 | UsedSize = MF.getFrameInfo().getMaxCallFrameSize(); |
2326 | } |
2327 | // RBP is not included in the callee saved register block. After pushing RBP, |
2328 | // everything is 16 byte aligned. Everything we allocate before an outgoing |
2329 | // call must also be 16 byte aligned. |
2330 | unsigned FrameSizeMinusRBP = alignTo(Size: CSSize + UsedSize, A: getStackAlign()); |
2331 | // Subtract out the size of the callee saved registers. This is how much stack |
2332 | // each funclet will allocate. |
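  // For example (hypothetical sizes): CSSize = 24, UsedSize = 32 and two
  // 16-byte XMM slots give alignTo(24 + 32, 16) = 64, so the funclet
  // allocates 64 + 32 - 24 = 72 bytes.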
2333 | return FrameSizeMinusRBP + XMMSize - CSSize; |
2334 | } |
2335 | |
2336 | static bool isTailCallOpcode(unsigned Opc) { |
2337 | return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi || |
2338 | Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 || |
2339 | Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64; |
2340 | } |
2341 | |
2342 | void X86FrameLowering::emitEpilogue(MachineFunction &MF, |
2343 | MachineBasicBlock &MBB) const { |
2344 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2345 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2346 | MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator(); |
2347 | MachineBasicBlock::iterator MBBI = Terminator; |
2348 | DebugLoc DL; |
2349 | if (MBBI != MBB.end()) |
2350 | DL = MBBI->getDebugLoc(); |
2351 | // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. |
2352 | const bool Is64BitILP32 = STI.isTarget64BitILP32(); |
2353 | Register FramePtr = TRI->getFrameRegister(MF); |
2354 | Register MachineFramePtr = |
2355 | Is64BitILP32 ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) : FramePtr; |
2356 | |
2357 | bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
2358 | bool NeedsWin64CFI = |
2359 | IsWin64Prologue && MF.getFunction().needsUnwindTableEntry(); |
2360 | bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(MI&: *MBBI); |
2361 | |
2362 | // Get the number of bytes to allocate from the FrameInfo. |
2363 | uint64_t StackSize = MFI.getStackSize(); |
2364 | uint64_t MaxAlign = calculateMaxStackAlign(MF); |
2365 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2366 | unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); |
2367 | bool HasFP = hasFP(MF); |
2368 | uint64_t NumBytes = 0; |
2369 | |
2370 | bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() && |
2371 | !MF.getTarget().getTargetTriple().isOSWindows()) && |
2372 | MF.needsFrameMoves(); |
2373 | |
2374 | Register ArgBaseReg; |
2375 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
2376 | unsigned Opc = X86::LEA32r; |
2377 | Register StackReg = X86::ESP; |
2378 | ArgBaseReg = MI->getOperand(i: 0).getReg(); |
2379 | if (STI.is64Bit()) { |
2380 | Opc = X86::LEA64r; |
2381 | StackReg = X86::RSP; |
2382 | } |
2383 | // leal -4(%basereg), %esp |
2384 | // .cfi_def_cfa %esp, 4 |
2385 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackReg) |
2386 | .addUse(RegNo: ArgBaseReg) |
2387 | .addImm(Val: 1) |
2388 | .addUse(RegNo: X86::NoRegister) |
2389 | .addImm(Val: -(int64_t)SlotSize) |
2390 | .addUse(RegNo: X86::NoRegister) |
2391 | .setMIFlag(MachineInstr::FrameDestroy); |
2392 | if (NeedsDwarfCFI) { |
2393 | unsigned DwarfStackPtr = TRI->getDwarfRegNum(RegNum: StackReg, isEH: true); |
2394 | BuildCFI(MBB, MBBI, DL, |
2395 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: SlotSize), |
2396 | Flag: MachineInstr::FrameDestroy); |
2397 | --MBBI; |
2398 | } |
2399 | --MBBI; |
2400 | } |
2401 | |
2402 | if (IsFunclet) { |
2403 | assert(HasFP && "EH funclets without FP not yet implemented" ); |
2404 | NumBytes = getWinEHFuncletFrameSize(MF); |
2405 | } else if (HasFP) { |
2406 | // Calculate required stack adjustment. |
2407 | uint64_t FrameSize = StackSize - SlotSize; |
2408 | NumBytes = FrameSize - CSSize - TailCallArgReserveSize; |
2409 | |
2410 | // Callee-saved registers were pushed on stack before the stack was |
2411 | // realigned. |
2412 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) |
2413 | NumBytes = alignTo(Value: FrameSize, Align: MaxAlign); |
2414 | } else { |
2415 | NumBytes = StackSize - CSSize - TailCallArgReserveSize; |
2416 | } |
2417 | uint64_t SEHStackAllocAmt = NumBytes; |
2418 | |
2419 | // AfterPop is the position to insert .cfi_restore. |
2420 | MachineBasicBlock::iterator AfterPop = MBBI; |
2421 | if (HasFP) { |
2422 | if (X86FI->hasSwiftAsyncContext()) { |
2423 | // Discard the context. |
2424 | int Offset = 16 + mergeSPUpdates(MBB, MBBI, doMergeWithPrevious: true); |
2425 | emitSPUpdate(MBB, MBBI, DL, NumBytes: Offset, /*InEpilogue*/ true); |
2426 | } |
2427 | // Pop EBP. |
2428 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, |
2429 | MCID: TII.get(Opcode: getPOPOpcode(ST: MF.getSubtarget<X86Subtarget>())), |
2430 | DestReg: MachineFramePtr) |
2431 | .setMIFlag(MachineInstr::FrameDestroy); |
2432 | |
2433 | // We need to reset FP to its untagged state on return. Bit 60 is currently |
2434 | // used to show the presence of an extended frame. |
2435 | if (X86FI->hasSwiftAsyncContext()) { |
2436 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::BTR64ri8), DestReg: MachineFramePtr) |
2437 | .addUse(RegNo: MachineFramePtr) |
2438 | .addImm(Val: 60) |
2439 | .setMIFlag(MachineInstr::FrameDestroy); |
2440 | } |
2441 | |
2442 | if (NeedsDwarfCFI) { |
2443 | if (!ArgBaseReg.isValid()) { |
2444 | unsigned DwarfStackPtr = |
2445 | TRI->getDwarfRegNum(RegNum: Is64Bit ? X86::RSP : X86::ESP, isEH: true); |
2446 | BuildCFI(MBB, MBBI, DL, |
2447 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: SlotSize), |
2448 | Flag: MachineInstr::FrameDestroy); |
2449 | } |
2450 | if (!MBB.succ_empty() && !MBB.isReturnBlock()) { |
2451 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
2452 | BuildCFI(MBB, MBBI: AfterPop, DL, |
2453 | CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfFramePtr), |
2454 | Flag: MachineInstr::FrameDestroy); |
2455 | --MBBI; |
2456 | --AfterPop; |
2457 | } |
2458 | --MBBI; |
2459 | } |
2460 | } |
2461 | |
2462 | MachineBasicBlock::iterator FirstCSPop = MBBI; |
2463 | // Skip the callee-saved pop instructions. |
2464 | while (MBBI != MBB.begin()) { |
2465 | MachineBasicBlock::iterator PI = std::prev(x: MBBI); |
2466 | unsigned Opc = PI->getOpcode(); |
2467 | |
2468 | if (Opc != X86::DBG_VALUE && !PI->isTerminator()) { |
2469 | if (!PI->getFlag(Flag: MachineInstr::FrameDestroy) || |
2470 | (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 && |
2471 | Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 && |
2472 | Opc != X86::POP2P && Opc != X86::LEA64r)) |
2473 | break; |
2474 | FirstCSPop = PI; |
2475 | } |
2476 | |
2477 | --MBBI; |
2478 | } |
2479 | if (ArgBaseReg.isValid()) { |
2480 | // Restore argument base pointer. |
2481 | auto *MI = X86FI->getStackPtrSaveMI(); |
2482 | int FI = MI->getOperand(i: 1).getIndex(); |
2483 | unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm; |
2484 | // movl offset(%ebp), %basereg |
2485 | addFrameReference(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVrm), DestReg: ArgBaseReg), FI) |
2486 | .setMIFlag(MachineInstr::FrameDestroy); |
2487 | } |
2488 | MBBI = FirstCSPop; |
2489 | |
2490 | if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) |
2491 | emitCatchRetReturnValue(MBB, MBBI: FirstCSPop, CatchRet: &*Terminator); |
2492 | |
2493 | if (MBBI != MBB.end()) |
2494 | DL = MBBI->getDebugLoc(); |
2495 | // If there is an ADD32ri or SUB32ri of ESP immediately before this |
2496 | // instruction, merge the two instructions. |
2497 | if (NumBytes || MFI.hasVarSizedObjects()) |
2498 | NumBytes += mergeSPUpdates(MBB, MBBI, doMergeWithPrevious: true); |
2499 | |
2500 | // If dynamic alloca is used, then reset esp to point to the last callee-saved
2501 | // slot before popping them off! The same applies when the stack was
2502 | // realigned. Don't do this for a funclet epilogue, since funclets
2503 | // do not perform realignment or dynamic stack allocation.
2504 | if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) && |
2505 | !IsFunclet) { |
2506 | if (TRI->hasStackRealignment(MF)) |
2507 | MBBI = FirstCSPop; |
2508 | unsigned SEHFrameOffset = calculateSetFPREG(SPAdjust: SEHStackAllocAmt); |
2509 | uint64_t LEAAmount = |
2510 | IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize; |
2511 | |
2512 | if (X86FI->hasSwiftAsyncContext()) |
2513 | LEAAmount -= 16; |
2514 | |
2515 | // There are only two legal forms of epilogue: |
2516 | // - add SEHAllocationSize, %rsp |
2517 | // - lea SEHAllocationSize(%FramePtr), %rsp |
2518 | // |
2519 | // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence. |
2520 | // However, we may use this sequence if we have a frame pointer because the |
2521 | // effects of the prologue can safely be undone. |
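// Illustrative sketch (not from the original source; numbers are hypothetical):
// on a non-Win64 target with CSSize = 24 and no Swift async context, LEAAmount
// is -24 and the code below emits
//   lea -24(%rbp), %rsp
// so RSP points at the last callee-saved slot before the CSR pops run.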
2522 | if (LEAAmount != 0) { |
2523 | unsigned Opc = getLEArOpcode(IsLP64: Uses64BitFramePtr); |
2524 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr), Reg: FramePtr, |
2525 | isKill: false, Offset: LEAAmount); |
2526 | --MBBI; |
2527 | } else { |
2528 | unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); |
2529 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr).addReg(RegNo: FramePtr); |
2530 | --MBBI; |
2531 | } |
2532 | } else if (NumBytes) { |
2533 | // Adjust stack pointer back: ESP += numbytes. |
2534 | emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true); |
2535 | if (!HasFP && NeedsDwarfCFI) { |
2536 | // Define the current CFA rule to use the provided offset. |
2537 | BuildCFI(MBB, MBBI, DL, |
2538 | CFIInst: MCCFIInstruction::cfiDefCfaOffset( |
2539 | L: nullptr, Offset: CSSize + TailCallArgReserveSize + SlotSize), |
2540 | Flag: MachineInstr::FrameDestroy); |
2541 | } |
2542 | --MBBI; |
2543 | } |
2544 | |
2545 | // The Windows unwinder will not invoke a function's exception handler if the
2546 | // IP is either in the prologue or in the epilogue. This behavior causes a
2547 | // problem when a call immediately precedes an epilogue, because the return
2548 | // address then points into the epilogue. To cope with that, we insert an
2549 | // epilogue marker here, then replace it with a 'nop' if it ends up immediately
2550 | // after a CALL in the final emitted code.
2551 | if (NeedsWin64CFI && MF.hasWinCFI()) |
2552 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_Epilogue)); |
2553 | |
2554 | if (!HasFP && NeedsDwarfCFI) { |
2555 | MBBI = FirstCSPop; |
2556 | int64_t Offset = -(int64_t)CSSize - SlotSize; |
2557 | // Mark callee-saved pop instruction. |
2558 | // Define the current CFA rule to use the provided offset. |
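// Illustrative example (hypothetical numbers): with two GPR CSRs saved by
// plain pushes (CSSize = 16) and SlotSize = 8, Offset starts at -24; the
// first pop raises it to -16 and emits .cfi_def_cfa_offset 16, the second
// raises it to -8 and emits .cfi_def_cfa_offset 8 (only the return address
// remains on the stack).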
2559 | while (MBBI != MBB.end()) { |
2560 | MachineBasicBlock::iterator PI = MBBI; |
2561 | unsigned Opc = PI->getOpcode(); |
2562 | ++MBBI; |
2563 | if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r || |
2564 | Opc == X86::POP2 || Opc == X86::POP2P) { |
2565 | Offset += SlotSize; |
2566 | // Compared to pop, pop2 restores one more register, so it adjusts the
2567 | // stack offset by one extra slot.
2568 | if (Opc == X86::POP2 || Opc == X86::POP2P) |
2569 | Offset += SlotSize; |
2570 | BuildCFI(MBB, MBBI, DL, |
2571 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -Offset), |
2572 | Flag: MachineInstr::FrameDestroy); |
2573 | } |
2574 | } |
2575 | } |
2576 | |
2577 | // Emit DWARF info specifying the restores of the callee-saved registers. |
2578 | // If the epilogue ends in a return, or the block otherwise has no successors,
2579 | // there is no need to generate .cfi_restore for the callee-saved registers.
2580 | if (NeedsDwarfCFI && !MBB.succ_empty()) |
2581 | emitCalleeSavedFrameMoves(MBB, MBBI: AfterPop, DL, IsPrologue: false); |
2582 | |
2583 | if (Terminator == MBB.end() || !isTailCallOpcode(Opc: Terminator->getOpcode())) { |
2584 | // Add the return addr area delta back since we are not tail calling. |
2585 | int Offset = -1 * X86FI->getTCReturnAddrDelta(); |
2586 | assert(Offset >= 0 && "TCDelta should never be positive" ); |
2587 | if (Offset) { |
2588 | // Check for possible merge with preceding ADD instruction. |
2589 | Offset += mergeSPUpdates(MBB, MBBI&: Terminator, doMergeWithPrevious: true); |
2590 | emitSPUpdate(MBB, MBBI&: Terminator, DL, NumBytes: Offset, /*InEpilogue=*/true); |
2591 | } |
2592 | } |
2593 | |
2594 | // Emit tilerelease for AMX kernel. |
2595 | if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA) |
2596 | BuildMI(BB&: MBB, I: Terminator, MIMD: DL, MCID: TII.get(Opcode: X86::TILERELEASE)); |
2597 | } |
2598 | |
2599 | StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, |
2600 | int FI, |
2601 | Register &FrameReg) const { |
2602 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2603 | |
2604 | bool IsFixed = MFI.isFixedObjectIndex(ObjectIdx: FI); |
2605 | // We can't calculate offset from frame pointer if the stack is realigned, |
2606 | // so enforce usage of stack/base pointer. The base pointer is used when we |
2607 | // have dynamic allocas in addition to dynamic realignment. |
2608 | if (TRI->hasBasePointer(MF)) |
2609 | FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister(); |
2610 | else if (TRI->hasStackRealignment(MF)) |
2611 | FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister(); |
2612 | else |
2613 | FrameReg = TRI->getFrameRegister(MF); |
2614 | |
2615 | // Offset will hold the offset from the stack pointer at function entry to the |
2616 | // object. |
2617 | // We need to factor in additional offsets applied during the prologue to the |
2618 | // frame, base, and stack pointer depending on which is used. |
2619 | int Offset = MFI.getObjectOffset(ObjectIdx: FI) - getOffsetOfLocalArea(); |
2620 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2621 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2622 | uint64_t StackSize = MFI.getStackSize(); |
2623 | bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
2624 | int64_t FPDelta = 0; |
2625 | |
2626 | // In an x86 interrupt, remove the offset we added to account for the return |
2627 | // address from any stack object allocated in the caller's frame. Interrupts |
2628 | // do not have a standard return address. Fixed objects in the current frame, |
2629 | // such as SSE register spills, should not get this treatment. |
2630 | if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR && |
2631 | Offset >= 0) { |
2632 | Offset += getOffsetOfLocalArea(); |
2633 | } |
2634 | |
2635 | if (IsWin64Prologue) { |
2636 | assert(!MFI.hasCalls() || (StackSize % 16) == 8); |
2637 | |
2638 | // Calculate required stack adjustment. |
2639 | uint64_t FrameSize = StackSize - SlotSize; |
2640 | // If required, include space for extra hidden slot for stashing base |
2641 | // pointer. |
2642 | if (X86FI->getRestoreBasePointer()) |
2643 | FrameSize += SlotSize; |
2644 | uint64_t NumBytes = FrameSize - CSSize; |
2645 | |
2646 | uint64_t SEHFrameOffset = calculateSetFPREG(SPAdjust: NumBytes); |
2647 | if (FI && FI == X86FI->getFAIndex()) |
2648 | return StackOffset::getFixed(Fixed: -SEHFrameOffset); |
2649 | |
2650 | // FPDelta is the offset between the "traditional" FP location (the old base
2651 | // pointer followed by the return address) and the location required by the
2652 | // restricted Win64 prologue.
2653 | // Add FPDelta to all offsets below that go through the frame pointer.
2654 | FPDelta = FrameSize - SEHFrameOffset; |
2655 | assert((!MFI.hasCalls() || (FPDelta % 16) == 0) && |
2656 | "FPDelta isn't aligned per the Win64 ABI!" ); |
2657 | } |
2658 | |
2659 | if (FrameReg == TRI->getFramePtr()) { |
2660 | // Skip saved EBP/RBP |
2661 | Offset += SlotSize; |
2662 | |
2663 | // Account for restricted Windows prologue. |
2664 | Offset += FPDelta; |
2665 | |
2666 | // Skip the RETADDR move area |
2667 | int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); |
2668 | if (TailCallReturnAddrDelta < 0) |
2669 | Offset -= TailCallReturnAddrDelta; |
2670 | |
2671 | return StackOffset::getFixed(Fixed: Offset); |
2672 | } |
2673 | |
2674 | // FrameReg is either the stack pointer or a base pointer. But the base is |
2675 | // located at the end of the statically known StackSize so the distinction |
2676 | // doesn't really matter. |
2677 | if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF)) |
2678 | assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); |
2679 | return StackOffset::getFixed(Fixed: Offset + StackSize); |
2680 | } |
2681 | |
2682 | int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, |
2683 | Register &FrameReg) const { |
2684 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2685 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2686 | const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2687 | const auto it = WinEHXMMSlotInfo.find(Val: FI); |
2688 | |
2689 | if (it == WinEHXMMSlotInfo.end()) |
2690 | return getFrameIndexReference(MF, FI, FrameReg).getFixed(); |
2691 | |
2692 | FrameReg = TRI->getStackRegister(); |
2693 | return alignDown(Value: MFI.getMaxCallFrameSize(), Align: getStackAlign().value()) + |
2694 | it->second; |
2695 | } |
2696 | |
2697 | StackOffset |
2698 | X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, |
2699 | Register &FrameReg, |
2700 | int Adjustment) const { |
2701 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2702 | FrameReg = TRI->getStackRegister(); |
2703 | return StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FI) - |
2704 | getOffsetOfLocalArea() + Adjustment); |
2705 | } |
2706 | |
2707 | StackOffset |
2708 | X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, |
2709 | int FI, Register &FrameReg, |
2710 | bool IgnoreSPUpdates) const { |
2711 | |
2712 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2713 | // Does not include any dynamic realign. |
2714 | const uint64_t StackSize = MFI.getStackSize(); |
2715 | // LLVM arranges the stack as follows: |
2716 | // ... |
2717 | // ARG2 |
2718 | // ARG1 |
2719 | // RETADDR |
2720 | // PUSH RBP <-- RBP points here |
2721 | // PUSH CSRs |
2722 | // ~~~~~~~ <-- possible stack realignment (non-win64) |
2723 | // ... |
2724 | // STACK OBJECTS |
2725 | // ... <-- RSP after prologue points here |
2726 | // ~~~~~~~ <-- possible stack realignment (win64) |
2727 | // |
2728 | // if (hasVarSizedObjects()): |
2729 | // ... <-- "base pointer" (ESI/RBX) points here |
2730 | // DYNAMIC ALLOCAS |
2731 | // ... <-- RSP points here |
2732 | // |
2733 | // Case 1: In the simple case of no stack realignment and no dynamic |
2734 | // allocas, both "fixed" stack objects (arguments and CSRs) are addressable |
2735 | // with fixed offsets from RSP. |
2736 | // |
2737 | // Case 2: In the case of stack realignment with no dynamic allocas, fixed |
2738 | // stack objects are addressed with RBP and regular stack objects with RSP. |
2739 | // |
2740 | // Case 3: In the case of dynamic allocas and stack realignment, RSP is used |
2741 | // to address stack arguments for outgoing calls and nothing else. The "base |
2742 | // pointer" points to local variables, and RBP points to fixed objects. |
2743 | // |
2744 | // In cases 2 and 3, we can only answer for non-fixed stack objects, and the |
2745 | // answer we give is relative to the SP after the prologue, and not the |
2746 | // SP in the middle of the function. |
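// Illustrative example (not from the original source): in case 3, a function
// with both dynamic allocas and over-aligned locals addresses an incoming
// argument via RBP, an aligned local via the base pointer (ESI/RBX), and
// outgoing call arguments via RSP.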
2747 | |
2748 | if (MFI.isFixedObjectIndex(ObjectIdx: FI) && TRI->hasStackRealignment(MF) && |
2749 | !STI.isTargetWin64()) |
2750 | return getFrameIndexReference(MF, FI, FrameReg); |
2751 | |
2752 | // If !hasReservedCallFrame the function might have SP adjustments in the
2753 | // body. So, even though the offset is statically known, it depends on where
2754 | // we are in the function.
2755 | if (!IgnoreSPUpdates && !hasReservedCallFrame(MF)) |
2756 | return getFrameIndexReference(MF, FI, FrameReg); |
2757 | |
2758 | // We don't handle tail calls, and shouldn't be seeing them either. |
2759 | assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 && |
2760 | "we don't handle this case!" ); |
2761 | |
2762 | // This is how the math works out: |
2763 | // |
2764 | // %rsp grows (i.e. gets lower) left to right. Each box below is |
2765 | // one word (eight bytes). Obj0 is the stack slot we're trying to |
2766 | // get to. |
2767 | // |
2768 | // ---------------------------------- |
2769 | // | BP | Obj0 | Obj1 | ... | ObjN | |
2770 | // ---------------------------------- |
2771 | // ^ ^ ^ ^ |
2772 | // A B C E |
2773 | // |
2774 | // A is the incoming stack pointer. |
2775 | // (B - A) is the local area offset (-8 for x86-64) [1] |
2776 | // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2] |
2777 | // |
2778 | // |(E - B)| is the StackSize (absolute value, positive). For a |
2779 | //    stack that grows down, this works out to be (B - E). [3]
2780 | // |
2781 | // E is also the value of %rsp after stack has been set up, and we |
2782 | // want (C - E) -- the value we can add to %rsp to get to Obj0. Now |
2783 | // (C - E) == (C - A) - (B - A) + (B - E) |
2784 | // { Using [1], [2] and [3] above } |
2785 | // == getObjectOffset - LocalAreaOffset + StackSize |
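// Illustrative example (hypothetical numbers): on x86-64 with
// getOffsetOfLocalArea() = -8, MFI.getObjectOffset(Obj0) = -16 and
// StackSize = 40, the SP-relative offset is -16 - (-8) + 40 = 32, i.e. Obj0
// lives at 32(%rsp) once the prologue has run.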
2786 | |
2787 | return getFrameIndexReferenceSP(MF, FI, FrameReg, Adjustment: StackSize); |
2788 | } |
2789 | |
2790 | bool X86FrameLowering::assignCalleeSavedSpillSlots( |
2791 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
2792 | std::vector<CalleeSavedInfo> &CSI) const { |
2793 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2794 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2795 | |
2796 | unsigned CalleeSavedFrameSize = 0; |
2797 | unsigned XMMCalleeSavedFrameSize = 0; |
2798 | auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2799 | int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); |
2800 | |
2801 | int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); |
2802 | |
2803 | if (TailCallReturnAddrDelta < 0) { |
2804 | // create RETURNADDR area |
2805 | // arg |
2806 | // arg |
2807 | // RETADDR |
2808 | // { ... |
2809 | // RETADDR area |
2810 | // ... |
2811 | // } |
2812 | // [EBP] |
2813 | MFI.CreateFixedObject(Size: -TailCallReturnAddrDelta, |
2814 | SPOffset: TailCallReturnAddrDelta - SlotSize, IsImmutable: true); |
2815 | } |
2816 | |
2817 | // Spill the BasePtr if it's used. |
2818 | if (this->TRI->hasBasePointer(MF)) { |
2819 | // Allocate a spill slot for EBP if we have a base pointer and EH funclets. |
2820 | if (MF.hasEHFunclets()) { |
2821 | int FI = MFI.CreateSpillStackObject(Size: SlotSize, Alignment: Align(SlotSize)); |
2822 | X86FI->setHasSEHFramePtrSave(true); |
2823 | X86FI->setSEHFramePtrSaveIndex(FI); |
2824 | } |
2825 | } |
2826 | |
2827 | if (hasFP(MF)) { |
2828 | // emitPrologue always spills the frame register first.
2829 | SpillSlotOffset -= SlotSize; |
2830 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2831 | |
2832 | // The async context lives directly before the frame pointer, and we |
2833 | // allocate a second slot to preserve stack alignment. |
2834 | if (X86FI->hasSwiftAsyncContext()) { |
2835 | SpillSlotOffset -= SlotSize; |
2836 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2837 | SpillSlotOffset -= SlotSize; |
2838 | } |
2839 | |
2840 | // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2841 | // the frame register, we can delete it from the CSI list and not have to
2842 | // worry about avoiding it later.
2843 | Register FPReg = TRI->getFrameRegister(MF); |
2844 | for (unsigned i = 0; i < CSI.size(); ++i) { |
2845 | if (TRI->regsOverlap(RegA: CSI[i].getReg(), RegB: FPReg)) { |
2846 | CSI.erase(position: CSI.begin() + i); |
2847 | break; |
2848 | } |
2849 | } |
2850 | } |
2851 | |
2852 | // Strategy:
2853 | // 1. Use push2 when
2854 | //       a) the number of CSRs is > 1, if no padding is needed
2855 | //       b) the number of CSRs is > 2, if padding is needed
2856 | // 2. When the number of CSR pushes is odd
2857 | //    a. Start using push2 from the 1st push if the stack is 16B aligned.
2858 | //    b. Start using push2 from the 2nd push if the stack is not 16B aligned.
2859 | // 3. When the number of CSR pushes is even, start using push2 from the 1st
2860 | //    push and make the stack 16B aligned before that push.
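// Illustrative example (not from the original source): with 3 GPR CSRs and a
// 16B-aligned SpillSlotOffset, the first two registers are saved with one
// push2 and the third with a plain push (rule 2a). If the offset is only 8B
// aligned, the first register gets a plain push (restoring 16B alignment) and
// the remaining two use push2 (rule 2b).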
2861 | unsigned NumRegsForPush2 = 0; |
2862 | if (STI.hasPush2Pop2()) { |
2863 | unsigned NumCSGPR = llvm::count_if(Range&: CSI, P: [](const CalleeSavedInfo &I) { |
2864 | return X86::GR64RegClass.contains(Reg: I.getReg()); |
2865 | }); |
2866 | bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0); |
2867 | bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1; |
2868 | X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2); |
2869 | NumRegsForPush2 = UsePush2Pop2 ? alignDown(Value: NumCSGPR, Align: 2) : 0; |
2870 | if (X86FI->padForPush2Pop2()) { |
2871 | SpillSlotOffset -= SlotSize; |
2872 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2873 | } |
2874 | } |
2875 | |
2876 | // Assign slots for GPRs. It increases frame size. |
2877 | for (CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
2878 | Register Reg = I.getReg(); |
2879 | |
2880 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
2881 | continue; |
2882 | |
2883 | // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2884 | // or when there is currently an odd number of candidate registers.
2885 | if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 && |
2886 | (SpillSlotOffset % 16 == 0 || |
2887 | X86FI->getNumCandidatesForPush2Pop2() % 2)) |
2888 | X86FI->addCandidateForPush2Pop2(Reg); |
2889 | |
2890 | SpillSlotOffset -= SlotSize; |
2891 | CalleeSavedFrameSize += SlotSize; |
2892 | |
2893 | int SlotIndex = MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2894 | I.setFrameIdx(SlotIndex); |
2895 | } |
2896 | |
2897 | // Adjust the spill slot offset now that we know the exact callee-saved
2898 | // frame size.
2899 | if (X86FI->getRestoreBasePointer()) { |
2900 | SpillSlotOffset -= SlotSize; |
2901 | CalleeSavedFrameSize += SlotSize; |
2902 | |
2903 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2904 | // TODO: saving the slot index is better? |
2905 | X86FI->setRestoreBasePointer(CalleeSavedFrameSize); |
2906 | } |
2907 | assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 && |
2908 | "Expect even candidates for push2/pop2" ); |
2909 | if (X86FI->getNumCandidatesForPush2Pop2()) |
2910 | ++NumFunctionUsingPush2Pop2; |
2911 | X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); |
2912 | MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize); |
2913 | |
2914 | // Assign slots for XMMs. |
2915 | for (CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
2916 | Register Reg = I.getReg(); |
2917 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
2918 | continue; |
2919 | |
2920 | // If this is a k-register, make sure we look it up via the largest legal type.
2921 | MVT VT = MVT::Other; |
2922 | if (X86::VK16RegClass.contains(Reg)) |
2923 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
2924 | |
2925 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
2926 | unsigned Size = TRI->getSpillSize(RC: *RC); |
2927 | Align Alignment = TRI->getSpillAlign(RC: *RC); |
2928 | // ensure alignment |
2929 | assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86" );
2930 | SpillSlotOffset = -alignTo(Size: -SpillSlotOffset, A: Alignment); |
2931 | |
2932 | // spill into slot |
2933 | SpillSlotOffset -= Size; |
2934 | int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SPOffset: SpillSlotOffset); |
2935 | I.setFrameIdx(SlotIndex); |
2936 | MFI.ensureMaxAlignment(Alignment); |
2937 | |
2938 | // Save the start offset and size of the XMM spill area in the stack frame for funclets.
2939 | if (X86::VR128RegClass.contains(Reg)) { |
2940 | WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize; |
2941 | XMMCalleeSavedFrameSize += Size; |
2942 | } |
2943 | } |
2944 | |
2945 | return true; |
2946 | } |
2947 | |
2948 | bool X86FrameLowering::spillCalleeSavedRegisters( |
2949 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
2950 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
2951 | DebugLoc DL = MBB.findDebugLoc(MBBI: MI); |
2952 | |
2953 | // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI |
2954 | // for us, and there are no XMM CSRs on Win32. |
2955 | if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) |
2956 | return true; |
2957 | |
2958 | // Push GPRs. It increases frame size. |
2959 | const MachineFunction &MF = *MBB.getParent(); |
2960 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2961 | if (X86FI->padForPush2Pop2()) |
2962 | emitSPUpdate(MBB, MBBI&: MI, DL, NumBytes: -(int64_t)SlotSize, /*InEpilogue=*/false); |
2963 | |
2964 | // Update LiveIn of the basic block and decide whether we can add a kill flag |
2965 | // to the use. |
2966 | auto UpdateLiveInCheckCanKill = [&](Register Reg) { |
2967 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
2968 | // Do not set a kill flag on values that are also marked as live-in. This
2969 | // happens with the @llvm.returnaddress intrinsic and with arguments
2970 | // passed in callee-saved registers.
2971 | // Omitting the kill flags is conservatively correct even if the live-in |
2972 | // is not used after all. |
2973 | if (MRI.isLiveIn(Reg)) |
2974 | return false; |
2975 | MBB.addLiveIn(PhysReg: Reg); |
2976 | // Check if any register aliasing Reg is live-in.
2977 | for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) |
2978 | if (MRI.isLiveIn(Reg: *AReg)) |
2979 | return false; |
2980 | return true; |
2981 | }; |
2982 | auto UpdateLiveInGetKillRegState = [&](Register Reg) { |
2983 | return getKillRegState(B: UpdateLiveInCheckCanKill(Reg)); |
2984 | }; |
2985 | |
2986 | for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) { |
2987 | Register Reg = RI->getReg(); |
2988 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
2989 | continue; |
2990 | |
2991 | if (X86FI->isCandidateForPush2Pop2(Reg)) { |
2992 | Register Reg2 = (++RI)->getReg(); |
2993 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPUSH2Opcode(ST: STI))) |
2994 | .addReg(RegNo: Reg, flags: UpdateLiveInGetKillRegState(Reg)) |
2995 | .addReg(RegNo: Reg2, flags: UpdateLiveInGetKillRegState(Reg2)) |
2996 | .setMIFlag(MachineInstr::FrameSetup); |
2997 | } else { |
2998 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPUSHOpcode(ST: STI))) |
2999 | .addReg(RegNo: Reg, flags: UpdateLiveInGetKillRegState(Reg)) |
3000 | .setMIFlag(MachineInstr::FrameSetup); |
3001 | } |
3002 | } |
3003 | |
3004 | if (X86FI->getRestoreBasePointer()) { |
3005 | unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; |
3006 | Register BaseReg = this->TRI->getBaseRegister(); |
3007 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc)) |
3008 | .addReg(RegNo: BaseReg, flags: getKillRegState(B: true)) |
3009 | .setMIFlag(MachineInstr::FrameSetup); |
3010 | } |
3011 | |
3012 | // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
3013 | // so spill them to the stack frame instead.
3014 | for (const CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
3015 | Register Reg = I.getReg(); |
3016 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
3017 | continue; |
3018 | |
3019 | // If this is a k-register, make sure we look it up via the largest legal type.
3020 | MVT VT = MVT::Other; |
3021 | if (X86::VK16RegClass.contains(Reg)) |
3022 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
3023 | |
3024 | // Add the callee-saved register as live-in. It's killed at the spill. |
3025 | MBB.addLiveIn(PhysReg: Reg); |
3026 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
3027 | |
3028 | TII.storeRegToStackSlot(MBB, MI, SrcReg: Reg, isKill: true, FrameIndex: I.getFrameIdx(), RC, TRI, |
3029 | VReg: Register()); |
3030 | --MI; |
3031 | MI->setFlag(MachineInstr::FrameSetup); |
3032 | ++MI; |
3033 | } |
3034 | |
3035 | return true; |
3036 | } |
3037 | |
3038 | void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB, |
3039 | MachineBasicBlock::iterator MBBI, |
3040 | MachineInstr *CatchRet) const { |
3041 | // SEH shouldn't use catchret. |
3042 | assert(!isAsynchronousEHPersonality(classifyEHPersonality( |
3043 | MBB.getParent()->getFunction().getPersonalityFn())) && |
3044 | "SEH should not use CATCHRET" ); |
3045 | const DebugLoc &DL = CatchRet->getDebugLoc(); |
3046 | MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(i: 0).getMBB(); |
3047 | |
3048 | // Fill EAX/RAX with the address of the target block. |
3049 | if (STI.is64Bit()) { |
3050 | // LEA64r CatchRetTarget(%rip), %rax |
3051 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: X86::RAX) |
3052 | .addReg(RegNo: X86::RIP) |
3053 | .addImm(Val: 0) |
3054 | .addReg(RegNo: 0) |
3055 | .addMBB(MBB: CatchRetTarget) |
3056 | .addReg(RegNo: 0); |
3057 | } else { |
3058 | // MOV32ri $CatchRetTarget, %eax |
3059 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: X86::EAX) |
3060 | .addMBB(MBB: CatchRetTarget); |
3061 | } |
3062 | |
3063 | // Record that we've taken the address of CatchRetTarget and no longer just |
3064 | // reference it in a terminator. |
3065 | CatchRetTarget->setMachineBlockAddressTaken(); |
3066 | } |
3067 | |
3068 | bool X86FrameLowering::restoreCalleeSavedRegisters( |
3069 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
3070 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
3071 | if (CSI.empty()) |
3072 | return false; |
3073 | |
3074 | if (MI != MBB.end() && isFuncletReturnInstr(MI&: *MI) && STI.isOSWindows()) { |
3075 | // Don't restore CSRs in 32-bit EH funclets. Matches |
3076 | // spillCalleeSavedRegisters. |
3077 | if (STI.is32Bit()) |
3078 | return true; |
3079 | // Don't restore CSRs before an SEH catchret. SEH except blocks do not form |
3080 | // funclets. emitEpilogue transforms these to normal jumps. |
3081 | if (MI->getOpcode() == X86::CATCHRET) { |
3082 | const Function &F = MBB.getParent()->getFunction(); |
3083 | bool IsSEH = isAsynchronousEHPersonality( |
3084 | Pers: classifyEHPersonality(Pers: F.getPersonalityFn())); |
3085 | if (IsSEH) |
3086 | return true; |
3087 | } |
3088 | } |
3089 | |
3090 | DebugLoc DL = MBB.findDebugLoc(MBBI: MI); |
3091 | |
3092 | // Reload XMMs from stack frame. |
3093 | for (const CalleeSavedInfo &I : CSI) { |
3094 | Register Reg = I.getReg(); |
3095 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
3096 | continue; |
3097 | |
3098 | // If this is a k-register, make sure we look it up via the largest legal type.
3099 | MVT VT = MVT::Other; |
3100 | if (X86::VK16RegClass.contains(Reg)) |
3101 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
3102 | |
3103 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
3104 | TII.loadRegFromStackSlot(MBB, MI, DestReg: Reg, FrameIndex: I.getFrameIdx(), RC, TRI, |
3105 | VReg: Register()); |
3106 | } |
3107 | |
3108 | // Restore the base pointer register from its spill slot (popping the slot).
3109 | MachineFunction &MF = *MBB.getParent(); |
3110 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3111 | if (X86FI->getRestoreBasePointer()) { |
3112 | unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; |
3113 | Register BaseReg = this->TRI->getBaseRegister(); |
3114 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: BaseReg) |
3115 | .setMIFlag(MachineInstr::FrameDestroy); |
3116 | } |
3117 | |
3118 | // POP GPRs. |
3119 | for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) { |
3120 | Register Reg = I->getReg(); |
3121 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
3122 | continue; |
3123 | |
3124 | if (X86FI->isCandidateForPush2Pop2(Reg)) |
3125 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPOP2Opcode(ST: STI)), DestReg: Reg) |
3126 | .addReg(RegNo: (++I)->getReg(), flags: RegState::Define) |
3127 | .setMIFlag(MachineInstr::FrameDestroy); |
3128 | else |
3129 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPOPOpcode(ST: STI)), DestReg: Reg) |
3130 | .setMIFlag(MachineInstr::FrameDestroy); |
3131 | } |
3132 | if (X86FI->padForPush2Pop2()) |
3133 | emitSPUpdate(MBB, MBBI&: MI, DL, NumBytes: SlotSize, /*InEpilogue=*/true); |
3134 | |
3135 | return true; |
3136 | } |
3137 | |
3138 | void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, |
3139 | BitVector &SavedRegs, |
3140 | RegScavenger *RS) const { |
3141 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
3142 | |
3143 | // Spill the BasePtr if it's used. |
3144 | if (TRI->hasBasePointer(MF)) { |
3145 | Register BasePtr = TRI->getBaseRegister(); |
3146 | if (STI.isTarget64BitILP32()) |
3147 | BasePtr = getX86SubSuperRegister(Reg: BasePtr, Size: 64); |
3148 | SavedRegs.set(BasePtr); |
3149 | } |
3150 | } |
3151 | |
3152 | static bool HasNestArgument(const MachineFunction *MF) { |
3153 | const Function &F = MF->getFunction(); |
3154 | for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; |
3155 | I++) { |
3156 | if (I->hasNestAttr() && !I->use_empty()) |
3157 | return true; |
3158 | } |
3159 | return false; |
3160 | } |
3161 | |
3162 | /// GetScratchRegister - Get a temp register for performing work in the |
3163 | /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform |
3164 | /// and the properties of the function either one or two registers will be |
3165 | /// needed. Set primary to true for the first register, false for the second. |
3166 | static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, |
3167 | const MachineFunction &MF, bool Primary) { |
3168 | CallingConv::ID CallingConvention = MF.getFunction().getCallingConv(); |
3169 | |
3170 | // Erlang stuff. |
3171 | if (CallingConvention == CallingConv::HiPE) { |
3172 | if (Is64Bit) |
3173 | return Primary ? X86::R14 : X86::R13; |
3174 | else |
3175 | return Primary ? X86::EBX : X86::EDI; |
3176 | } |
3177 | |
3178 | if (Is64Bit) { |
3179 | if (IsLP64) |
3180 | return Primary ? X86::R11 : X86::R12; |
3181 | else |
3182 | return Primary ? X86::R11D : X86::R12D; |
3183 | } |
3184 | |
3185 | bool IsNested = HasNestArgument(MF: &MF); |
3186 | |
3187 | if (CallingConvention == CallingConv::X86_FastCall || |
3188 | CallingConvention == CallingConv::Fast || |
3189 | CallingConvention == CallingConv::Tail) { |
3190 | if (IsNested) |
3191 | report_fatal_error(reason: "Segmented stacks do not support fastcall with "
3192 | "nested functions." );
3193 | return Primary ? X86::EAX : X86::ECX; |
3194 | } |
3195 | if (IsNested) |
3196 | return Primary ? X86::EDX : X86::EAX; |
3197 | return Primary ? X86::ECX : X86::EAX; |
3198 | } |
3199 | |
3200 | // The stack limit in the TCB is set to this many bytes above the actual stack |
3201 | // limit. |
3202 | static const uint64_t kSplitStackAvailable = 256; |
3203 | |
3204 | void X86FrameLowering::adjustForSegmentedStacks( |
3205 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { |
3206 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3207 | uint64_t StackSize; |
3208 | unsigned TlsReg, TlsOffset; |
3209 | DebugLoc DL; |
3210 | |
3211 | // To support shrink-wrapping we would need to insert the new blocks |
3212 | // at the right place and update the branches to PrologueMBB. |
3213 | assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet" ); |
3214 | |
3215 | unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3216 | assert(!MF.getRegInfo().isLiveIn(ScratchReg) && |
3217 | "Scratch register is live-in" ); |
3218 | |
3219 | if (MF.getFunction().isVarArg()) |
3220 | report_fatal_error(reason: "Segmented stacks do not support vararg functions." ); |
3221 | if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() && |
3222 | !STI.isTargetWin64() && !STI.isTargetFreeBSD() && |
3223 | !STI.isTargetDragonFly()) |
3224 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3225 | |
3226 | // Eventually StackSize will be calculated by a link-time pass, which will
3227 | // also decide whether checking code needs to be injected into this particular
3228 | // prologue.
3229 | StackSize = MFI.getStackSize(); |
3230 | |
3231 | if (!MFI.needsSplitStackProlog()) |
3232 | return; |
3233 | |
3234 | MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); |
3235 | MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); |
3236 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3237 | bool IsNested = false; |
3238 | |
3239 | // We only need to know whether the function has a nest argument in 64-bit mode.
3240 | if (Is64Bit) |
3241 | IsNested = HasNestArgument(MF: &MF); |
3242 | |
3243 | // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3244 | // allocMBB needs to be the last (terminating) instruction.
3245 | |
3246 | for (const auto &LI : PrologueMBB.liveins()) { |
3247 | allocMBB->addLiveIn(RegMaskPair: LI); |
3248 | checkMBB->addLiveIn(RegMaskPair: LI); |
3249 | } |
3250 | |
3251 | if (IsNested) |
3252 | allocMBB->addLiveIn(PhysReg: IsLP64 ? X86::R10 : X86::R10D); |
3253 | |
3254 | MF.push_front(MBB: allocMBB); |
3255 | MF.push_front(MBB: checkMBB); |
3256 | |
3257 | // When the frame size is less than 256 we just compare the stack |
3258 | // boundary directly to the value of the stack pointer, per gcc. |
3259 | bool CompareStackPointer = StackSize < kSplitStackAvailable; |
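// Illustrative example (hypothetical sizes): a 64-byte frame compares the
// stack pointer directly against the stacklet limit, while a 4096-byte frame
// first computes SP - 4096 into the scratch register and compares that.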
3260 | |
3261 | // Read the limit of the current stacklet from the stack_guard location.
3262 | if (Is64Bit) { |
3263 | if (STI.isTargetLinux()) { |
3264 | TlsReg = X86::FS; |
3265 | TlsOffset = IsLP64 ? 0x70 : 0x40; |
3266 | } else if (STI.isTargetDarwin()) { |
3267 | TlsReg = X86::GS; |
3268 | TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90. |
3269 | } else if (STI.isTargetWin64()) { |
3270 | TlsReg = X86::GS; |
3271 | TlsOffset = 0x28; // pvArbitrary, reserved for application use |
3272 | } else if (STI.isTargetFreeBSD()) { |
3273 | TlsReg = X86::FS; |
3274 | TlsOffset = 0x18; |
3275 | } else if (STI.isTargetDragonFly()) { |
3276 | TlsReg = X86::FS; |
3277 | TlsOffset = 0x20; // use tls_tcb.tcb_segstack |
3278 | } else { |
3279 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3280 | } |
3281 | |
3282 | if (CompareStackPointer) |
3283 | ScratchReg = IsLP64 ? X86::RSP : X86::ESP; |
3284 | else |
3285 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: IsLP64 ? X86::LEA64r : X86::LEA64_32r), |
3286 | DestReg: ScratchReg) |
3287 | .addReg(RegNo: X86::RSP) |
3288 | .addImm(Val: 1) |
3289 | .addReg(RegNo: 0) |
3290 | .addImm(Val: -StackSize) |
3291 | .addReg(RegNo: 0); |
3292 | |
3293 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: IsLP64 ? X86::CMP64rm : X86::CMP32rm)) |
3294 | .addReg(RegNo: ScratchReg) |
3295 | .addReg(RegNo: 0) |
3296 | .addImm(Val: 1) |
3297 | .addReg(RegNo: 0) |
3298 | .addImm(Val: TlsOffset) |
3299 | .addReg(RegNo: TlsReg); |
3300 | } else { |
3301 | if (STI.isTargetLinux()) { |
3302 | TlsReg = X86::GS; |
3303 | TlsOffset = 0x30; |
3304 | } else if (STI.isTargetDarwin()) { |
3305 | TlsReg = X86::GS; |
3306 | TlsOffset = 0x48 + 90 * 4; |
3307 | } else if (STI.isTargetWin32()) { |
3308 | TlsReg = X86::FS; |
3309 | TlsOffset = 0x14; // pvArbitrary, reserved for application use |
3310 | } else if (STI.isTargetDragonFly()) { |
3311 | TlsReg = X86::FS; |
3312 | TlsOffset = 0x10; // use tls_tcb.tcb_segstack |
3313 | } else if (STI.isTargetFreeBSD()) { |
3314 | report_fatal_error(reason: "Segmented stacks not supported on FreeBSD i386." ); |
3315 | } else { |
3316 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3317 | } |
3318 | |
3319 | if (CompareStackPointer) |
3320 | ScratchReg = X86::ESP; |
3321 | else |
3322 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::LEA32r), DestReg: ScratchReg) |
3323 | .addReg(RegNo: X86::ESP) |
3324 | .addImm(Val: 1) |
3325 | .addReg(RegNo: 0) |
3326 | .addImm(Val: -StackSize) |
3327 | .addReg(RegNo: 0); |
3328 | |
3329 | if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() || |
3330 | STI.isTargetDragonFly()) { |
3331 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP32rm)) |
3332 | .addReg(RegNo: ScratchReg) |
3333 | .addReg(RegNo: 0) |
3334 | .addImm(Val: 0) |
3335 | .addReg(RegNo: 0) |
3336 | .addImm(Val: TlsOffset) |
3337 | .addReg(RegNo: TlsReg); |
3338 | } else if (STI.isTargetDarwin()) { |
3339 | |
3340 | // TlsOffset doesn't fit into a mod r/m byte so we need an extra register. |
3341 | unsigned ScratchReg2; |
3342 | bool SaveScratch2; |
3343 | if (CompareStackPointer) { |
3344 | // The primary scratch register is available for holding the TLS offset. |
3345 | ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3346 | SaveScratch2 = false; |
3347 | } else { |
3348 | // Need to use a second register to hold the TLS offset |
3349 | ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: false); |
3350 | |
3351 | // Unfortunately, with fastcc the second scratch register may hold an |
3352 | // argument. |
3353 | SaveScratch2 = MF.getRegInfo().isLiveIn(Reg: ScratchReg2); |
3354 | } |
3355 | |
3356 | // If Scratch2 is live-in then it needs to be saved. |
3357 | assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && |
3358 | "Scratch register is live-in and not saved" ); |
3359 | |
3360 | if (SaveScratch2) |
3361 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32r)) |
3362 | .addReg(RegNo: ScratchReg2, flags: RegState::Kill); |
3363 | |
3364 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: ScratchReg2) |
3365 | .addImm(Val: TlsOffset); |
3366 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP32rm)) |
3367 | .addReg(RegNo: ScratchReg) |
3368 | .addReg(RegNo: ScratchReg2) |
3369 | .addImm(Val: 1) |
3370 | .addReg(RegNo: 0) |
3371 | .addImm(Val: 0) |
3372 | .addReg(RegNo: TlsReg); |
3373 | |
3374 | if (SaveScratch2) |
3375 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::POP32r), DestReg: ScratchReg2); |
3376 | } |
3377 | } |
3378 | |
3379 | // This jump is taken if SP >= (Stacklet Limit + Stack Space required). |
3380 | // It jumps to normal execution of the function body. |
3381 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
3382 | .addMBB(MBB: &PrologueMBB) |
3383 | .addImm(Val: X86::COND_A); |
3384 | |
3385 | // On 32 bit we first push the arguments size and then the frame size. On 64 |
3386 | // bit, we pass the stack frame size in r10 and the argument size in r11. |
3387 | if (Is64Bit) { |
3388 | // Functions with nested arguments use R10, so it needs to be saved across |
3389 | // the call to _morestack |
3390 | |
3391 | const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX; |
3392 | const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D; |
3393 | const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D; |
3394 | const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr; |
3395 | |
3396 | if (IsNested) |
3397 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: MOVrr), DestReg: RegAX).addReg(RegNo: Reg10); |
3398 | |
3399 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: IsLP64, Imm: StackSize)), DestReg: Reg10) |
3400 | .addImm(Val: StackSize); |
3401 | BuildMI(BB: allocMBB, MIMD: DL, |
3402 | MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: IsLP64, Imm: X86FI->getArgumentStackSize())), |
3403 | DestReg: Reg11) |
3404 | .addImm(Val: X86FI->getArgumentStackSize()); |
3405 | } else { |
3406 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32i)) |
3407 | .addImm(Val: X86FI->getArgumentStackSize()); |
3408 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32i)).addImm(Val: StackSize); |
3409 | } |
3410 | |
3411 | // __morestack is in libgcc |
3412 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { |
3413 | // Under the large code model, we cannot assume that __morestack lives |
3414 | // within 2^31 bytes of the call site, so we cannot use pc-relative |
3415 | // addressing. We cannot perform the call via a temporary register, |
3416 | // as the rax register may be used to store the static chain, and all |
3417 | // other suitable registers may be either callee-save or used for |
3418 | // parameter passing. We cannot use the stack at this point either |
3419 | // because __morestack manipulates the stack directly. |
3420 | // |
3421 | // To avoid these issues, perform an indirect call via a read-only memory |
3422 | // location containing the address. |
3423 | // |
3424 | // This solution is not perfect, as it assumes that the .rodata section |
3425 | // is laid out within 2^31 bytes of each function body, but this seems |
3426 | // to be sufficient for JIT. |
3427 | // FIXME: Add retpoline support and remove the error here.
3428 | if (STI.useIndirectThunkCalls()) |
3429 | report_fatal_error(reason: "Emitting morestack calls on 64-bit with the large " |
3430 | "code model and thunks not yet implemented." ); |
3431 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALL64m)) |
3432 | .addReg(RegNo: X86::RIP) |
3433 | .addImm(Val: 0) |
3434 | .addReg(RegNo: 0) |
3435 | .addExternalSymbol(FnName: "__morestack_addr" ) |
3436 | .addReg(RegNo: 0); |
3437 | } else { |
3438 | if (Is64Bit) |
3439 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALL64pcrel32)) |
3440 | .addExternalSymbol(FnName: "__morestack" ); |
3441 | else |
3442 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALLpcrel32)) |
3443 | .addExternalSymbol(FnName: "__morestack" ); |
3444 | } |
3445 | |
3446 | if (IsNested) |
3447 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MORESTACK_RET_RESTORE_R10)); |
3448 | else |
3449 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MORESTACK_RET)); |
3450 | |
3451 | allocMBB->addSuccessor(Succ: &PrologueMBB); |
3452 | |
3453 | checkMBB->addSuccessor(Succ: allocMBB, Prob: BranchProbability::getZero()); |
3454 | checkMBB->addSuccessor(Succ: &PrologueMBB, Prob: BranchProbability::getOne()); |
3455 | |
3456 | #ifdef EXPENSIVE_CHECKS |
3457 | MF.verify(); |
3458 | #endif |
3459 | } |
3460 | |
3461 | /// Lookup an ERTS parameter in the !hipe.literals named metadata node. |
3462 | /// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets |
3463 | /// to fields it needs, through a named metadata node "hipe.literals" containing |
3464 | /// name-value pairs. |
3465 | static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, |
3466 | const StringRef LiteralName) { |
3467 | for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) { |
3468 | MDNode *Node = HiPELiteralsMD->getOperand(i); |
3469 | if (Node->getNumOperands() != 2) |
3470 | continue; |
3471 | MDString *NodeName = dyn_cast<MDString>(Val: Node->getOperand(I: 0)); |
3472 | ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Val: Node->getOperand(I: 1)); |
3473 | if (!NodeName || !NodeVal) |
3474 | continue; |
3475 | ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(Val: NodeVal->getValue()); |
3476 | if (ValConst && NodeName->getString() == LiteralName) { |
3477 | return ValConst->getZExtValue(); |
3478 | } |
3479 | } |
3480 | |
3481 | report_fatal_error(reason: "HiPE literal " + LiteralName + |
3482 | " required but not provided" ); |
3483 | } |
3484 | |
3485 | // Return true if there are no non-ehpad successors to MBB and there are no |
3486 | // non-meta instructions between MBBI and MBB.end(). |
3487 | static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, |
3488 | MachineBasicBlock::const_iterator MBBI) { |
3489 | return llvm::all_of( |
3490 | Range: MBB.successors(), |
3491 | P: [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) && |
3492 | std::all_of(first: MBBI, last: MBB.end(), pred: [](const MachineInstr &MI) { |
3493 | return MI.isMetaInstruction(); |
3494 | }); |
3495 | } |
3496 | |
3497 | /// Erlang programs may need a special prologue to handle the stack size they |
3498 | /// might need at runtime. That is because Erlang/OTP does not implement a C
3499 | /// stack but uses a custom implementation of a hybrid stack/heap architecture.
3500 | /// (for more information see Eric Stenman's Ph.D. thesis: |
3501 | /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) |
3502 | /// |
3503 | /// CheckStack: |
3504 | /// temp0 = sp - MaxStack |
3505 | /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart |
3506 | /// OldStart: |
3507 | /// ... |
3508 | /// IncStack: |
3509 | /// call inc_stack # doubles the stack space |
3510 | /// temp0 = sp - MaxStack |
3511 | /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart |
3512 | void X86FrameLowering::adjustForHiPEPrologue( |
3513 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { |
3514 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3515 | DebugLoc DL; |
3516 | |
3517 | // To support shrink-wrapping we would need to insert the new blocks |
3518 | // at the right place and update the branches to PrologueMBB. |
3519 | assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet" ); |
3520 | |
3521 | // HiPE-specific values |
3522 | NamedMDNode *HiPELiteralsMD = |
3523 | MF.getFunction().getParent()->getNamedMetadata(Name: "hipe.literals" ); |
3524 | if (!HiPELiteralsMD) |
3525 | report_fatal_error( |
3526 | reason: "Can't generate HiPE prologue without runtime parameters" ); |
3527 | const unsigned HipeLeafWords = getHiPELiteral( |
3528 | HiPELiteralsMD, LiteralName: Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS" ); |
3529 | const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; |
3530 | const unsigned Guaranteed = HipeLeafWords * SlotSize; |
3531 | unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs |
3532 | ? MF.getFunction().arg_size() - CCRegisteredArgs |
3533 | : 0; |
3534 | unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize; |
3535 | |
3536 | assert(STI.isTargetLinux() && |
3537 | "HiPE prologue is only supported on Linux operating systems." ); |
3538 | |
3539 | // Compute the largest caller's frame that is needed to fit the callees' |
3540 | // frames. This 'MaxStack' is computed from: |
3541 | // |
3542 | // a) the fixed frame size, which is the space needed for all spilled temps, |
3543 | // b) outgoing on-stack parameter areas, and |
3544 | // c) the minimum stack space this function needs to make available for the |
3545 | // functions it calls (a tunable ABI property). |
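// Illustrative example (hypothetical literal value): if the runtime reported
// AMD64_LEAF_WORDS = 24, the guaranteed area would be 24 * SlotSize bytes, and
// only functions whose MaxStack exceeds that get the runtime check below.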
3546 | if (MFI.hasCalls()) { |
3547 | unsigned MoreStackForCalls = 0; |
3548 | |
3549 | for (auto &MBB : MF) { |
3550 | for (auto &MI : MBB) { |
3551 | if (!MI.isCall()) |
3552 | continue; |
3553 | |
3554 | // Get callee operand. |
3555 | const MachineOperand &MO = MI.getOperand(i: 0); |
3556 | |
3557 | // Only take account of global function calls (no closures etc.). |
3558 | if (!MO.isGlobal()) |
3559 | continue; |
3560 | |
3561 | const Function *F = dyn_cast<Function>(Val: MO.getGlobal()); |
3562 | if (!F) |
3563 | continue; |
3564 | |
3565 | // Do not update 'MaxStack' for primitive and built-in functions |
3566 | // (encoded with names either starting with "erlang."/"bif_" or not |
3567 | // having a ".", such as a simple <Module>.<Function>.<Arity>, or an |
3568 | // "_", such as the BIF "suspend_0") as they are executed on another |
3569 | // stack. |
3570 | if (F->getName().contains(Other: "erlang." ) || F->getName().contains(Other: "bif_" ) || |
3571 | F->getName().find_first_of(Chars: "._" ) == StringRef::npos) |
3572 | continue; |
3573 | |
3574 | unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs |
3575 | ? F->arg_size() - CCRegisteredArgs |
3576 | : 0; |
3577 | if (HipeLeafWords - 1 > CalleeStkArity) |
3578 | MoreStackForCalls = |
3579 | std::max(a: MoreStackForCalls, |
3580 | b: (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); |
3581 | } |
3582 | } |
3583 | MaxStack += MoreStackForCalls; |
3584 | } |
3585 | |
3586 | // If the needed stack frame is larger than the guaranteed size, then runtime
3587 | // checks and calls to the "inc_stack_0" BIF are inserted in the assembly prologue.
3588 | if (MaxStack > Guaranteed) { |
3589 | MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); |
3590 | MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); |
3591 | |
3592 | for (const auto &LI : PrologueMBB.liveins()) { |
3593 | stackCheckMBB->addLiveIn(RegMaskPair: LI); |
3594 | incStackMBB->addLiveIn(RegMaskPair: LI); |
3595 | } |
3596 | |
3597 | MF.push_front(MBB: incStackMBB); |
3598 | MF.push_front(MBB: stackCheckMBB); |
3599 | |
3600 | unsigned ScratchReg, SPReg, PReg, SPLimitOffset; |
3601 | unsigned LEAop, CMPop, CALLop; |
3602 | SPLimitOffset = getHiPELiteral(HiPELiteralsMD, LiteralName: "P_NSP_LIMIT" ); |
3603 | if (Is64Bit) { |
3604 | SPReg = X86::RSP; |
3605 | PReg = X86::RBP; |
3606 | LEAop = X86::LEA64r; |
3607 | CMPop = X86::CMP64rm; |
3608 | CALLop = X86::CALL64pcrel32; |
3609 | } else { |
3610 | SPReg = X86::ESP; |
3611 | PReg = X86::EBP; |
3612 | LEAop = X86::LEA32r; |
3613 | CMPop = X86::CMP32rm; |
3614 | CALLop = X86::CALLpcrel32; |
3615 | } |
3616 | |
3617 | ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3618 | assert(!MF.getRegInfo().isLiveIn(ScratchReg) && |
3619 | "HiPE prologue scratch register is live-in" ); |
3620 | |
3621 | // Create new MBB for StackCheck: |
3622 | addRegOffset(MIB: BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: LEAop), DestReg: ScratchReg), Reg: SPReg, |
3623 | isKill: false, Offset: -MaxStack); |
3624 | // SPLimitOffset is in a fixed heap location (pointed to by BP).
3625 | addRegOffset(MIB: BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: CMPop)).addReg(RegNo: ScratchReg), |
3626 | Reg: PReg, isKill: false, Offset: SPLimitOffset); |
3627 | BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
3628 | .addMBB(MBB: &PrologueMBB) |
3629 | .addImm(Val: X86::COND_AE); |
3630 | |
3631 | // Create new MBB for IncStack: |
3632 | BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: CALLop)).addExternalSymbol(FnName: "inc_stack_0" ); |
3633 | addRegOffset(MIB: BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: LEAop), DestReg: ScratchReg), Reg: SPReg, |
3634 | isKill: false, Offset: -MaxStack); |
3635 | addRegOffset(MIB: BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: CMPop)).addReg(RegNo: ScratchReg), |
3636 | Reg: PReg, isKill: false, Offset: SPLimitOffset); |
3637 | BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
3638 | .addMBB(MBB: incStackMBB) |
3639 | .addImm(Val: X86::COND_LE); |
3640 | |
3641 | stackCheckMBB->addSuccessor(Succ: &PrologueMBB, Prob: {99, 100}); |
3642 | stackCheckMBB->addSuccessor(Succ: incStackMBB, Prob: {1, 100}); |
3643 | incStackMBB->addSuccessor(Succ: &PrologueMBB, Prob: {99, 100}); |
3644 | incStackMBB->addSuccessor(Succ: incStackMBB, Prob: {1, 100}); |
3645 | } |
3646 | #ifdef EXPENSIVE_CHECKS |
3647 | MF.verify(); |
3648 | #endif |
3649 | } |
3650 | |
3651 | bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, |
3652 | MachineBasicBlock::iterator MBBI, |
3653 | const DebugLoc &DL, |
3654 | int Offset) const { |
3655 | if (Offset <= 0) |
3656 | return false; |
3657 | |
3658 | if (Offset % SlotSize) |
3659 | return false; |
3660 | |
3661 | int NumPops = Offset / SlotSize; |
3662 | // This is only worth it if we have at most 2 pops. |
3663 | if (NumPops != 1 && NumPops != 2) |
3664 | return false; |
3665 | |
3666 | // Handle only the trivial case where the adjustment directly follows |
3667 | // a call. This is the most common one, anyway. |
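// Illustrative example (not from the original source): an 8-byte adjustment
// right after a call can become a single 'pop %rcx', provided %rcx is
// clobbered by the call, not defined by it, and not reserved.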
3668 | if (MBBI == MBB.begin()) |
3669 | return false; |
3670 | MachineBasicBlock::iterator Prev = std::prev(x: MBBI); |
3671 | if (!Prev->isCall() || !Prev->getOperand(i: 1).isRegMask()) |
3672 | return false; |
3673 | |
3674 | unsigned Regs[2]; |
3675 | unsigned FoundRegs = 0; |
3676 | |
3677 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
3678 | const MachineOperand &RegMask = Prev->getOperand(i: 1); |
3679 | |
3680 | auto &RegClass = |
3681 | Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; |
3682 | // Try to find up to NumPops free registers. |
3683 | for (auto Candidate : RegClass) { |
3684 | // Poor man's liveness: |
3685 | // Since we're immediately after a call, any register that is clobbered |
3686 | // by the call and not defined by it can be considered dead. |
3687 | if (!RegMask.clobbersPhysReg(PhysReg: Candidate)) |
3688 | continue; |
3689 | |
3690 | // Don't clobber reserved registers |
3691 | if (MRI.isReserved(PhysReg: Candidate)) |
3692 | continue; |
3693 | |
3694 | bool IsDef = false; |
3695 | for (const MachineOperand &MO : Prev->implicit_operands()) { |
3696 | if (MO.isReg() && MO.isDef() && |
3697 | TRI->isSuperOrSubRegisterEq(RegA: MO.getReg(), RegB: Candidate)) { |
3698 | IsDef = true; |
3699 | break; |
3700 | } |
3701 | } |
3702 | |
3703 | if (IsDef) |
3704 | continue; |
3705 | |
3706 | Regs[FoundRegs++] = Candidate; |
3707 | if (FoundRegs == (unsigned)NumPops) |
3708 | break; |
3709 | } |
3710 | |
3711 | if (FoundRegs == 0) |
3712 | return false; |
3713 | |
3714 | // If we found only one free register, but need two, reuse the same one twice. |
3715 | while (FoundRegs < (unsigned)NumPops) |
3716 | Regs[FoundRegs++] = Regs[0]; |
3717 | |
3718 | for (int i = 0; i < NumPops; ++i) |
3719 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: STI.is64Bit() ? X86::POP64r : X86::POP32r), |
3720 | DestReg: Regs[i]); |
3721 | |
3722 | return true; |
3723 | } |
3724 | |
3725 | MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr( |
3726 | MachineFunction &MF, MachineBasicBlock &MBB, |
3727 | MachineBasicBlock::iterator I) const { |
3728 | bool reserveCallFrame = hasReservedCallFrame(MF); |
3729 | unsigned Opcode = I->getOpcode(); |
3730 | bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); |
3731 | DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased. |
3732 | uint64_t Amount = TII.getFrameSize(I: *I); |
3733 | uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(I: *I) : 0; |
3734 | I = MBB.erase(I); |
3735 | auto InsertPos = skipDebugInstructionsForward(It: I, End: MBB.end()); |
3736 | |
3737 | // Try to avoid emitting dead SP adjustments if the block end is unreachable, |
3738 | // typically because the function is marked noreturn (abort, throw, |
3739 | // assert_fail, etc). |
3740 | if (isDestroy && blockEndIsUnreachable(MBB, MBBI: I)) |
3741 | return I; |
3742 | |
3743 | if (!reserveCallFrame) { |
3744 | // If the stack pointer can be changed after the prologue, turn the
3745 | // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
3746 | // adjcallstackup instruction into an 'add ESP, <amt>'.
3747 | |
3748 | // We need to keep the stack aligned properly. To do this, we round the |
3749 | // amount of space needed for the outgoing arguments up to the next |
3750 | // alignment boundary. |
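// Illustrative example (hypothetical sizes): with a 16-byte stack alignment,
// a 20-byte outgoing argument area is rounded up to a 32-byte SP adjustment.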
3751 | Amount = alignTo(Size: Amount, A: getStackAlign()); |
3752 | |
3753 | const Function &F = MF.getFunction(); |
3754 | bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
3755 | bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves(); |
3756 | |
3757 | // If we have any exception handlers in this function, and we adjust |
3758 | // the SP before calls, we may need to indicate this to the unwinder |
3759 | // using GNU_ARGS_SIZE. Note that this may be necessary even when |
3760 | // Amount == 0, because the preceding function may have set a non-0 |
3761 | // GNU_ARGS_SIZE. |
3762 | // TODO: We don't need to reset this between subsequent functions, |
3763 | // if it didn't change. |
3764 | bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty(); |
3765 | |
3766 | if (HasDwarfEHHandlers && !isDestroy && |
3767 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences()) |
BuildCFI(MBB, InsertPos, DL,
         MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3770 | |
3771 | if (Amount == 0) |
3772 | return I; |
3773 | |
3774 | // Factor out the amount that gets handled inside the sequence |
3775 | // (Pushes of argument for frame setup, callee pops for frame destroy) |
3776 | Amount -= InternalAmt; |
3777 | |
3778 | // TODO: This is needed only if we require precise CFA. |
3779 | // If this is a callee-pop calling convention, emit a CFA adjust for |
3780 | // the amount the callee popped. |
3781 | if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) |
BuildCFI(MBB, InsertPos, DL,
         MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3784 | |
3785 | // Add Amount to SP to destroy a frame, or subtract to setup. |
3786 | int64_t StackAdjustment = isDestroy ? Amount : -Amount; |
3787 | |
3788 | if (StackAdjustment) { |
3789 | // Merge with any previous or following adjustment instruction. Note: the |
3790 | // instructions merged with here do not have CFI, so their stack |
3791 | // adjustments do not feed into CfaAdjustment. |
StackAdjustment +=
    mergeSPUpdates(MBB, InsertPos, /*doMergeWithPrevious=*/true);
StackAdjustment +=
    mergeSPUpdates(MBB, InsertPos, /*doMergeWithPrevious=*/false);
3794 | |
3795 | if (StackAdjustment) { |
if (!(F.hasMinSize() &&
      adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
  BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
                       /*InEpilogue=*/false);
3800 | } |
3801 | } |
3802 | |
3803 | if (DwarfCFI && !hasFP(MF)) { |
3804 | // If we don't have FP, but need to generate unwind information, |
3805 | // we need to set the correct CFA offset after the stack adjustment. |
3806 | // How much we adjust the CFA offset depends on whether we're emitting |
3807 | // CFI only for EH purposes or for debugging. EH only requires the CFA |
3808 | // offset to be correct at each call site, while for debugging we want |
3809 | // it to be more precise. |
3810 | |
3811 | int64_t CfaAdjustment = -StackAdjustment; |
3812 | // TODO: When not using precise CFA, we also need to adjust for the |
3813 | // InternalAmt here. |
3814 | if (CfaAdjustment) { |
BuildCFI(
    MBB, InsertPos, DL,
    MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3818 | } |
3819 | } |
3820 | |
3821 | return I; |
3822 | } |
3823 | |
3824 | if (InternalAmt) { |
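// With a reserved call frame, a callee that pops its own arguments leaves SP
// InternalAmt bytes above the reserved area; re-allocate that space right
// after the call so the outgoing-argument area keeps its expected size.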
3825 | MachineBasicBlock::iterator CI = I; |
3826 | MachineBasicBlock::iterator B = MBB.begin(); |
while (CI != B && !std::prev(CI)->isCall())
  --CI;
BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3830 | } |
3831 | |
3832 | return I; |
3833 | } |
3834 | |
3835 | bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { |
assert(MBB.getParent() && "Block is not attached to a function!");
const MachineFunction &MF = *MBB.getParent();
if (!MBB.isLiveIn(X86::EFLAGS))
3839 | return true; |
3840 | |
3841 | // If stack probes have to loop inline or call, that will clobber EFLAGS. |
3842 | // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock. |
3843 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
3844 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
3845 | if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF)) |
3846 | return false; |
3847 | |
3848 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3849 | return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext(); |
3850 | } |
3851 | |
3852 | bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
assert(MBB.getParent() && "Block is not attached to a function!");
3854 | |
3855 | // Win64 has strict requirements in terms of epilogue and we are |
3856 | // not taking a chance at messing with them. |
3857 | // I.e., unless this block is already an exit block, we can't use |
3858 | // it as an epilogue. |
3859 | if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock()) |
3860 | return false; |
3861 | |
3862 | // Swift async context epilogue has a BTR instruction that clobbers parts of |
3863 | // EFLAGS. |
3864 | const MachineFunction &MF = *MBB.getParent(); |
3865 | if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext()) |
3866 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); |
3867 | |
if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3869 | return true; |
3870 | |
// If we cannot use LEA to adjust SP, we may need to use ADD, which
// clobbers EFLAGS. Check that EFLAGS does not need to be preserved;
// otherwise, conservatively assume it is not safe to insert the
// epilogue here.
3875 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); |
3876 | } |
3877 | |
3878 | bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { |
3879 | // If we may need to emit frameless compact unwind information, give |
3880 | // up as this is currently broken: PR25614. |
3881 | bool CompactUnwind = |
3882 | MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr; |
return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3884 | !CompactUnwind) && |
3885 | // The lowering of segmented stack and HiPE only support entry |
3886 | // blocks as prologue blocks: PR26107. This limitation may be |
3887 | // lifted if we fix: |
3888 | // - adjustForSegmentedStacks |
3889 | // - adjustForHiPEPrologue |
3890 | MF.getFunction().getCallingConv() != CallingConv::HiPE && |
3891 | !MF.shouldSplitStack(); |
3892 | } |
3893 | |
3894 | MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( |
3895 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
3896 | const DebugLoc &DL, bool RestoreSP) const { |
assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
assert(STI.is32Bit() && !Uses64BitFramePtr &&
       "restoring EBP/ESI on non-32-bit target");
3901 | |
3902 | MachineFunction &MF = *MBB.getParent(); |
3903 | Register FramePtr = TRI->getFrameRegister(MF); |
3904 | Register BasePtr = TRI->getBaseRegister(); |
3905 | WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); |
3906 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3907 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3908 | |
3909 | // FIXME: Don't set FrameSetup flag in catchret case. |
3910 | |
3911 | int FI = FuncInfo.EHRegNodeFrameIndex; |
int EHRegSize = MFI.getObjectSize(FI);
3913 | |
3914 | if (RestoreSP) { |
3915 | // MOV32rm -EHRegSize(%ebp), %esp |
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
             X86::EBP, /*isKill=*/true, -EHRegSize)
    .setMIFlag(MachineInstr::FrameSetup);
3919 | } |
3920 | |
3921 | Register UsedReg; |
int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3923 | int EndOffset = -EHRegOffset - EHRegSize; |
3924 | FuncInfo.EHRegNodeEndOffset = EndOffset; |
3925 | |
3926 | if (UsedReg == FramePtr) { |
3927 | // ADD $offset, %ebp |
unsigned ADDri = getADDriOpcode(/*IsLP64=*/false);
BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
    .addReg(FramePtr)
    .addImm(EndOffset)
    .setMIFlag(MachineInstr::FrameSetup)
    ->getOperand(3)
    .setIsDead();
3935 | assert(EndOffset >= 0 && |
3936 | "end of registration object above normal EBP position!" ); |
3937 | } else if (UsedReg == BasePtr) { |
3938 | // LEA offset(%ebp), %esi |
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
             FramePtr, /*isKill=*/false, EndOffset)
3941 | .setMIFlag(MachineInstr::FrameSetup); |
3942 | // MOV32rm SavedEBPOffset(%esi), %ebp |
3943 | assert(X86FI->getHasSEHFramePtrSave()); |
3944 | int Offset = |
    getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3946 | .getFixed(); |
3947 | assert(UsedReg == BasePtr); |
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
             UsedReg, /*isKill=*/true, Offset)
3950 | .setMIFlag(MachineInstr::FrameSetup); |
3951 | } else { |
3952 | llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr" ); |
3953 | } |
3954 | return MBBI; |
3955 | } |
3956 | |
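// At the function entry point only the return address has been pushed, so the
// CFA sits exactly one slot above the current stack pointer.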
3957 | int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { |
3958 | return TRI->getSlotSize(); |
3959 | } |
3960 | |
3961 | Register |
3962 | X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const { |
3963 | return StackPtr; |
3964 | } |
3965 | |
3966 | TargetFrameLowering::DwarfFrameBase |
3967 | X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { |
3968 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
3969 | Register FrameRegister = RI->getFrameRegister(MF); |
3970 | if (getInitialCFARegister(MF) == FrameRegister && |
3971 | MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) { |
3972 | DwarfFrameBase FrameBase; |
3973 | FrameBase.Kind = DwarfFrameBase::CFA; |
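// Locals are addressed from the post-prologue stack pointer, which lies
// below the CFA by the allocated stack size plus the return-address slot.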
3974 | FrameBase.Location.Offset = |
3975 | -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF); |
3976 | return FrameBase; |
3977 | } |
3978 | |
return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3980 | } |
3981 | |
3982 | namespace { |
3983 | // Struct used by orderFrameObjects to help sort the stack objects. |
3984 | struct X86FrameSortingObject { |
3985 | bool IsValid = false; // true if we care about this Object. |
3986 | unsigned ObjectIndex = 0; // Index of Object into MFI list. |
3987 | unsigned ObjectSize = 0; // Size of Object in bytes. |
3988 | Align ObjectAlignment = Align(1); // Alignment of Object in bytes. |
3989 | unsigned ObjectNumUses = 0; // Object static number of uses. |
3990 | }; |
3991 | |
3992 | // The comparison function we use for std::sort to order our local |
3993 | // stack symbols. The current algorithm is to use an estimated |
3994 | // "density". This takes into consideration the size and number of |
3995 | // uses each object has in order to roughly minimize code size. |
3996 | // So, for example, an object of size 16B that is referenced 5 times |
3997 | // will get higher priority than 4 4B objects referenced 1 time each. |
3998 | // It's not perfect and we may be able to squeeze a few more bytes out of |
3999 | // it (for example : 0(esp) requires fewer bytes, symbols allocated at the |
4000 | // fringe end can have special consideration, given their size is less |
4001 | // important, etc.), but the algorithmic complexity grows too much to be |
4002 | // worth the extra gains we get. This gets us pretty close. |
4003 | // The final order leaves us with objects with highest priority going |
4004 | // at the end of our list. |
4005 | struct X86FrameSortingComparator { |
4006 | inline bool operator()(const X86FrameSortingObject &A, |
4007 | const X86FrameSortingObject &B) const { |
4008 | uint64_t DensityAScaled, DensityBScaled; |
4009 | |
4010 | // For consistency in our comparison, all invalid objects are placed |
4011 | // at the end. This also allows us to stop walking when we hit the |
4012 | // first invalid item after it's all sorted. |
4013 | if (!A.IsValid) |
4014 | return false; |
4015 | if (!B.IsValid) |
4016 | return true; |
4017 | |
4018 | // The density is calculated by doing : |
4019 | // (double)DensityA = A.ObjectNumUses / A.ObjectSize |
4020 | // (double)DensityB = B.ObjectNumUses / B.ObjectSize |
4021 | // Since this approach may cause inconsistencies in |
4022 | // the floating point <, >, == comparisons, depending on the floating |
4023 | // point model with which the compiler was built, we're going |
4024 | // to scale both sides by multiplying with |
4025 | // A.ObjectSize * B.ObjectSize. This ends up factoring away |
4026 | // the division and, with it, the need for any floating point |
4027 | // arithmetic. |
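// For example, 5 uses over 16 bytes vs. 1 use over 4 bytes compares
// 5 * 4 = 20 against 1 * 16 = 16, so the first object counts as denser.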
4028 | DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) * |
4029 | static_cast<uint64_t>(B.ObjectSize); |
4030 | DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) * |
4031 | static_cast<uint64_t>(A.ObjectSize); |
4032 | |
4033 | // If the two densities are equal, prioritize highest alignment |
4034 | // objects. This allows for similar alignment objects |
4035 | // to be packed together (given the same density). |
4036 | // There's room for improvement here, also, since we can pack |
4037 | // similar alignment (different density) objects next to each |
4038 | // other to save padding. This will also require further |
4039 | // complexity/iterations, and the overall gain isn't worth it, |
4040 | // in general. Something to keep in mind, though. |
4041 | if (DensityAScaled == DensityBScaled) |
4042 | return A.ObjectAlignment < B.ObjectAlignment; |
4043 | |
4044 | return DensityAScaled < DensityBScaled; |
4045 | } |
4046 | }; |
4047 | } // namespace |
4048 | |
4049 | // Order the symbols in the local stack. |
4050 | // We want to place the local stack objects in some sort of sensible order. |
4051 | // The heuristic we use is to try and pack them according to static number |
4052 | // of uses and size of object in order to minimize code size. |
4053 | void X86FrameLowering::orderFrameObjects( |
4054 | const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { |
4055 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
4056 | |
4057 | // Don't waste time if there's nothing to do. |
4058 | if (ObjectsToAllocate.empty()) |
4059 | return; |
4060 | |
4061 | // Create an array of all MFI objects. We won't need all of these |
4062 | // objects, but we're going to create a full array of them to make |
4063 | // it easier to index into when we're counting "uses" down below. |
4064 | // We want to be able to easily/cheaply access an object by simply |
4065 | // indexing into it, instead of having to search for it every time. |
4066 | std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd()); |
4067 | |
4068 | // Walk the objects we care about and mark them as such in our working |
4069 | // struct. |
4070 | for (auto &Obj : ObjectsToAllocate) { |
4071 | SortingObjects[Obj].IsValid = true; |
4072 | SortingObjects[Obj].ObjectIndex = Obj; |
SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
// Set the size.
int ObjectSize = MFI.getObjectSize(Obj);
4076 | if (ObjectSize == 0) |
4077 | // Variable size. Just use 4. |
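// (A small nonzero placeholder keeps the density comparison well-defined.)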
4078 | SortingObjects[Obj].ObjectSize = 4; |
4079 | else |
4080 | SortingObjects[Obj].ObjectSize = ObjectSize; |
4081 | } |
4082 | |
4083 | // Count the number of uses for each object. |
4084 | for (auto &MBB : MF) { |
4085 | for (auto &MI : MBB) { |
4086 | if (MI.isDebugInstr()) |
4087 | continue; |
4088 | for (const MachineOperand &MO : MI.operands()) { |
4089 | // Check to see if it's a local stack symbol. |
4090 | if (!MO.isFI()) |
4091 | continue; |
4092 | int Index = MO.getIndex(); |
4093 | // Check to see if it falls within our range, and is tagged |
4094 | // to require ordering. |
4095 | if (Index >= 0 && Index < MFI.getObjectIndexEnd() && |
4096 | SortingObjects[Index].IsValid) |
4097 | SortingObjects[Index].ObjectNumUses++; |
4098 | } |
4099 | } |
4100 | } |
4101 | |
// Sort the objects using the X86FrameSortingComparator (see its comment for
// more info).
llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4105 | |
4106 | // Now modify the original list to represent the final order that |
4107 | // we want. The order will depend on whether we're going to access them |
// from the stack pointer or the frame pointer. For SP, the objects we want
// at smaller offsets should end up at the END of the list. For FP, the
// order should be flipped.
4111 | int i = 0; |
4112 | for (auto &Obj : SortingObjects) { |
4113 | // All invalid items are sorted at the end, so it's safe to stop. |
4114 | if (!Obj.IsValid) |
4115 | break; |
4116 | ObjectsToAllocate[i++] = Obj.ObjectIndex; |
4117 | } |
4118 | |
4119 | // Flip it if we're accessing off of the FP. |
4120 | if (!TRI->hasStackRealignment(MF) && hasFP(MF)) |
std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4122 | } |
4123 | |
4124 | unsigned |
4125 | X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { |
4126 | // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. |
4127 | unsigned Offset = 16; |
4128 | // RBP is immediately pushed. |
4129 | Offset += SlotSize; |
4130 | // All callee-saved registers are then pushed. |
4131 | Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); |
4132 | // Every funclet allocates enough stack space for the largest outgoing call. |
4133 | Offset += getWinEHFuncletFrameSize(MF); |
4134 | return Offset; |
4135 | } |
4136 | |
4137 | void X86FrameLowering::processFunctionBeforeFrameFinalized( |
4138 | MachineFunction &MF, RegScavenger *RS) const { |
4139 | // Mark the function as not having WinCFI. We will set it back to true in |
4140 | // emitPrologue if it gets called and emits CFI. |
4141 | MF.setHasWinCFI(false); |
4142 | |
4143 | // If we are using Windows x64 CFI, ensure that the stack is always 8 byte |
4144 | // aligned. The format doesn't support misaligned stack adjustments. |
4145 | if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) |
MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4147 | |
4148 | // If this function isn't doing Win64-style C++ EH, we don't need to do |
4149 | // anything. |
4150 | if (STI.is64Bit() && MF.hasEHFunclets() && |
    classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4152 | EHPersonality::MSVC_CXX) { |
4153 | adjustFrameForMsvcCxxEh(MF); |
4154 | } |
4155 | } |
4156 | |
4157 | void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const { |
4158 | // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset |
4159 | // relative to RSP after the prologue. Find the offset of the last fixed |
4160 | // object, so that we can allocate a slot immediately following it. If there |
4161 | // were no fixed objects, use offset -SlotSize, which is immediately after the |
4162 | // return address. Fixed objects have negative frame indices. |
4163 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4164 | WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); |
4165 | int64_t MinFixedObjOffset = -SlotSize; |
4166 | for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) |
MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4168 | |
4169 | for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { |
4170 | for (WinEHHandlerType &H : TBME.HandlerArray) { |
4171 | int FrameIndex = H.CatchObj.FrameIndex; |
4172 | if (FrameIndex != INT_MAX) { |
4173 | // Ensure alignment. |
unsigned Align = MFI.getObjectAlign(FrameIndex).value();
MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4178 | } |
4179 | } |
4180 | } |
4181 | |
4182 | // Ensure alignment. |
MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4184 | int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize; |
4185 | int UnwindHelpFI = |
    MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4187 | EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; |
4188 | |
4189 | // Store -2 into UnwindHelp on function entry. We have to scan forwards past |
4190 | // other frame setup instructions. |
4191 | MachineBasicBlock &MBB = MF.front(); |
4192 | auto MBBI = MBB.begin(); |
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4194 | ++MBBI; |
4195 | |
4196 | DebugLoc DL = MBB.findDebugLoc(MBBI); |
addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
                  UnwindHelpFI)
    .addImm(-2);
4200 | } |
4201 | |
4202 | void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( |
4203 | MachineFunction &MF, RegScavenger *RS) const { |
4204 | auto *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
4205 | |
4206 | if (STI.is32Bit() && MF.hasEHFunclets()) |
4207 | restoreWinEHStackPointersInParent(MF); |
// The prologue and epilogue have been emitted; the stack pointer saving
// instruction is no longer needed.
4210 | if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) { |
4211 | MI->eraseFromParent(); |
4212 | X86FI->setStackPtrSaveMI(nullptr); |
4213 | } |
4214 | } |
4215 | |
4216 | void X86FrameLowering::restoreWinEHStackPointersInParent( |
4217 | MachineFunction &MF) const { |
4218 | // 32-bit functions have to restore stack pointers when control is transferred |
4219 | // back to the parent function. These blocks are identified as eh pads that |
4220 | // are not funclet entries. |
bool IsSEH = isAsynchronousEHPersonality(
    classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4223 | for (MachineBasicBlock &MBB : MF) { |
4224 | bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry(); |
4225 | if (NeedsRestore) |
restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
                            /*RestoreSP=*/IsSEH);
4228 | } |
4229 | } |
4230 | |