1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
14#include "MCTargetDesc/X86MCTargetDesc.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
17#include "X86MachineFunctionInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/CodeGen/LivePhysRegs.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineModuleInfo.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/RegisterScavenging.h"
28#include "llvm/CodeGen/WinEHFuncInfo.h"
29#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/EHPersonalities.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/Module.h"
33#include "llvm/MC/MCAsmInfo.h"
34#include "llvm/MC/MCObjectFileInfo.h"
35#include "llvm/MC/MCSymbol.h"
36#include "llvm/Support/LEB128.h"
37#include "llvm/Target/TargetOptions.h"
38#include <cstdlib>
39
40#define DEBUG_TYPE "x86-fl"
41
42STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
43STATISTIC(NumFrameExtraProbe,
44 "Number of extra stack probes generated in prologue");
45STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
46
47using namespace llvm;
48
49static const TargetRegisterClass *
50getCalleeSavedSpillRC(MCRegister Reg, const X86Subtarget &STI,
51 const TargetRegisterInfo &TRI) {
52 if (X86::VK16RegClass.contains(Reg))
53 return STI.hasBWI() ? &X86::VK64RegClass : &X86::VK16RegClass;
54 return TRI.getMinimalPhysRegClass(Reg);
55}
56
57X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
58 MaybeAlign StackAlignOverride)
59 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
60 STI.is64Bit() ? -8 : -4),
61 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
62 // Cache a bunch of frame-related predicates for this subtarget.
63 SlotSize = TRI->getSlotSize();
64 assert(SlotSize == 4 || SlotSize == 8);
65 Is64Bit = STI.is64Bit();
66 IsLP64 = STI.isTarget64BitLP64();
67 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
68 Uses64BitFramePtr = STI.isTarget64BitLP64();
69 StackPtr = TRI->getStackRegister();
70}
71
72bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
73 return !MF.getFrameInfo().hasVarSizedObjects() &&
74 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
75 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
76}
77
78/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
79/// call frame pseudos can be simplified. Having a FP, as in the default
80/// implementation, is not sufficient here since we can't always use it.
81/// Use a more nuanced condition.
82bool X86FrameLowering::canSimplifyCallFramePseudos(
83 const MachineFunction &MF) const {
84 return hasReservedCallFrame(MF) ||
85 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
86 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
87 TRI->hasBasePointer(MF);
88}
89
90// needsFrameIndexResolution - Do we need to perform FI resolution for
91// this function. Normally, this is required only when the function
92// has any stack objects. However, FI resolution actually has another job,
93// not apparent from the title - it resolves callframesetup/destroy
94// that were not simplified earlier.
95// So, this is required for x86 functions that have push sequences even
96// when there are no stack objects.
97bool X86FrameLowering::needsFrameIndexResolution(
98 const MachineFunction &MF) const {
99 return MF.getFrameInfo().hasStackObjects() ||
100 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
101}
102
103/// hasFPImpl - Return true if the specified function should have a dedicated
104/// frame pointer register. This is true if the function has variable sized
105/// allocas or if frame pointer elimination is disabled.
106bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const {
107 const MachineFrameInfo &MFI = MF.getFrameInfo();
108 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
109 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
110 MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
111 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
112 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
113 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
114 MFI.hasStackMap() || MFI.hasPatchPoint() ||
115 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
116}
117
118static unsigned getSUBriOpcode(bool IsLP64) {
119 return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
120}
121
122static unsigned getADDriOpcode(bool IsLP64) {
123 return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
124}
125
126static unsigned getSUBrrOpcode(bool IsLP64) {
127 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
128}
129
130static unsigned getADDrrOpcode(bool IsLP64) {
131 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
132}
133
134static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
135 return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
136}
137
138static unsigned getLEArOpcode(bool IsLP64) {
139 return IsLP64 ? X86::LEA64r : X86::LEA32r;
140}
141
142// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
143// value written by the PUSH from the stack. The processor tracks these marked
144// instructions internally and fast-forwards register data between matching PUSH
145// and POP instructions, without going through memory or through the training
146// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
147// memory-renaming optimization can be used.
148//
149// The PPX hint is purely a performance hint. Instructions with this hint have
150// the same functional semantics as those without. PPX hints set by the
151// compiler that violate the balancing rule may turn off the PPX optimization,
152// but they will not affect program semantics.
153//
154// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
155// are not considered).
156//
157// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
158// GPRs at a time to/from the stack.
159static unsigned getPUSHOpcode(const X86Subtarget &ST) {
160 return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
161 : X86::PUSH32r;
162}
163static unsigned getPOPOpcode(const X86Subtarget &ST) {
164 return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
165 : X86::POP32r;
166}
167static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
168 return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
169}
170static unsigned getPOP2Opcode(const X86Subtarget &ST) {
171 return ST.hasPPX() ? X86::POP2P : X86::POP2;
172}
173
174static bool isEAXLiveIn(MachineBasicBlock &MBB) {
175 for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
176 MCRegister Reg = RegMask.PhysReg;
177
178 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
179 Reg == X86::AH || Reg == X86::AL)
180 return true;
181 }
182
183 return false;
184}
185
186/// Check if the flags need to be preserved before the terminators.
187/// This would be the case, if the eflags is live-in of the region
188/// composed by the terminators or live-out of that region, without
189/// being defined by a terminator.
190static bool
191flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
192 for (const MachineInstr &MI : MBB.terminators()) {
193 bool BreakNext = false;
194 for (const MachineOperand &MO : MI.operands()) {
195 if (!MO.isReg())
196 continue;
197 Register Reg = MO.getReg();
198 if (Reg != X86::EFLAGS)
199 continue;
200
201 // This terminator needs an eflags that is not defined
202 // by a previous another terminator:
203 // EFLAGS is live-in of the region composed by the terminators.
204 if (!MO.isDef())
205 return true;
206 // This terminator defines the eflags, i.e., we don't need to preserve it.
207 // However, we still need to check this specific terminator does not
208 // read a live-in value.
209 BreakNext = true;
210 }
211 // We found a definition of the eflags, no need to preserve them.
212 if (BreakNext)
213 return false;
214 }
215
216 // None of the terminators use or define the eflags.
217 // Check if they are live-out, that would imply we need to preserve them.
218 for (const MachineBasicBlock *Succ : MBB.successors())
219 if (Succ->isLiveIn(Reg: X86::EFLAGS))
220 return true;
221
222 return false;
223}
224
/// Largest stack-pointer adjustment handled as a single chunk: 2^31 - 1 bytes.
constexpr uint64_t MaxSPChunk = (uint64_t{1} << 31) - 1;
226
227/// emitSPUpdate - Emit a series of instructions to increment / decrement the
228/// stack pointer by a constant value.
229void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
230 MachineBasicBlock::iterator &MBBI,
231 const DebugLoc &DL, int64_t NumBytes,
232 bool InEpilogue) const {
233 bool isSub = NumBytes < 0;
234 uint64_t Offset = isSub ? -NumBytes : NumBytes;
235 MachineInstr::MIFlag Flag =
236 isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;
237
238 if (!Uses64BitFramePtr && !isUInt<32>(x: Offset)) {
239 // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more.
240 // This might be unreachable code, so don't complain now; just trap if
241 // it's reached at runtime.
242 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::TRAP));
243 return;
244 }
245
246 MachineFunction &MF = *MBB.getParent();
247 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
248 const X86TargetLowering &TLI = *STI.getTargetLowering();
249 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
250
251 // It's ok to not take into account large chunks when probing, as the
252 // allocation is split in smaller chunks anyway.
253 if (EmitInlineStackProbe && !InEpilogue) {
254
255 // This pseudo-instruction is going to be expanded, potentially using a
256 // loop, by inlineStackProbe().
257 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::STACKALLOC_W_PROBING)).addImm(Val: Offset);
258 return;
259 } else if (Offset > MaxSPChunk) {
260 // Rather than emit a long series of instructions for large offsets,
261 // load the offset into a register and do one sub/add
262 unsigned Reg = 0;
263 unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);
264
265 if (isSub && !isEAXLiveIn(MBB))
266 Reg = Rax;
267 else
268 Reg = getX86SubSuperRegister(Reg: TRI->findDeadCallerSavedReg(MBB, MBBI),
269 Size: Uses64BitFramePtr ? 64 : 32);
270
271 unsigned AddSubRROpc = isSub ? getSUBrrOpcode(IsLP64: Uses64BitFramePtr)
272 : getADDrrOpcode(IsLP64: Uses64BitFramePtr);
273 if (Reg) {
274 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
275 MCID: TII.get(Opcode: X86::getMOVriOpcode(Use64BitReg: Uses64BitFramePtr, Imm: Offset)), DestReg: Reg)
276 .addImm(Val: Offset)
277 .setMIFlag(Flag);
278 MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: AddSubRROpc), DestReg: StackPtr)
279 .addReg(RegNo: StackPtr)
280 .addReg(RegNo: Reg);
281 MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead.
282 return;
283 } else if (Offset > 8 * MaxSPChunk) {
284 // If we would need more than 8 add or sub instructions (a >16GB stack
285 // frame), it's worth spilling RAX to materialize this immediate.
286 // pushq %rax
287 // movabsq +-$Offset+-SlotSize, %rax
288 // addq %rsp, %rax
289 // xchg %rax, (%rsp)
290 // movq (%rsp), %rsp
291 assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
292 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r))
293 .addReg(RegNo: Rax, Flags: RegState::Kill)
294 .setMIFlag(Flag);
295 // Subtract is not commutative, so negate the offset and always use add.
296 // Subtract 8 less and add 8 more to account for the PUSH we just did.
297 if (isSub)
298 Offset = -(Offset - SlotSize);
299 else
300 Offset = Offset + SlotSize;
301 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
302 MCID: TII.get(Opcode: X86::getMOVriOpcode(Use64BitReg: Uses64BitFramePtr, Imm: Offset)), DestReg: Rax)
303 .addImm(Val: Offset)
304 .setMIFlag(Flag);
305 MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::ADD64rr), DestReg: Rax)
306 .addReg(RegNo: Rax)
307 .addReg(RegNo: StackPtr);
308 MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead.
309 // Exchange the new SP in RAX with the top of the stack.
310 addRegOffset(
311 MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::XCHG64rm), DestReg: Rax).addReg(RegNo: Rax),
312 Reg: StackPtr, isKill: false, Offset: 0);
313 // Load new SP from the top of the stack into RSP.
314 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: StackPtr),
315 Reg: StackPtr, isKill: false, Offset: 0);
316 return;
317 }
318 }
319
320 while (Offset) {
321 if (Offset == SlotSize) {
322 // Use push / pop for slot sized adjustments as a size optimization. We
323 // need to find a dead register when using pop.
324 unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
325 : TRI->findDeadCallerSavedReg(MBB, MBBI);
326 if (Reg) {
327 unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
328 : (Is64Bit ? X86::POP64r : X86::POP32r);
329 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc))
330 .addReg(RegNo: Reg, Flags: getDefRegState(B: !isSub) | getUndefRegState(B: isSub))
331 .setMIFlag(Flag);
332 return;
333 }
334 }
335
336 uint64_t ThisVal = std::min(a: Offset, b: MaxSPChunk);
337
338 BuildStackAdjustment(MBB, MBBI, DL, Offset: isSub ? -ThisVal : ThisVal, InEpilogue)
339 .setMIFlag(Flag);
340
341 Offset -= ThisVal;
342 }
343}
344
345MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
346 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
347 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
348 assert(Offset != 0 && "zero offset stack adjustment requested");
349
350 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
351 // is tricky.
352 bool UseLEA;
353 if (!InEpilogue) {
354 // Check if inserting the prologue at the beginning
355 // of MBB would require to use LEA operations.
356 // We need to use LEA operations if EFLAGS is live in, because
357 // it means an instruction will read it before it gets defined.
358 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(Reg: X86::EFLAGS);
359 } else {
360 // If we can use LEA for SP but we shouldn't, check that none
361 // of the terminators uses the eflags. Otherwise we will insert
362 // a ADD that will redefine the eflags and break the condition.
363 // Alternatively, we could move the ADD, but this may not be possible
364 // and is an optimization anyway.
365 UseLEA = canUseLEAForSPInEpilogue(MF: *MBB.getParent());
366 if (UseLEA && !STI.useLeaForSP())
367 UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
368 // If that assert breaks, that means we do not do the right thing
369 // in canUseAsEpilogue.
370 assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
371 "We shouldn't have allowed this insertion point");
372 }
373
374 MachineInstrBuilder MI;
375 // Use an NF (no-flags) variant as a smaller replacement for LEA when EFLAGS
376 // must be preserved (i.e. only when we would otherwise emit LEA). If EFLAGS
377 // is dead we prefer the plain SUB/ADD, which is shorter than the EVEX-encoded
378 // NF form. The NF stack-adjust opcodes below are 64-bit (SUB64ri32_NF/
379 // ADD64ri32_NF), so don't use them for the x32 ABI where the stack pointer is
380 // 32-bit. NF cannot reach a Win64 epilogue (which never uses LEA for the SP
381 // adjustment unless it has a frame pointer, and that path doesn't go through
382 // here), so the Windows epilogue unwinder never sees an undisassemblable NF
383 // add/sub.
384 bool UseNF = UseLEA && STI.hasNF() && Uses64BitFramePtr;
385 bool IsSub = Offset < 0;
386 uint64_t AbsOffset = IsSub ? -Offset : Offset;
387 if (UseNF) {
388 const unsigned Opc = IsSub ? X86::SUB64ri32_NF : X86::ADD64ri32_NF;
389 MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr)
390 .addReg(RegNo: StackPtr)
391 .addImm(Val: AbsOffset);
392 // NF instructions define no EFLAGS, so there is nothing to mark dead.
393 } else if (UseLEA) {
394 MI = addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
395 MCID: TII.get(Opcode: getLEArOpcode(IsLP64: Uses64BitFramePtr)),
396 DestReg: StackPtr),
397 Reg: StackPtr, isKill: false, Offset);
398 } else {
399 const unsigned Opc = IsSub ? getSUBriOpcode(IsLP64: Uses64BitFramePtr)
400 : getADDriOpcode(IsLP64: Uses64BitFramePtr);
401 MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr)
402 .addReg(RegNo: StackPtr)
403 .addImm(Val: AbsOffset);
404 MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead.
405 }
406 return MI;
407}
408
409template <typename FoundT, typename CalcT>
410int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
411 MachineBasicBlock::iterator &MBBI,
412 FoundT FoundStackAdjust,
413 CalcT CalcNewOffset,
414 bool doMergeWithPrevious) const {
415 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
416 (!doMergeWithPrevious && MBBI == MBB.end()))
417 return CalcNewOffset(0);
418
419 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(x: MBBI) : MBBI;
420
421 PI = skipDebugInstructionsBackward(It: PI, Begin: MBB.begin());
422 // It is assumed that ADD/SUB/LEA instruction is succeded by one CFI
423 // instruction, and that there are no DBG_VALUE or other instructions between
424 // ADD/SUB/LEA and its corresponding CFI instruction.
425 /* TODO: Add support for the case where there are multiple CFI instructions
426 below the ADD/SUB/LEA, e.g.:
427 ...
428 add
429 cfi_def_cfa_offset
430 cfi_offset
431 ...
432 */
433 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
434 PI = std::prev(x: PI);
435
436 int64_t Offset = 0;
437 for (;;) {
438 unsigned Opc = PI->getOpcode();
439
440 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri ||
441 Opc == X86::ADD64ri32_NF) &&
442 PI->getOperand(i: 0).getReg() == StackPtr) {
443 assert(PI->getOperand(1).getReg() == StackPtr);
444 Offset = PI->getOperand(i: 2).getImm();
445 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
446 PI->getOperand(i: 0).getReg() == StackPtr &&
447 PI->getOperand(i: 1).getReg() == StackPtr &&
448 PI->getOperand(i: 2).getImm() == 1 &&
449 PI->getOperand(i: 3).getReg() == X86::NoRegister &&
450 PI->getOperand(i: 5).getReg() == X86::NoRegister) {
451 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
452 Offset = PI->getOperand(i: 4).getImm();
453 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri ||
454 Opc == X86::SUB64ri32_NF) &&
455 PI->getOperand(i: 0).getReg() == StackPtr) {
456 assert(PI->getOperand(1).getReg() == StackPtr);
457 Offset = -PI->getOperand(i: 2).getImm();
458 } else
459 return CalcNewOffset(0);
460
461 FoundStackAdjust(PI, Offset);
462 if ((uint64_t)std::abs(i: (int64_t)CalcNewOffset(Offset)) < MaxSPChunk)
463 break;
464
465 if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
466 return CalcNewOffset(0);
467
468 PI = doMergeWithPrevious ? std::prev(x: PI) : std::next(x: PI);
469 }
470
471 PI = MBB.erase(I: PI);
472 if (PI != MBB.end() && PI->isCFIInstruction()) {
473 auto CIs = MBB.getParent()->getFrameInstructions();
474 MCCFIInstruction CI = CIs[PI->getOperand(i: 0).getCFIIndex()];
475 if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
476 CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
477 PI = MBB.erase(I: PI);
478 }
479 if (!doMergeWithPrevious)
480 MBBI = skipDebugInstructionsForward(It: PI, End: MBB.end());
481
482 return CalcNewOffset(Offset);
483}
484
485int64_t X86FrameLowering::mergeSPAdd(MachineBasicBlock &MBB,
486 MachineBasicBlock::iterator &MBBI,
487 int64_t AddOffset,
488 bool doMergeWithPrevious) const {
489 return mergeSPUpdates(
490 MBB, MBBI, CalcNewOffset: [AddOffset](int64_t Offset) { return AddOffset + Offset; },
491 doMergeWithPrevious);
492}
493
494void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
495 MachineBasicBlock::iterator MBBI,
496 const DebugLoc &DL,
497 const MCCFIInstruction &CFIInst,
498 MachineInstr::MIFlag Flag) const {
499 MachineFunction &MF = *MBB.getParent();
500 unsigned CFIIndex = MF.addFrameInst(Inst: CFIInst);
501
502 if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
503 MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
504
505 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
506 .addCFIIndex(CFIIndex)
507 .setMIFlag(Flag);
508}
509
510/// Emits Dwarf Info specifying offsets of callee saved registers and
511/// frame pointer. This is called only when basic block sections are enabled.
512void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
513 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
514 MachineFunction &MF = *MBB.getParent();
515 if (!hasFP(MF)) {
516 emitCalleeSavedFrameMoves(MBB, MBBI, DL: DebugLoc{}, IsPrologue: true);
517 return;
518 }
519 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
520 const Register FramePtr = TRI->getFrameRegister(MF);
521 const Register MachineFramePtr =
522 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64))
523 : FramePtr;
524 unsigned DwarfReg = MRI->getDwarfRegNum(Reg: MachineFramePtr, isEH: true);
525 // Offset = space for return address + size of the frame pointer itself.
526 int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
527 BuildCFI(MBB, MBBI, DL: DebugLoc{},
528 CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset: -Offset));
529 emitCalleeSavedFrameMoves(MBB, MBBI, DL: DebugLoc{}, IsPrologue: true);
530}
531
532void X86FrameLowering::emitCalleeSavedFrameMoves(
533 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
534 const DebugLoc &DL, bool IsPrologue) const {
535 MachineFunction &MF = *MBB.getParent();
536 MachineFrameInfo &MFI = MF.getFrameInfo();
537 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
538 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
539
540 // Add callee saved registers to move list.
541 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
542
543 // Calculate offsets.
544 for (const CalleeSavedInfo &I : CSI) {
545 int64_t Offset = MFI.getObjectOffset(ObjectIdx: I.getFrameIdx());
546 MCRegister Reg = I.getReg();
547 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, isEH: true);
548
549 if (IsPrologue) {
550 if (X86FI->getStackPtrSaveMI()) {
551 // +2*SlotSize because there is return address and ebp at the bottom
552 // of the stack.
553 // | retaddr |
554 // | ebp |
555 // | |<--ebp
556 Offset += 2 * SlotSize;
557 SmallString<64> CfaExpr;
558 CfaExpr.push_back(Elt: dwarf::DW_CFA_expression);
559 uint8_t buffer[16];
560 CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer));
561 CfaExpr.push_back(Elt: 2);
562 Register FramePtr = TRI->getFrameRegister(MF);
563 const Register MachineFramePtr =
564 STI.isTarget64BitILP32()
565 ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64))
566 : FramePtr;
567 unsigned DwarfFramePtr = MRI->getDwarfRegNum(Reg: MachineFramePtr, isEH: true);
568 CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
569 CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer));
570 BuildCFI(MBB, MBBI, DL,
571 CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str()),
572 Flag: MachineInstr::FrameSetup);
573 } else {
574 BuildCFI(MBB, MBBI, DL,
575 CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset));
576 }
577 } else {
578 BuildCFI(MBB, MBBI, DL,
579 CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfReg));
580 }
581 }
582 if (auto *MI = X86FI->getStackPtrSaveMI()) {
583 int FI = MI->getOperand(i: 1).getIndex();
584 int64_t Offset = MFI.getObjectOffset(ObjectIdx: FI) + 2 * SlotSize;
585 SmallString<64> CfaExpr;
586 Register FramePtr = TRI->getFrameRegister(MF);
587 const Register MachineFramePtr =
588 STI.isTarget64BitILP32()
589 ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64))
590 : FramePtr;
591 unsigned DwarfFramePtr = MRI->getDwarfRegNum(Reg: MachineFramePtr, isEH: true);
592 CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
593 uint8_t buffer[16];
594 CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer));
595 CfaExpr.push_back(Elt: dwarf::DW_OP_deref);
596
597 SmallString<64> DefCfaExpr;
598 DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression);
599 DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: CfaExpr.size(), p: buffer));
600 DefCfaExpr.append(RHS: CfaExpr.str());
601 // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
602 BuildCFI(MBB, MBBI, DL,
603 CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str()),
604 Flag: MachineInstr::FrameSetup);
605 }
606}
607
608void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
609 MachineBasicBlock &MBB) const {
610 const MachineFunction &MF = *MBB.getParent();
611
612 // Insertion point.
613 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
614
615 // Fake a debug loc.
616 DebugLoc DL;
617 if (MBBI != MBB.end())
618 DL = MBBI->getDebugLoc();
619
620 // Zero out FP stack if referenced. Do this outside of the loop below so that
621 // it's done only once.
622 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
623 for (MCRegister Reg : RegsToZero.set_bits()) {
624 if (!X86::RFP80RegClass.contains(Reg))
625 continue;
626
627 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
628 for (unsigned i = 0; i != NumFPRegs; ++i)
629 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LD_F0));
630
631 for (unsigned i = 0; i != NumFPRegs; ++i)
632 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::ST_FPrr)).addReg(RegNo: X86::ST0);
633 break;
634 }
635
636 // For GPRs, we only care to clear out the 32-bit register.
637 BitVector GPRsToZero(TRI->getNumRegs());
638 for (MCRegister Reg : RegsToZero.set_bits())
639 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
640 GPRsToZero.set(getX86SubSuperRegister(Reg, Size: 32));
641 RegsToZero.reset(Idx: Reg);
642 }
643
644 // Zero out the GPRs first.
645 for (MCRegister Reg : GPRsToZero.set_bits())
646 TII.buildClearRegister(Reg, MBB, Iter: MBBI, DL);
647
648 // Zero out the remaining registers.
649 for (MCRegister Reg : RegsToZero.set_bits())
650 TII.buildClearRegister(Reg, MBB, Iter: MBBI, DL);
651}
652
653void X86FrameLowering::emitStackProbe(
654 MachineFunction &MF, MachineBasicBlock &MBB,
655 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
656 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
657 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
658 if (STI.isTargetWindowsCoreCLR()) {
659 if (InProlog) {
660 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::STACKALLOC_W_PROBING))
661 .addImm(Val: 0 /* no explicit stack size */);
662 } else {
663 emitStackProbeInline(MF, MBB, MBBI, DL, InProlog: false);
664 }
665 } else {
666 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
667 }
668}
669
670bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
671 return STI.isOSWindows() && !STI.isTargetWin64();
672}
673
674void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
675 MachineBasicBlock &PrologMBB) const {
676 auto Where = llvm::find_if(Range&: PrologMBB, P: [](MachineInstr &MI) {
677 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
678 });
679 if (Where != PrologMBB.end()) {
680 DebugLoc DL = PrologMBB.findDebugLoc(MBBI: Where);
681 emitStackProbeInline(MF, MBB&: PrologMBB, MBBI: Where, DL, InProlog: true);
682 Where->eraseFromParent();
683 }
684}
685
686void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
687 MachineBasicBlock &MBB,
688 MachineBasicBlock::iterator MBBI,
689 const DebugLoc &DL,
690 bool InProlog) const {
691 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
692 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
693 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
694 else
695 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
696}
697
698void X86FrameLowering::emitStackProbeInlineGeneric(
699 MachineFunction &MF, MachineBasicBlock &MBB,
700 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
701 MachineInstr &AllocWithProbe = *MBBI;
702 uint64_t Offset = AllocWithProbe.getOperand(i: 0).getImm();
703
704 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
705 const X86TargetLowering &TLI = *STI.getTargetLowering();
706 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
707 "different expansion expected for CoreCLR 64 bit");
708
709 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
710 uint64_t ProbeChunk = StackProbeSize * 8;
711
712 uint64_t MaxAlign =
713 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
714
715 // Synthesize a loop or unroll it, depending on the number of iterations.
716 // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
717 // between the unaligned rsp and current rsp.
718 if (Offset > ProbeChunk) {
719 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
720 Align: MaxAlign % StackProbeSize);
721 } else {
722 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
723 Align: MaxAlign % StackProbeSize);
724 }
725}
726
727void X86FrameLowering::emitStackProbeInlineGenericBlock(
728 MachineFunction &MF, MachineBasicBlock &MBB,
729 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
730 uint64_t AlignOffset) const {
731
732 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
733 const bool HasFP = hasFP(MF);
734 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
735 const X86TargetLowering &TLI = *STI.getTargetLowering();
736 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
737 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
738
739 uint64_t CurrentOffset = 0;
740
741 assert(AlignOffset < StackProbeSize);
742
743 // If the offset is so small it fits within a page, there's nothing to do.
744 if (StackProbeSize < Offset + AlignOffset) {
745
746 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
747 BuildStackAdjustment(MBB, MBBI, DL, Offset: -StackAdjustment, /*InEpilogue=*/false)
748 .setMIFlag(MachineInstr::FrameSetup);
749 if (!HasFP && NeedsDwarfCFI) {
750 BuildCFI(
751 MBB, MBBI, DL,
752 CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: StackAdjustment));
753 }
754
755 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc))
756 .setMIFlag(MachineInstr::FrameSetup),
757 Reg: StackPtr, isKill: false, Offset: 0)
758 .addImm(Val: 0)
759 .setMIFlag(MachineInstr::FrameSetup);
760 NumFrameExtraProbe++;
761 CurrentOffset = StackProbeSize - AlignOffset;
762 }
763
764 // For the next N - 1 pages, just probe. I tried to take advantage of
765 // natural probes but it implies much more logic and there was very few
766 // interesting natural probes to interleave.
767 while (CurrentOffset + StackProbeSize < Offset) {
768 BuildStackAdjustment(MBB, MBBI, DL, Offset: -StackProbeSize, /*InEpilogue=*/false)
769 .setMIFlag(MachineInstr::FrameSetup);
770
771 if (!HasFP && NeedsDwarfCFI) {
772 BuildCFI(
773 MBB, MBBI, DL,
774 CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: StackProbeSize));
775 }
776 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc))
777 .setMIFlag(MachineInstr::FrameSetup),
778 Reg: StackPtr, isKill: false, Offset: 0)
779 .addImm(Val: 0)
780 .setMIFlag(MachineInstr::FrameSetup);
781 NumFrameExtraProbe++;
782 CurrentOffset += StackProbeSize;
783 }
784
785 // No need to probe the tail, it is smaller than a Page.
786 uint64_t ChunkSize = Offset - CurrentOffset;
787 if (ChunkSize == SlotSize) {
788 // Use push for slot sized adjustments as a size optimization,
789 // like emitSPUpdate does when not probing.
790 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
791 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
792 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc))
793 .addReg(RegNo: Reg, Flags: RegState::Undef)
794 .setMIFlag(MachineInstr::FrameSetup);
795 } else {
796 BuildStackAdjustment(MBB, MBBI, DL, Offset: -ChunkSize, /*InEpilogue=*/false)
797 .setMIFlag(MachineInstr::FrameSetup);
798 }
799 // No need to adjust Dwarf CFA offset here, the last position of the stack has
800 // been defined
801}
802
803void X86FrameLowering::emitStackProbeInlineGenericLoop(
804 MachineFunction &MF, MachineBasicBlock &MBB,
805 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
806 uint64_t AlignOffset) const {
807 assert(Offset && "null offset");
808
809 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
810 MachineBasicBlock::LQR_Live &&
811 "Inline stack probe loop will clobber live EFLAGS.");
812
813 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
814 const bool HasFP = hasFP(MF);
815 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
816 const X86TargetLowering &TLI = *STI.getTargetLowering();
817 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
818 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
819
820 if (AlignOffset) {
821 if (AlignOffset < StackProbeSize) {
822 // Perform a first smaller allocation followed by a probe.
823 BuildStackAdjustment(MBB, MBBI, DL, Offset: -AlignOffset, /*InEpilogue=*/false)
824 .setMIFlag(MachineInstr::FrameSetup);
825
826 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc))
827 .setMIFlag(MachineInstr::FrameSetup),
828 Reg: StackPtr, isKill: false, Offset: 0)
829 .addImm(Val: 0)
830 .setMIFlag(MachineInstr::FrameSetup);
831 NumFrameExtraProbe++;
832 Offset -= AlignOffset;
833 }
834 }
835
836 // Synthesize a loop
837 NumFrameLoopProbe++;
838 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
839
840 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB);
841 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB);
842
843 MachineFunction::iterator MBBIter = ++MBB.getIterator();
844 MF.insert(MBBI: MBBIter, MBB: testMBB);
845 MF.insert(MBBI: MBBIter, MBB: tailMBB);
846
847 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
848 : Is64Bit ? X86::R11D
849 : X86::EAX;
850
851 // save loop bound
852 {
853 const uint64_t BoundOffset = alignDown(Value: Offset, Align: StackProbeSize);
854
855 // Can we calculate the loop bound using SUB with a 32-bit immediate?
856 // Note that the immediate gets sign-extended when used with a 64-bit
857 // register, so in that case we only have 31 bits to work with.
858 bool canUseSub =
859 Uses64BitFramePtr ? isUInt<31>(x: BoundOffset) : isUInt<32>(x: BoundOffset);
860
861 if (canUseSub) {
862 const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr);
863
864 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: FinalStackProbed)
865 .addReg(RegNo: StackPtr)
866 .setMIFlag(MachineInstr::FrameSetup);
867 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: FinalStackProbed)
868 .addReg(RegNo: FinalStackProbed)
869 .addImm(Val: BoundOffset)
870 .setMIFlag(MachineInstr::FrameSetup);
871 } else if (Uses64BitFramePtr) {
872 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64ri), DestReg: FinalStackProbed)
873 .addImm(Val: -BoundOffset)
874 .setMIFlag(MachineInstr::FrameSetup);
875 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::ADD64rr), DestReg: FinalStackProbed)
876 .addReg(RegNo: FinalStackProbed)
877 .addReg(RegNo: StackPtr)
878 .setMIFlag(MachineInstr::FrameSetup);
879 } else {
880 llvm_unreachable("Offset too large for 32-bit stack pointer");
881 }
882
883 // while in the loop, use loop-invariant reg for CFI,
884 // instead of the stack pointer, which changes during the loop
885 if (!HasFP && NeedsDwarfCFI) {
886 // x32 uses the same DWARF register numbers as x86-64,
887 // so there isn't a register number for r11d, we must use r11 instead
888 const Register DwarfFinalStackProbed =
889 STI.isTarget64BitILP32()
890 ? Register(getX86SubSuperRegister(Reg: FinalStackProbed, Size: 64))
891 : FinalStackProbed;
892
893 BuildCFI(MBB, MBBI, DL,
894 CFIInst: MCCFIInstruction::createDefCfaRegister(
895 L: nullptr, Register: TRI->getDwarfRegNum(Reg: DwarfFinalStackProbed, isEH: true)));
896 BuildCFI(MBB, MBBI, DL,
897 CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: BoundOffset));
898 }
899 }
900
901 // allocate a page
902 BuildStackAdjustment(MBB&: *testMBB, MBBI: testMBB->end(), DL, Offset: -StackProbeSize,
903 /*InEpilogue=*/false)
904 .setMIFlag(MachineInstr::FrameSetup);
905
906 // touch the page
907 addRegOffset(MIB: BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc))
908 .setMIFlag(MachineInstr::FrameSetup),
909 Reg: StackPtr, isKill: false, Offset: 0)
910 .addImm(Val: 0)
911 .setMIFlag(MachineInstr::FrameSetup);
912
913 // cmp with stack pointer bound
914 BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
915 .addReg(RegNo: StackPtr)
916 .addReg(RegNo: FinalStackProbed)
917 .setMIFlag(MachineInstr::FrameSetup);
918
919 // jump
920 BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
921 .addMBB(MBB: testMBB)
922 .addImm(Val: X86::COND_NE)
923 .setMIFlag(MachineInstr::FrameSetup);
924 testMBB->addSuccessor(Succ: testMBB);
925 testMBB->addSuccessor(Succ: tailMBB);
926
927 // BB management
928 tailMBB->splice(Where: tailMBB->end(), Other: &MBB, From: MBBI, To: MBB.end());
929 tailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);
930 MBB.addSuccessor(Succ: testMBB);
931
932 // handle tail
933 const uint64_t TailOffset = Offset % StackProbeSize;
934 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
935 if (TailOffset) {
936 BuildStackAdjustment(MBB&: *tailMBB, MBBI: TailMBBIter, DL, Offset: -TailOffset,
937 /*InEpilogue=*/false)
938 .setMIFlag(MachineInstr::FrameSetup);
939 }
940
941 // after the loop, switch back to stack pointer for CFI
942 if (!HasFP && NeedsDwarfCFI) {
943 // x32 uses the same DWARF register numbers as x86-64,
944 // so there isn't a register number for esp, we must use rsp instead
945 const Register DwarfStackPtr =
946 STI.isTarget64BitILP32()
947 ? Register(getX86SubSuperRegister(Reg: StackPtr, Size: 64))
948 : Register(StackPtr);
949
950 BuildCFI(MBB&: *tailMBB, MBBI: TailMBBIter, DL,
951 CFIInst: MCCFIInstruction::createDefCfaRegister(
952 L: nullptr, Register: TRI->getDwarfRegNum(Reg: DwarfStackPtr, isEH: true)));
953 }
954
955 // Update Live In information
956 fullyRecomputeLiveIns(MBBs: {tailMBB, testMBB});
957}
958
959void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
960 MachineFunction &MF, MachineBasicBlock &MBB,
961 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
962 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
963 assert(STI.is64Bit() && "different expansion needed for 32 bit");
964 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
965 const TargetInstrInfo &TII = *STI.getInstrInfo();
966 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
967
968 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
969 MachineBasicBlock::LQR_Live &&
970 "Inline stack probe loop will clobber live EFLAGS.");
971
972 // RAX contains the number of bytes of desired stack adjustment.
973 // The handling here assumes this value has already been updated so as to
974 // maintain stack alignment.
975 //
976 // We need to exit with RSP modified by this amount and execute suitable
977 // page touches to notify the OS that we're growing the stack responsibly.
978 // All stack probing must be done without modifying RSP.
979 //
980 // MBB:
981 // SizeReg = RAX;
982 // ZeroReg = 0
983 // CopyReg = RSP
984 // Flags, TestReg = CopyReg - SizeReg
985 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
986 // LimitReg = gs magic thread env access
987 // if FinalReg >= LimitReg goto ContinueMBB
988 // RoundBB:
989 // RoundReg = page address of FinalReg
990 // LoopMBB:
991 // LoopReg = PHI(LimitReg,ProbeReg)
992 // ProbeReg = LoopReg - PageSize
993 // [ProbeReg] = 0
994 // if (ProbeReg > RoundReg) goto LoopMBB
995 // ContinueMBB:
996 // RSP = RSP - RAX
997 // [rest of original MBB]
998
999 // Set up the new basic blocks
1000 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB);
1001 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB);
1002 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB);
1003
1004 MachineFunction::iterator MBBIter = std::next(x: MBB.getIterator());
1005 MF.insert(MBBI: MBBIter, MBB: RoundMBB);
1006 MF.insert(MBBI: MBBIter, MBB: LoopMBB);
1007 MF.insert(MBBI: MBBIter, MBB: ContinueMBB);
1008
1009 // Split MBB and move the tail portion down to ContinueMBB.
1010 MachineBasicBlock::iterator BeforeMBBI = std::prev(x: MBBI);
1011 ContinueMBB->splice(Where: ContinueMBB->begin(), Other: &MBB, From: MBBI, To: MBB.end());
1012 ContinueMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);
1013
1014 // Some useful constants
1015 const int64_t ThreadEnvironmentStackLimit = 0x10;
1016 const int64_t PageSize = 0x1000;
1017 const int64_t PageMask = ~(PageSize - 1);
1018
1019 // Registers we need. For the normal case we use virtual
1020 // registers. For the prolog expansion we use RAX, RCX and RDX.
1021 MachineRegisterInfo &MRI = MF.getRegInfo();
1022 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
1023 const Register
1024 SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
1025 ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1026 CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1027 TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1028 FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1029 RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1030 LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1031 JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1032 ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);
1033
1034 // SP-relative offsets where we can save RCX and RDX.
1035 int64_t RCXShadowSlot = 0;
1036 int64_t RDXShadowSlot = 0;
1037
1038 // If inlining in the prolog, save RCX and RDX.
1039 if (InProlog) {
1040 // Compute the offsets. We need to account for things already
1041 // pushed onto the stack at this point: return address, frame
1042 // pointer (if used), and callee saves.
1043 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1044 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
1045 const bool HasFP = hasFP(MF);
1046
1047 // Check if we need to spill RCX and/or RDX.
1048 // Here we assume that no earlier prologue instruction changes RCX and/or
1049 // RDX, so checking the block live-ins is enough.
1050 const bool IsRCXLiveIn = MBB.isLiveIn(Reg: X86::RCX);
1051 const bool IsRDXLiveIn = MBB.isLiveIn(Reg: X86::RDX);
1052 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
1053 // Assign the initial slot to both registers, then change RDX's slot if both
1054 // need to be spilled.
1055 if (IsRCXLiveIn)
1056 RCXShadowSlot = InitSlot;
1057 if (IsRDXLiveIn)
1058 RDXShadowSlot = InitSlot;
1059 if (IsRDXLiveIn && IsRCXLiveIn)
1060 RDXShadowSlot += 8;
1061 // Emit the saves if needed.
1062 if (IsRCXLiveIn)
1063 addRegOffset(MIB: BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: X86::RSP, isKill: false,
1064 Offset: RCXShadowSlot)
1065 .addReg(RegNo: X86::RCX);
1066 if (IsRDXLiveIn)
1067 addRegOffset(MIB: BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: X86::RSP, isKill: false,
1068 Offset: RDXShadowSlot)
1069 .addReg(RegNo: X86::RDX);
1070 } else {
1071 // Not in the prolog. Copy RAX to a virtual reg.
1072 BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: SizeReg).addReg(RegNo: X86::RAX);
1073 }
1074
1075 // Add code to MBB to check for overflow and set the new target stack pointer
1076 // to zero if so.
1077 BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::XOR64rr), DestReg: ZeroReg)
1078 .addReg(RegNo: ZeroReg, Flags: RegState::Undef)
1079 .addReg(RegNo: ZeroReg, Flags: RegState::Undef);
1080 BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: CopyReg).addReg(RegNo: X86::RSP);
1081 BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64rr), DestReg: TestReg)
1082 .addReg(RegNo: CopyReg)
1083 .addReg(RegNo: SizeReg);
1084 BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMOV64rr), DestReg: FinalReg)
1085 .addReg(RegNo: TestReg)
1086 .addReg(RegNo: ZeroReg)
1087 .addImm(Val: X86::COND_B);
1088
1089 // FinalReg now holds final stack pointer value, or zero if
1090 // allocation would overflow. Compare against the current stack
1091 // limit from the thread environment block. Note this limit is the
1092 // lowest touched page on the stack, not the point at which the OS
1093 // will cause an overflow exception, so this is just an optimization
1094 // to avoid unnecessarily touching pages that are below the current
1095 // SP but already committed to the stack by the OS.
1096 BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: LimitReg)
1097 .addReg(RegNo: 0)
1098 .addImm(Val: 1)
1099 .addReg(RegNo: 0)
1100 .addImm(Val: ThreadEnvironmentStackLimit)
1101 .addReg(RegNo: X86::GS);
1102 BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP64rr)).addReg(RegNo: FinalReg).addReg(RegNo: LimitReg);
1103 // Jump if the desired stack pointer is at or above the stack limit.
1104 BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
1105 .addMBB(MBB: ContinueMBB)
1106 .addImm(Val: X86::COND_AE);
1107
1108 // Add code to roundMBB to round the final stack pointer to a page boundary.
1109 if (InProlog)
1110 RoundMBB->addLiveIn(PhysReg: FinalReg);
1111 BuildMI(BB: RoundMBB, MIMD: DL, MCID: TII.get(Opcode: X86::AND64ri32), DestReg: RoundedReg)
1112 .addReg(RegNo: FinalReg)
1113 .addImm(Val: PageMask);
1114 BuildMI(BB: RoundMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JMP_1)).addMBB(MBB: LoopMBB);
1115
1116 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1117 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1118 // and probe until we reach RoundedReg.
1119 if (!InProlog) {
1120 BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PHI), DestReg: JoinReg)
1121 .addReg(RegNo: LimitReg)
1122 .addMBB(MBB: RoundMBB)
1123 .addReg(RegNo: ProbeReg)
1124 .addMBB(MBB: LoopMBB);
1125 }
1126
1127 if (InProlog)
1128 LoopMBB->addLiveIn(PhysReg: JoinReg);
1129 addRegOffset(MIB: BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: ProbeReg), Reg: JoinReg,
1130 isKill: false, Offset: -PageSize);
1131
1132 // Probe by storing a byte onto the stack.
1133 BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV8mi))
1134 .addReg(RegNo: ProbeReg)
1135 .addImm(Val: 1)
1136 .addReg(RegNo: 0)
1137 .addImm(Val: 0)
1138 .addReg(RegNo: 0)
1139 .addImm(Val: 0);
1140
1141 if (InProlog)
1142 LoopMBB->addLiveIn(PhysReg: RoundedReg);
1143 BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP64rr))
1144 .addReg(RegNo: RoundedReg)
1145 .addReg(RegNo: ProbeReg);
1146 BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
1147 .addMBB(MBB: LoopMBB)
1148 .addImm(Val: X86::COND_NE);
1149
1150 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1151
1152 // If in prolog, restore RDX and RCX.
1153 if (InProlog) {
1154 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1155 addRegOffset(MIB: BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL,
1156 MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RCX),
1157 Reg: X86::RSP, isKill: false, Offset: RCXShadowSlot);
1158 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1159 addRegOffset(MIB: BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL,
1160 MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RDX),
1161 Reg: X86::RSP, isKill: false, Offset: RDXShadowSlot);
1162 }
1163
1164 // Now that the probing is done, add code to continueMBB to update
1165 // the stack pointer for real.
1166 BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64rr), DestReg: X86::RSP)
1167 .addReg(RegNo: X86::RSP)
1168 .addReg(RegNo: SizeReg);
1169
1170 // Add the control flow edges we need.
1171 MBB.addSuccessor(Succ: ContinueMBB);
1172 MBB.addSuccessor(Succ: RoundMBB);
1173 RoundMBB->addSuccessor(Succ: LoopMBB);
1174 LoopMBB->addSuccessor(Succ: ContinueMBB);
1175 LoopMBB->addSuccessor(Succ: LoopMBB);
1176
1177 if (InProlog) {
1178 LivePhysRegs LiveRegs;
1179 computeAndAddLiveIns(LiveRegs, MBB&: *ContinueMBB);
1180 }
1181
1182 // Mark all the instructions added to the prolog as frame setup.
1183 if (InProlog) {
1184 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1185 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1186 }
1187 for (MachineInstr &MI : *RoundMBB) {
1188 MI.setFlag(MachineInstr::FrameSetup);
1189 }
1190 for (MachineInstr &MI : *LoopMBB) {
1191 MI.setFlag(MachineInstr::FrameSetup);
1192 }
1193 for (MachineInstr &MI :
1194 llvm::make_range(x: ContinueMBB->begin(), y: ContinueMBBI)) {
1195 MI.setFlag(MachineInstr::FrameSetup);
1196 }
1197 }
1198}
1199
1200void X86FrameLowering::emitStackProbeCall(
1201 MachineFunction &MF, MachineBasicBlock &MBB,
1202 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1203 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1204 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1205
1206 // FIXME: Add indirect thunk support and remove this.
1207 if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1208 report_fatal_error(reason: "Emitting stack probe calls on 64-bit with the large "
1209 "code model and indirect thunks not yet implemented.");
1210
1211 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1212 MachineBasicBlock::LQR_Live &&
1213 "Stack probe calls will clobber live EFLAGS.");
1214
1215 unsigned CallOp;
1216 if (Is64Bit)
1217 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1218 else
1219 CallOp = X86::CALLpcrel32;
1220
1221 StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);
1222
1223 MachineInstrBuilder CI;
1224 MachineBasicBlock::iterator ExpansionMBBI = std::prev(x: MBBI);
1225
1226 // All current stack probes take AX and SP as input, clobber flags, and
1227 // preserve all registers. x86_64 probes leave RSP unmodified.
1228 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
1229 // For the large code model, we have to call through a register. Use R11,
1230 // as it is scratch in all supported calling conventions.
1231 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64ri), DestReg: X86::R11)
1232 .addExternalSymbol(FnName: MF.createExternalSymbolName(Name: Symbol));
1233 CI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: CallOp)).addReg(RegNo: X86::R11);
1234 } else {
1235 CI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: CallOp))
1236 .addExternalSymbol(FnName: MF.createExternalSymbolName(Name: Symbol));
1237 }
1238
1239 unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1240 unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1241 CI.addReg(RegNo: AX, Flags: RegState::Implicit)
1242 .addReg(RegNo: SP, Flags: RegState::Implicit)
1243 .addReg(RegNo: AX, Flags: RegState::Define | RegState::Implicit)
1244 .addReg(RegNo: SP, Flags: RegState::Define | RegState::Implicit)
1245 .addReg(RegNo: X86::EFLAGS, Flags: RegState::Define | RegState::Implicit);
1246
1247 MachineInstr *ModInst = CI;
1248 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1249 // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1250 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1251 // themselves. They also does not clobber %rax so we can reuse it when
1252 // adjusting %rsp.
1253 // All other platforms do not specify a particular ABI for the stack probe
1254 // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1255 ModInst =
1256 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getSUBrrOpcode(IsLP64: Uses64BitFramePtr)), DestReg: SP)
1257 .addReg(RegNo: SP)
1258 .addReg(RegNo: AX);
1259 }
1260
1261 // DebugInfo variable locations -- if there's an instruction number for the
1262 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1263 // modifies SP.
1264 if (InstrNum) {
1265 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1266 // Label destination operand of the subtract.
1267 MF.makeDebugValueSubstitution(*InstrNum,
1268 {ModInst->getDebugInstrNum(), 0});
1269 } else {
1270 // Label the call. The operand number is the penultimate operand, zero
1271 // based.
1272 unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1273 MF.makeDebugValueSubstitution(
1274 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1275 }
1276 }
1277
1278 if (InProlog) {
1279 // Apply the frame setup flag to all inserted instrs.
1280 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1281 ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1282 }
1283}
1284
/// Compute the frame-register offset to use for UWOP_SET_FPREG given a stack
/// pointer adjustment of \p SPAdjust bytes.
///
/// The result is \p SPAdjust capped at 128 and rounded down to a multiple of
/// 16, so it is always one of 0, 16, ..., 128.
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  const uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  // The value is at most 128 here, so narrowing to unsigned is lossless.
  return static_cast<unsigned>(SEHFrameOffset & ~uint64_t(15));
}
1293
1294// If we're forcing a stack realignment we can't rely on just the frame
1295// info, we need to know the ABI stack alignment as well in case we
1296// have a call out. Otherwise just make sure we have some alignment - we'll
1297// go with the minimum SlotSize.
1298uint64_t
1299X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1300 const MachineFrameInfo &MFI = MF.getFrameInfo();
1301 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1302 Align StackAlign = getStackAlign();
1303 bool HasRealign = MF.getFunction().hasFnAttribute(Kind: "stackrealign");
1304 if (HasRealign) {
1305 if (MFI.hasCalls())
1306 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1307 else if (MaxAlign < SlotSize)
1308 MaxAlign = Align(SlotSize);
1309 }
1310
1311 if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
1312 if (HasRealign)
1313 MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
1314 else
1315 MaxAlign = Align(16);
1316 }
1317 return MaxAlign.value();
1318}
1319
1320void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1321 MachineBasicBlock::iterator MBBI,
1322 const DebugLoc &DL, Register Reg,
1323 uint64_t MaxAlign) const {
1324 uint64_t Val = -MaxAlign;
1325 unsigned AndOp = getANDriOpcode(IsLP64: Uses64BitFramePtr, Imm: Val);
1326
1327 MachineFunction &MF = *MBB.getParent();
1328 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
1329 const X86TargetLowering &TLI = *STI.getTargetLowering();
1330 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1331 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1332
1333 // We want to make sure that (in worst case) less than StackProbeSize bytes
1334 // are not probed after the AND. This assumption is used in
1335 // emitStackProbeInlineGeneric.
1336 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1337 {
1338 NumFrameLoopProbe++;
1339 MachineBasicBlock *entryMBB =
1340 MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
1341 MachineBasicBlock *headMBB =
1342 MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
1343 MachineBasicBlock *bodyMBB =
1344 MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
1345 MachineBasicBlock *footMBB =
1346 MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
1347
1348 MachineFunction::iterator MBBIter = MBB.getIterator();
1349 MF.insert(MBBI: MBBIter, MBB: entryMBB);
1350 MF.insert(MBBI: MBBIter, MBB: headMBB);
1351 MF.insert(MBBI: MBBIter, MBB: bodyMBB);
1352 MF.insert(MBBI: MBBIter, MBB: footMBB);
1353 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
1354 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1355 : Is64Bit ? X86::R11D
1356 : X86::EAX;
1357
1358 // Setup entry block
1359 {
1360
1361 entryMBB->splice(Where: entryMBB->end(), Other: &MBB, From: MBB.begin(), To: MBBI);
1362 BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: FinalStackProbed)
1363 .addReg(RegNo: StackPtr)
1364 .setMIFlag(MachineInstr::FrameSetup);
1365 MachineInstr *MI =
1366 BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: AndOp), DestReg: FinalStackProbed)
1367 .addReg(RegNo: FinalStackProbed)
1368 .addImm(Val)
1369 .setMIFlag(MachineInstr::FrameSetup);
1370
1371 // The EFLAGS implicit def is dead.
1372 MI->getOperand(i: 3).setIsDead();
1373
1374 BuildMI(BB: entryMBB, MIMD: DL,
1375 MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1376 .addReg(RegNo: FinalStackProbed)
1377 .addReg(RegNo: StackPtr)
1378 .setMIFlag(MachineInstr::FrameSetup);
1379 BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
1380 .addMBB(MBB: &MBB)
1381 .addImm(Val: X86::COND_E)
1382 .setMIFlag(MachineInstr::FrameSetup);
1383 entryMBB->addSuccessor(Succ: headMBB);
1384 entryMBB->addSuccessor(Succ: &MBB);
1385 }
1386
1387 // Loop entry block
1388
1389 {
1390 const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr);
1391 BuildMI(BB: headMBB, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: StackPtr)
1392 .addReg(RegNo: StackPtr)
1393 .addImm(Val: StackProbeSize)
1394 .setMIFlag(MachineInstr::FrameSetup);
1395
1396 BuildMI(BB: headMBB, MIMD: DL,
1397 MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1398 .addReg(RegNo: StackPtr)
1399 .addReg(RegNo: FinalStackProbed)
1400 .setMIFlag(MachineInstr::FrameSetup);
1401
1402 // jump to the footer if StackPtr < FinalStackProbed
1403 BuildMI(BB: headMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
1404 .addMBB(MBB: footMBB)
1405 .addImm(Val: X86::COND_B)
1406 .setMIFlag(MachineInstr::FrameSetup);
1407
1408 headMBB->addSuccessor(Succ: bodyMBB);
1409 headMBB->addSuccessor(Succ: footMBB);
1410 }
1411
1412 // setup loop body
1413 {
1414 addRegOffset(MIB: BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc))
1415 .setMIFlag(MachineInstr::FrameSetup),
1416 Reg: StackPtr, isKill: false, Offset: 0)
1417 .addImm(Val: 0)
1418 .setMIFlag(MachineInstr::FrameSetup);
1419
1420 const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr);
1421 BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: StackPtr)
1422 .addReg(RegNo: StackPtr)
1423 .addImm(Val: StackProbeSize)
1424 .setMIFlag(MachineInstr::FrameSetup);
1425
1426 // cmp with stack pointer bound
1427 BuildMI(BB: bodyMBB, MIMD: DL,
1428 MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1429 .addReg(RegNo: FinalStackProbed)
1430 .addReg(RegNo: StackPtr)
1431 .setMIFlag(MachineInstr::FrameSetup);
1432
1433 // jump back while FinalStackProbed < StackPtr
1434 BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
1435 .addMBB(MBB: bodyMBB)
1436 .addImm(Val: X86::COND_B)
1437 .setMIFlag(MachineInstr::FrameSetup);
1438 bodyMBB->addSuccessor(Succ: bodyMBB);
1439 bodyMBB->addSuccessor(Succ: footMBB);
1440 }
1441
1442 // setup loop footer
1443 {
1444 BuildMI(BB: footMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: StackPtr)
1445 .addReg(RegNo: FinalStackProbed)
1446 .setMIFlag(MachineInstr::FrameSetup);
1447 addRegOffset(MIB: BuildMI(BB: footMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc))
1448 .setMIFlag(MachineInstr::FrameSetup),
1449 Reg: StackPtr, isKill: false, Offset: 0)
1450 .addImm(Val: 0)
1451 .setMIFlag(MachineInstr::FrameSetup);
1452 footMBB->addSuccessor(Succ: &MBB);
1453 }
1454
1455 fullyRecomputeLiveIns(MBBs: {footMBB, bodyMBB, headMBB, &MBB});
1456 }
1457 } else {
1458 MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: AndOp), DestReg: Reg)
1459 .addReg(RegNo: Reg)
1460 .addImm(Val)
1461 .setMIFlag(MachineInstr::FrameSetup);
1462
1463 // The EFLAGS implicit def is dead.
1464 MI->getOperand(i: 3).setIsDead();
1465 }
1466}
1467
1468bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
1469 // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1470 // clobbered by any interrupt handler.
1471 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1472 "MF used frame lowering for wrong subtarget");
1473 const Function &Fn = MF.getFunction();
1474 const bool IsWin64CC = STI.isCallingConvWin64(CC: Fn.getCallingConv());
1475 return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Kind: Attribute::NoRedZone);
1476}
1477
1478/// Return true if we need to use the restricted Windows x64 prologue and
1479/// epilogue code patterns that can be described with WinCFI (.seh_*
1480/// directives).
1481bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1482 return MF.getTarget().getMCAsmInfo().usesWindowsCFI();
1483}
1484
1485bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1486 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1487}
1488
1489/// Return true if an opcode is part of the REP group of instructions
1490static bool isOpcodeRep(unsigned Opcode) {
1491 switch (Opcode) {
1492 case X86::REPNE_PREFIX:
1493 case X86::REP_MOVSB_32:
1494 case X86::REP_MOVSB_64:
1495 case X86::REP_MOVSD_32:
1496 case X86::REP_MOVSD_64:
1497 case X86::REP_MOVSQ_32:
1498 case X86::REP_MOVSQ_64:
1499 case X86::REP_MOVSW_32:
1500 case X86::REP_MOVSW_64:
1501 case X86::REP_PREFIX:
1502 case X86::REP_STOSB_32:
1503 case X86::REP_STOSB_64:
1504 case X86::REP_STOSD_32:
1505 case X86::REP_STOSD_64:
1506 case X86::REP_STOSQ_32:
1507 case X86::REP_STOSQ_64:
1508 case X86::REP_STOSW_32:
1509 case X86::REP_STOSW_64:
1510 return true;
1511 default:
1512 break;
1513 }
1514 return false;
1515}
1516
1517/// emitPrologue - Push callee-saved registers onto the stack, which
1518/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
1519/// space for local variables. Also emit labels used by the exception handler to
1520/// generate the exception handling frames.
1521
1522/*
1523 Here's a gist of what gets emitted:
1524
1525 ; Establish frame pointer, if needed
1526 [if needs FP]
1527 push %rbp
1528 .cfi_def_cfa_offset 16
1529 .cfi_offset %rbp, -16
      .seh_pushreg %rbp
1531 mov %rsp, %rbp
1532 .cfi_def_cfa_register %rbp
1533
1534 ; Spill general-purpose registers
1535 [for all callee-saved GPRs]
1536 pushq %<reg>
1537 [if not needs FP]
1538 .cfi_def_cfa_offset (offset from RETADDR)
1539 .seh_pushreg %<reg>
1540
1541 ; If the required stack alignment > default stack alignment
1542 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1543 ; of unknown size in the stack frame.
1544 [if stack needs re-alignment]
1545 and $MASK, %rsp
1546
1547 ; Allocate space for locals
1548 [if target is Windows and allocated space > 4096 bytes]
1549 ; Windows needs special care for allocations larger
1550 ; than one page.
1551 mov $NNN, %rax
1552 call ___chkstk_ms/___chkstk
1553 sub %rax, %rsp
1554 [else]
1555 sub $NNN, %rsp
1556
1557 [if needs FP]
1558 .seh_stackalloc (size of XMM spill slots)
1559 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1560 [else]
1561 .seh_stackalloc NNN
1562
1563 ; Spill XMMs
1564 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
1565 ; they may get spilled on any platform, if the current function
1566 ; calls @llvm.eh.unwind.init
1567 [if needs FP]
1568 [for all callee-saved XMM registers]
1569 movaps %<xmm reg>, -MMM(%rbp)
1570 [for all callee-saved XMM registers]
1571 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1572 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1573 [else]
1574 [for all callee-saved XMM registers]
1575 movaps %<xmm reg>, KKK(%rsp)
1576 [for all callee-saved XMM registers]
1577 .seh_savexmm %<xmm reg>, KKK
1578
1579 .seh_endprologue
1580
1581 [if needs base pointer]
1582 mov %rsp, %rbx
1583 [if needs to restore base pointer]
1584 mov %rsp, -MMM(%rbp)
1585
1586 ; Emit CFI info
1587 [if needs FP]
1588 [for all callee-saved registers]
1589 .cfi_offset %<reg>, (offset from %rbp)
1590 [else]
1591 .cfi_def_cfa_offset (offset from RETADDR)
1592 [for all callee-saved registers]
1593 .cfi_offset %<reg>, (offset from %rsp)
1594
1595 Notes:
1596 - .seh directives are emitted only for Windows 64 ABI
1597 - .cv_fpo directives are emitted on win32 when emitting CodeView
1598 - .cfi directives are emitted for all other ABIs
1599 - for 32-bit code, substitute %e?? registers for %r??
1600*/
1601
1602void X86FrameLowering::emitPrologue(MachineFunction &MF,
1603 MachineBasicBlock &MBB) const {
1604 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1605 "MF used frame lowering for wrong subtarget");
1606 MachineBasicBlock::iterator MBBI = MBB.begin();
1607 MachineFrameInfo &MFI = MF.getFrameInfo();
1608 const Function &Fn = MF.getFunction();
1609 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1610 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1611 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1612 bool IsFunclet = MBB.isEHFuncletEntry();
1613 EHPersonality Personality = EHPersonality::Unknown;
1614 if (Fn.hasPersonalityFn())
1615 Personality = classifyEHPersonality(Pers: Fn.getPersonalityFn());
1616 bool FnHasClrFunclet =
1617 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1618 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1619 bool HasFP = hasFP(MF);
1620 bool IsWin64Prologue = isWin64Prologue(MF);
1621 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1622 // FIXME: Emit FPO data for EH funclets.
1623 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1624 MF.getFunction().getParent()->getCodeViewFlag();
1625 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1626 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1627 bool IsWin64UnwindV3 =
1628 NeedsWin64CFI &&
1629 Fn.getParent()->getWinX64EHUnwindMode() == WinX64EHUnwindMode::V3;
1630 Register FramePtr = TRI->getFrameRegister(MF);
1631 const Register MachineFramePtr =
1632 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64))
1633 : FramePtr;
1634 Register BasePtr = TRI->getBaseRegister();
1635 bool HasWinCFI = false;
1636
1637 // Helpers to emit Windows x64 unwind SEH pseudos with the correct placement.
1638 // V1/V2: pseudo goes after the real instruction.
1639 // V3: pseudo goes before the real instruction.
1640 // Usage:
1641 // EmitSEHBefore([&]{ BuildMI(...SEH_PushReg...); });
1642 // BuildMI(... real instruction ...);
1643 // EmitSEHAfter([&]{ BuildMI(...SEH_PushReg...); });
1644 auto EmitSEHBefore = [&](auto EmitFn) {
1645 if (NeedsWinCFI && IsWin64UnwindV3) {
1646 HasWinCFI = true;
1647 EmitFn();
1648 }
1649 };
1650 auto EmitSEHAfter = [&](auto EmitFn) {
1651 if (NeedsWinCFI && !IsWin64UnwindV3) {
1652 HasWinCFI = true;
1653 EmitFn();
1654 }
1655 };
1656
1657 // Debug location must be unknown since the first debug location is used
1658 // to determine the end of the prologue.
1659 DebugLoc DL;
1660 Register ArgBaseReg;
1661
1662 // Emit extra prolog for argument stack slot reference.
1663 if (auto *MI = X86FI->getStackPtrSaveMI()) {
    // MI is the lea instruction that was created in X86ArgumentStackSlotPass.
    // Create extra prolog for stack realignment.
1666 ArgBaseReg = MI->getOperand(i: 0).getReg();
1667 // leal 4(%esp), %basereg
1668 // .cfi_def_cfa %basereg, 0
1669 // andl $-128, %esp
1670 // pushl -4(%basereg)
1671 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Is64Bit ? X86::LEA64r : X86::LEA32r),
1672 DestReg: ArgBaseReg)
1673 .addUse(RegNo: StackPtr)
1674 .addImm(Val: 1)
1675 .addUse(RegNo: X86::NoRegister)
1676 .addImm(Val: SlotSize)
1677 .addUse(RegNo: X86::NoRegister)
1678 .setMIFlag(MachineInstr::FrameSetup);
1679 if (NeedsDwarfCFI) {
1680 // .cfi_def_cfa %basereg, 0
1681 unsigned DwarfStackPtr = TRI->getDwarfRegNum(Reg: ArgBaseReg, isEH: true);
1682 BuildCFI(MBB, MBBI, DL,
1683 CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: 0),
1684 Flag: MachineInstr::FrameSetup);
1685 }
1686 BuildStackAlignAND(MBB, MBBI, DL, Reg: StackPtr, MaxAlign);
1687 int64_t Offset = -(int64_t)SlotSize;
1688 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1689 .addReg(RegNo: ArgBaseReg)
1690 .addImm(Val: 1)
1691 .addReg(RegNo: X86::NoRegister)
1692 .addImm(Val: Offset)
1693 .addReg(RegNo: X86::NoRegister)
1694 .setMIFlag(MachineInstr::FrameSetup);
1695 }
1696
1697 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1698 // tail call.
1699 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1700 if (TailCallArgReserveSize && IsWin64Prologue)
1701 report_fatal_error(reason: "Can't handle guaranteed tail call under win64 yet");
1702
1703 const bool EmitStackProbeCall =
1704 STI.getTargetLowering()->hasStackProbeSymbol(MF);
1705 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1706
1707 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1708 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1709 case SwiftAsyncFramePointerMode::DeploymentBased:
1710 if (STI.swiftAsyncContextIsDynamicallySet()) {
1711 // The special symbol below is absolute and has a *value* suitable to be
1712 // combined with the frame pointer directly.
1713 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::OR64rm), DestReg: MachineFramePtr)
1714 .addUse(RegNo: MachineFramePtr)
1715 .addUse(RegNo: X86::RIP)
1716 .addImm(Val: 1)
1717 .addUse(RegNo: X86::NoRegister)
1718 .addExternalSymbol(FnName: "swift_async_extendedFramePointerFlags",
1719 TargetFlags: X86II::MO_GOTPCREL)
1720 .addUse(RegNo: X86::NoRegister);
1721 break;
1722 }
1723 [[fallthrough]];
1724
1725 case SwiftAsyncFramePointerMode::Always:
1726 assert(
1727 !IsWin64Prologue &&
1728 "win64 prologue does not set the bit 60 in the saved frame pointer");
1729 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::BTS64ri8), DestReg: MachineFramePtr)
1730 .addUse(RegNo: MachineFramePtr)
1731 .addImm(Val: 60)
1732 .setMIFlag(MachineInstr::FrameSetup);
1733 break;
1734
1735 case SwiftAsyncFramePointerMode::Never:
1736 break;
1737 }
1738 }
1739
1740 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1741 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1742 // stack alignment.
1743 if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
1744 Fn.arg_size() == 2) {
1745 StackSize += 8;
1746 MFI.setStackSize(StackSize);
1747
    // Update the stack pointer by pushing a register. This is the instruction
    // that would otherwise end up being emitted by a call to `emitSPUpdate`.
1750 // Hard-coding the update to a push avoids emitting a second
1751 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1752 // probing isn't needed anyways for an 8-byte update.
1753 // Pushing a register leaves us in a similar situation to a regular
1754 // function call where we know that the address at (rsp-8) is writeable.
1755 // That way we avoid any off-by-ones with stack probing for additional
1756 // stack pointer updates later on.
1757 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r))
1758 .addReg(RegNo: X86::RAX, Flags: RegState::Undef)
1759 .setMIFlag(MachineInstr::FrameSetup);
1760 }
1761
1762 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1763 // function, and use up to 128 bytes of stack space, don't have a frame
1764 // pointer, calls, or dynamic alloca then we do not need to adjust the
1765 // stack pointer (we fit in the Red Zone). We also check that we don't
1766 // push and pop from the stack.
1767 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1768 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1769 !MFI.adjustsStack() && // No calls.
1770 !EmitStackProbeCall && // No stack probes.
1771 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1772 !MF.shouldSplitStack()) { // Regular stack
1773 uint64_t MinSize =
1774 X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
1775 if (HasFP)
1776 MinSize += SlotSize;
1777 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1778 StackSize = std::max(a: MinSize, b: StackSize > 128 ? StackSize - 128 : 0);
1779 MFI.setStackSize(StackSize);
1780 }
1781
1782 // Insert stack pointer adjustment for later moving of return addr. Only
1783 // applies to tail call optimized functions where the callee argument stack
1784 // size is bigger than the callers.
1785 if (TailCallArgReserveSize != 0) {
1786 BuildStackAdjustment(MBB, MBBI, DL, Offset: -(int)TailCallArgReserveSize,
1787 /*InEpilogue=*/false)
1788 .setMIFlag(MachineInstr::FrameSetup);
1789 }
1790
1791 // Mapping for machine moves:
1792 //
1793 // DST: VirtualFP AND
1794 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1795 // ELSE => DW_CFA_def_cfa
1796 //
1797 // SRC: VirtualFP AND
1798 // DST: Register => DW_CFA_def_cfa_register
1799 //
1800 // ELSE
1801 // OFFSET < 0 => DW_CFA_offset_extended_sf
1802 // REG < 64 => DW_CFA_offset + Reg
1803 // ELSE => DW_CFA_offset_extended
1804
1805 uint64_t NumBytes = 0;
1806 int stackGrowth = -SlotSize;
1807
1808 // Find the funclet establisher parameter
1809 MCRegister Establisher;
1810 if (IsClrFunclet)
1811 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1812 else if (IsFunclet)
1813 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1814
1815 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1816 // Immediately spill establisher into the home slot.
1817 // The runtime cares about this.
1818 // MOV64mr %rdx, 16(%rsp)
1819 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1820 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVmr)), Reg: StackPtr, isKill: true, Offset: 16)
1821 .addReg(RegNo: Establisher)
1822 .setMIFlag(MachineInstr::FrameSetup);
1823 MBB.addLiveIn(PhysReg: Establisher);
1824 }
1825
1826 if (HasFP) {
1827 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1828
1829 // Calculate required stack adjustment.
1830 uint64_t FrameSize = StackSize - SlotSize;
1831 NumBytes =
1832 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1833
1834 // Callee-saved registers are pushed on stack before the stack is realigned.
1835 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1836 NumBytes = alignTo(Value: NumBytes, Align: MaxAlign);
1837
1838 // Save EBP/RBP into the appropriate stack slot.
1839 auto EmitSEHPushFramePtr = [&]() {
1840 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg))
1841 .addImm(Val: FramePtr)
1842 .setMIFlag(MachineInstr::FrameSetup);
1843 };
1844 EmitSEHBefore(EmitSEHPushFramePtr);
1845 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
1846 MCID: TII.get(Opcode: getPUSHOpcode(ST: MF.getSubtarget<X86Subtarget>())))
1847 .addReg(RegNo: MachineFramePtr, Flags: RegState::Kill)
1848 .setMIFlag(MachineInstr::FrameSetup);
1849 EmitSEHAfter(EmitSEHPushFramePtr);
1850
1851 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1852 // Mark the place where EBP/RBP was saved.
1853 // Define the current CFA rule to use the provided offset.
1854 assert(StackSize);
1855 BuildCFI(MBB, MBBI, DL,
1856 CFIInst: MCCFIInstruction::cfiDefCfaOffset(
1857 L: nullptr, Offset: -2 * stackGrowth + (int)TailCallArgReserveSize),
1858 Flag: MachineInstr::FrameSetup);
1859
1860 // Change the rule for the FramePtr to be an "offset" rule.
1861 unsigned DwarfFramePtr = TRI->getDwarfRegNum(Reg: MachineFramePtr, isEH: true);
1862 BuildCFI(MBB, MBBI, DL,
1863 CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfFramePtr,
1864 Offset: 2 * stackGrowth -
1865 (int)TailCallArgReserveSize),
1866 Flag: MachineInstr::FrameSetup);
1867 }
1868
1869 if (!IsFunclet) {
1870 if (X86FI->hasSwiftAsyncContext()) {
1871 assert(!IsWin64Prologue &&
1872 "win64 prologue does not store async context right below rbp");
1873 const auto &Attrs = MF.getFunction().getAttributes();
1874
1875 // Before we update the live frame pointer we have to ensure there's a
1876 // valid (or null) asynchronous context in its slot just before FP in
1877 // the frame record, so store it now.
1878 auto EmitSEHPushR14 = [&]() {
1879 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg))
1880 .addImm(Val: X86::R14)
1881 .setMIFlag(MachineInstr::FrameSetup);
1882 };
1883 EmitSEHBefore(EmitSEHPushR14);
1884 if (Attrs.hasAttrSomewhere(Kind: Attribute::SwiftAsync)) {
1885 // We have an initial context in r14, store it just before the frame
1886 // pointer.
1887 MBB.addLiveIn(PhysReg: X86::R14);
1888 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r))
1889 .addReg(RegNo: X86::R14)
1890 .setMIFlag(MachineInstr::FrameSetup);
1891 } else {
1892 // No initial context, store null so that there's no pointer that
1893 // could be misused.
1894 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64i32))
1895 .addImm(Val: 0)
1896 .setMIFlag(MachineInstr::FrameSetup);
1897 }
1898 EmitSEHAfter(EmitSEHPushR14);
1899
1900 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: FramePtr)
1901 .addUse(RegNo: X86::RSP)
1902 .addImm(Val: 1)
1903 .addUse(RegNo: X86::NoRegister)
1904 .addImm(Val: 8)
1905 .addUse(RegNo: X86::NoRegister)
1906 .setMIFlag(MachineInstr::FrameSetup);
1907 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64ri32), DestReg: X86::RSP)
1908 .addUse(RegNo: X86::RSP)
1909 .addImm(Val: 8)
1910 .setMIFlag(MachineInstr::FrameSetup);
1911 }
1912
1913 if (!IsWin64Prologue && !IsFunclet) {
1914 // Update EBP with the new base value.
1915 if (!X86FI->hasSwiftAsyncContext())
1916 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
1917 MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1918 DestReg: FramePtr)
1919 .addReg(RegNo: StackPtr)
1920 .setMIFlag(MachineInstr::FrameSetup);
1921
1922 if (NeedsDwarfCFI) {
1923 if (ArgBaseReg.isValid()) {
1924 SmallString<64> CfaExpr;
1925 CfaExpr.push_back(Elt: dwarf::DW_CFA_expression);
1926 uint8_t buffer[16];
1927 unsigned DwarfReg = TRI->getDwarfRegNum(Reg: MachineFramePtr, isEH: true);
1928 CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer));
1929 CfaExpr.push_back(Elt: 2);
1930 CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1931 CfaExpr.push_back(Elt: 0);
1932 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1933 BuildCFI(MBB, MBBI, DL,
1934 CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str()),
1935 Flag: MachineInstr::FrameSetup);
1936 } else {
1937 // Mark effective beginning of when frame pointer becomes valid.
1938 // Define the current CFA to use the EBP/RBP register.
1939 unsigned DwarfFramePtr = TRI->getDwarfRegNum(Reg: MachineFramePtr, isEH: true);
1940 BuildCFI(
1941 MBB, MBBI, DL,
1942 CFIInst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: DwarfFramePtr),
1943 Flag: MachineInstr::FrameSetup);
1944 }
1945 }
1946
1947 if (NeedsWinFPO) {
1948 // .cv_fpo_setframe $FramePtr
1949 // NeedsWinFPO is Win32 only, so we're never using Unwind v3, hence it
1950 // is always inserted afterwards.
1951 assert(!IsWin64UnwindV3);
1952 HasWinCFI = true;
1953 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SetFrame))
1954 .addImm(Val: FramePtr)
1955 .addImm(Val: 0)
1956 .setMIFlag(MachineInstr::FrameSetup);
1957 }
1958 }
1959 }
1960 } else {
1961 assert(!IsFunclet && "funclets without FPs not yet implemented");
1962 NumBytes =
1963 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1964 }
1965
1966 // Update the offset adjustment, which is mainly used by codeview to translate
1967 // from ESP to VFRAME relative local variable offsets.
1968 if (!IsFunclet) {
1969 if (HasFP && TRI->hasStackRealignment(MF))
1970 MFI.setOffsetAdjustment(-NumBytes);
1971 else
1972 MFI.setOffsetAdjustment(-StackSize);
1973 }
1974
1975 // For EH funclets, only allocate enough space for outgoing calls. Save the
1976 // NumBytes value that we would've used for the parent frame.
1977 unsigned ParentFrameNumBytes = NumBytes;
1978 if (IsFunclet)
1979 NumBytes = getWinEHFuncletFrameSize(MF);
1980
1981 // Skip the callee-saved push instructions.
1982 bool PushedRegs = false;
1983 int StackOffset = 2 * stackGrowth;
1984 MachineBasicBlock::const_iterator LastCSPush = MBBI;
1985 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1986 if (MBBI == MBB.end() || !MBBI->getFlag(Flag: MachineInstr::FrameSetup))
1987 return false;
1988 unsigned Opc = MBBI->getOpcode();
1989 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1990 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1991 };
1992
1993 while (IsCSPush(MBBI)) {
1994 PushedRegs = true;
1995 Register Reg = MBBI->getOperand(i: 0).getReg();
1996 LastCSPush = MBBI;
1997 unsigned Opc = LastCSPush->getOpcode();
1998 bool IsPush2 = Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1999
2000 // V3: emit SEH pseudo before the real instruction.
2001 EmitSEHBefore([&]() {
2002 if (IsPush2) {
2003 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_Push2Regs))
2004 .addImm(Val: Reg)
2005 .addImm(Val: LastCSPush->getOperand(i: 1).getReg())
2006 .setMIFlag(MachineInstr::FrameSetup);
2007 } else {
2008 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg))
2009 .addImm(Val: Reg)
2010 .setMIFlag(MachineInstr::FrameSetup);
2011 }
2012 });
2013 ++MBBI;
2014
2015 if (!HasFP && NeedsDwarfCFI) {
2016 // Mark callee-saved push instruction.
2017 // Define the current CFA rule to use the provided offset.
2018 assert(StackSize);
2019 // Compared to push, push2 introduces more stack offset (one more
2020 // register).
2021 if (IsPush2)
2022 StackOffset += stackGrowth;
2023 BuildCFI(MBB, MBBI, DL,
2024 CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -StackOffset),
2025 Flag: MachineInstr::FrameSetup);
2026 StackOffset += stackGrowth;
2027 }
2028
2029 // V1/V2: emit SEH pseudo after the real instruction.
2030 EmitSEHAfter([&]() {
2031 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg))
2032 .addImm(Val: Reg)
2033 .setMIFlag(MachineInstr::FrameSetup);
2034 if (IsPush2)
2035 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg))
2036 .addImm(Val: LastCSPush->getOperand(i: 1).getReg())
2037 .setMIFlag(MachineInstr::FrameSetup);
2038 });
2039 }
2040
2041 // Realign stack after we pushed callee-saved registers (so that we'll be
2042 // able to calculate their offsets from the frame pointer).
2043 // Don't do this for Win64, it needs to realign the stack after the prologue.
2044 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
2045 !ArgBaseReg.isValid()) {
2046 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2047 auto EmitSEHStackAlign = [&]() {
2048 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_StackAlign))
2049 .addImm(Val: MaxAlign)
2050 .setMIFlag(MachineInstr::FrameSetup);
2051 };
2052 EmitSEHBefore(EmitSEHStackAlign);
2053 BuildStackAlignAND(MBB, MBBI, DL, Reg: StackPtr, MaxAlign);
2054 EmitSEHAfter(EmitSEHStackAlign);
2055 }
2056
2057 // If there is an SUB32ri of ESP immediately before this instruction, merge
2058 // the two. This can be the case when tail call elimination is enabled and
2059 // the callee has more arguments than the caller.
2060 NumBytes = mergeSPUpdates(
2061 MBB, MBBI, CalcNewOffset: [NumBytes](int64_t Offset) { return NumBytes - Offset; },
2062 doMergeWithPrevious: true);
2063
2064 // Adjust stack pointer: ESP -= numbytes.
2065
2066 // Windows and cygwin/mingw require a prologue helper routine when allocating
2067 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
2068 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
2069 // stack and adjust the stack pointer in one go. The 64-bit version of
2070 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
2071 // responsible for adjusting the stack pointer. Touching the stack at 4K
2072 // increments is necessary to ensure that the guard pages used by the OS
2073 // virtual memory manager are allocated in correct sequence.
2074 uint64_t AlignedNumBytes = NumBytes;
2075 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
2076 AlignedNumBytes = alignTo(Value: AlignedNumBytes, Align: MaxAlign);
2077
2078 auto EmitSEHStackAlloc = [&]() {
2079 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_StackAlloc))
2080 .addImm(Val: NumBytes)
2081 .setMIFlag(MachineInstr::FrameSetup);
2082 };
2083 if (NumBytes)
2084 EmitSEHBefore(EmitSEHStackAlloc);
2085
2086 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
2087 assert(!X86FI->getUsesRedZone() &&
2088 "The Red Zone is not accounted for in stack probes");
2089
2090 // Check whether EAX is livein for this block.
2091 bool isEAXAlive = isEAXLiveIn(MBB);
2092
2093 if (isEAXAlive) {
2094 if (Is64Bit) {
2095 // Save RAX
2096 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r))
2097 .addReg(RegNo: X86::RAX, Flags: RegState::Kill)
2098 .setMIFlag(MachineInstr::FrameSetup);
2099 } else {
2100 // Save EAX
2101 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32r))
2102 .addReg(RegNo: X86::EAX, Flags: RegState::Kill)
2103 .setMIFlag(MachineInstr::FrameSetup);
2104 }
2105 }
2106
2107 if (Is64Bit) {
2108 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2109 // Function prologue is responsible for adjusting the stack pointer.
2110 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2111 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::getMOVriOpcode(Use64BitReg: Is64Bit, Imm: Alloc)),
2112 DestReg: X86::RAX)
2113 .addImm(Val: Alloc)
2114 .setMIFlag(MachineInstr::FrameSetup);
2115 } else {
2116 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2117 // We'll also use 4 already allocated bytes for EAX.
2118 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: X86::EAX)
2119 .addImm(Val: isEAXAlive ? NumBytes - 4 : NumBytes)
2120 .setMIFlag(MachineInstr::FrameSetup);
2121 }
2122
2123 // Call __chkstk, __chkstk_ms, or __alloca.
2124 emitStackProbe(MF, MBB, MBBI, DL, InProlog: true);
2125
2126 if (isEAXAlive) {
2127 // Restore RAX/EAX
2128 MachineInstr *MI;
2129 if (Is64Bit)
2130 MI = addRegOffset(MIB: BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RAX),
2131 Reg: StackPtr, isKill: false, Offset: NumBytes - 8);
2132 else
2133 MI = addRegOffset(MIB: BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32rm), DestReg: X86::EAX),
2134 Reg: StackPtr, isKill: false, Offset: NumBytes - 4);
2135 MI->setFlag(MachineInstr::FrameSetup);
2136 MBB.insert(I: MBBI, MI);
2137 }
2138 } else if (NumBytes) {
2139 emitSPUpdate(MBB, MBBI, DL, NumBytes: -(int64_t)NumBytes, /*InEpilogue=*/false);
2140 }
2141
2142 if (NumBytes)
2143 EmitSEHAfter(EmitSEHStackAlloc);
2144
2145 int SEHFrameOffset = 0;
2146 Register SPOrEstablisher;
2147 if (IsFunclet) {
2148 if (IsClrFunclet) {
2149 // The establisher parameter passed to a CLR funclet is actually a pointer
2150 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2151 // to find the root function establisher frame by loading the PSPSym from
2152 // the intermediate frame.
2153 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2154 MachinePointerInfo NoInfo;
2155 MBB.addLiveIn(PhysReg: Establisher);
2156 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: Establisher),
2157 Reg: Establisher, isKill: false, Offset: PSPSlotOffset)
2158 .addMemOperand(MMO: MF.getMachineMemOperand(
2159 PtrInfo: NoInfo, F: MachineMemOperand::MOLoad, Size: SlotSize, BaseAlignment: Align(SlotSize)));
2160 ;
2161 // Save the root establisher back into the current funclet's (mostly
2162 // empty) frame, in case a sub-funclet or the GC needs it.
2163 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: StackPtr,
2164 isKill: false, Offset: PSPSlotOffset)
2165 .addReg(RegNo: Establisher)
2166 .addMemOperand(MMO: MF.getMachineMemOperand(
2167 PtrInfo: NoInfo,
2168 F: MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2169 Size: SlotSize, BaseAlignment: Align(SlotSize)));
2170 }
2171 SPOrEstablisher = Establisher;
2172 } else {
2173 SPOrEstablisher = StackPtr;
2174 }
2175
2176 if (IsWin64Prologue && HasFP) {
2177 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2178 // this calculation on the incoming establisher, which holds the value of
2179 // RSP from the parent frame at the end of the prologue.
2180 SEHFrameOffset = calculateSetFPREG(SPAdjust: ParentFrameNumBytes);
2181
2182 // If this is not a funclet, emit the CFI describing our frame pointer.
2183 if (NeedsWinCFI && !IsFunclet) {
2184 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2185 HasWinCFI = true;
2186 if (isAsynchronousEHPersonality(Pers: Personality) || MF.hasEHFunclets()) {
2187 if (TRI->hasBasePointer(MF))
2188 MF.getWinEHFuncInfo()->SEHSetFrameOffset =
2189 getWinEHParentFrameOffset(MF);
2190 else
2191 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2192 }
2193 }
2194
2195 auto EmitSEHSetFrame = [&]() {
2196 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SetFrame))
2197 .addImm(Val: FramePtr)
2198 .addImm(Val: SEHFrameOffset)
2199 .setMIFlag(MachineInstr::FrameSetup);
2200 };
2201
2202 if (!IsFunclet)
2203 EmitSEHBefore(EmitSEHSetFrame);
2204
2205 if (SEHFrameOffset)
2206 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: FramePtr),
2207 Reg: SPOrEstablisher, isKill: false, Offset: SEHFrameOffset);
2208 else
2209 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: FramePtr)
2210 .addReg(RegNo: SPOrEstablisher);
2211
2212 if (!IsFunclet)
2213 EmitSEHAfter(EmitSEHSetFrame);
2214 } else if (IsFunclet && STI.is32Bit()) {
2215 // Reset EBP / ESI to something good for funclets.
2216 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2217 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2218 // into the registration node so that the runtime will restore it for us.
2219 if (!MBB.isCleanupFuncletEntry()) {
2220 assert(Personality == EHPersonality::MSVC_CXX);
2221 Register FrameReg;
2222 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2223 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2224 // ESP is the first field, so no extra displacement is needed.
2225 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32mr)), Reg: FrameReg,
2226 isKill: false, Offset: EHRegOffset)
2227 .addReg(RegNo: X86::ESP);
2228 }
2229 }
2230
2231 while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup)) {
2232 const MachineInstr &FrameInstr = *MBBI;
2233
2234 if (NeedsWinCFI) {
2235 int FI;
2236 if (Register Reg = TII.isStoreToStackSlot(MI: FrameInstr, FrameIndex&: FI)) {
2237 if (X86::FR64RegClass.contains(Reg)) {
2238 int Offset;
2239 Register IgnoredFrameReg;
2240 if (IsWin64Prologue && IsFunclet)
2241 Offset = getWin64EHFrameIndexRef(MF, FI, SPReg&: IgnoredFrameReg);
2242 else
2243 Offset =
2244 getFrameIndexReference(MF, FI, FrameReg&: IgnoredFrameReg).getFixed() +
2245 SEHFrameOffset;
2246
2247 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2248 auto EmitSEHSaveXMM = [&]() {
2249 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SaveXMM))
2250 .addImm(Val: Reg)
2251 .addImm(Val: Offset)
2252 .setMIFlag(MachineInstr::FrameSetup);
2253 };
2254 EmitSEHBefore(EmitSEHSaveXMM);
2255 ++MBBI;
2256 EmitSEHAfter(EmitSEHSaveXMM);
2257 continue;
2258 }
2259 }
2260 }
2261 ++MBBI;
2262 }
2263
2264 if (NeedsWinCFI && HasWinCFI) {
2265 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_EndPrologue))
2266 .setMIFlag(MachineInstr::FrameSetup);
2267 }
2268
2269 if (FnHasClrFunclet && !IsFunclet) {
2270 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2271 // immediately after the prolog) into the PSPSlot so that funclets
2272 // and the GC can recover it.
2273 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2274 auto PSPInfo = MachinePointerInfo::getFixedStack(
2275 MF, FI: MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2276 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: StackPtr, isKill: false,
2277 Offset: PSPSlotOffset)
2278 .addReg(RegNo: StackPtr)
2279 .addMemOperand(MMO: MF.getMachineMemOperand(
2280 PtrInfo: PSPInfo, F: MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2281 Size: SlotSize, BaseAlignment: Align(SlotSize)));
2282 }
2283
2284 // Realign stack after we spilled callee-saved registers (so that we'll be
2285 // able to calculate their offsets from the frame pointer).
2286 // Win64 requires aligning the stack after the prologue.
2287 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2288 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2289 BuildStackAlignAND(MBB, MBBI, DL, Reg: SPOrEstablisher, MaxAlign);
2290 }
2291
2292 // We already dealt with stack realignment and funclets above.
2293 if (IsFunclet && STI.is32Bit())
2294 return;
2295
2296 // If we need a base pointer, set it up here. It's whatever the value
2297 // of the stack pointer is at this point. Any variable size objects
2298 // will be allocated after this, so we can still use the base pointer
2299 // to reference locals.
2300 if (TRI->hasBasePointer(MF)) {
2301 // Update the base pointer with the current stack pointer.
2302 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2303 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: BasePtr)
2304 .addReg(RegNo: SPOrEstablisher)
2305 .setMIFlag(MachineInstr::FrameSetup);
2306 if (X86FI->getRestoreBasePointer()) {
2307 // Stash value of base pointer. Saving RSP instead of EBP shortens
2308 // dependence chain. Used by SjLj EH.
2309 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2310 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opm)), Reg: FramePtr, isKill: true,
2311 Offset: X86FI->getRestoreBasePointerOffset())
2312 .addReg(RegNo: SPOrEstablisher)
2313 .setMIFlag(MachineInstr::FrameSetup);
2314 }
2315
2316 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2317 // Stash the value of the frame pointer relative to the base pointer for
2318 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2319 // it recovers the frame pointer from the base pointer rather than the
2320 // other way around.
2321 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2322 Register UsedReg;
2323 int Offset =
2324 getFrameIndexReference(MF, FI: X86FI->getSEHFramePtrSaveIndex(), FrameReg&: UsedReg)
2325 .getFixed();
2326 assert(UsedReg == BasePtr);
2327 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opm)), Reg: UsedReg, isKill: true, Offset)
2328 .addReg(RegNo: FramePtr)
2329 .setMIFlag(MachineInstr::FrameSetup);
2330 }
2331 }
2332 if (ArgBaseReg.isValid()) {
2333 // Save argument base pointer.
2334 auto *MI = X86FI->getStackPtrSaveMI();
2335 int FI = MI->getOperand(i: 1).getIndex();
2336 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2337 // movl %basereg, offset(%ebp)
2338 addFrameReference(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVmr)), FI)
2339 .addReg(RegNo: ArgBaseReg)
2340 .setMIFlag(MachineInstr::FrameSetup);
2341 }
2342
2343 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2344 // Mark end of stack pointer adjustment.
2345 if (!HasFP && NumBytes) {
2346 // Define the current CFA rule to use the provided offset.
2347 assert(StackSize);
2348 BuildCFI(
2349 MBB, MBBI, DL,
2350 CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: StackSize - stackGrowth),
2351 Flag: MachineInstr::FrameSetup);
2352 }
2353
2354 // Emit DWARF info specifying the offsets of the callee-saved registers.
2355 emitCalleeSavedFrameMoves(MBB, MBBI, DL, IsPrologue: true);
2356 }
2357
2358 // X86 Interrupt handling function cannot assume anything about the direction
2359 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2360 // in each prologue of interrupt handler function.
2361 //
2362 // Create "cld" instruction only in these cases:
2363 // 1. The interrupt handling function uses any of the "rep" instructions.
2364 // 2. Interrupt handling function calls another function.
2365 // 3. If there are any inline asm blocks, as we do not know what they do
2366 //
2367 // TODO: We should also emit cld if we detect the use of std, but as of now,
2368 // the compiler does not even emit that instruction or even define it, so in
2369 // practice, this would only happen with inline asm, which we cover anyway.
2370 if (Fn.getCallingConv() == CallingConv::X86_INTR) {
2371 bool NeedsCLD = false;
2372
2373 for (const MachineBasicBlock &B : MF) {
2374 for (const MachineInstr &MI : B) {
2375 if (MI.isCall()) {
2376 NeedsCLD = true;
2377 break;
2378 }
2379
2380 if (isOpcodeRep(Opcode: MI.getOpcode())) {
2381 NeedsCLD = true;
2382 break;
2383 }
2384
2385 if (MI.isInlineAsm()) {
2386 // TODO: Parse asm for rep instructions or call sites?
2387 // For now, let's play it safe and emit a cld instruction
2388 // just in case.
2389 NeedsCLD = true;
2390 break;
2391 }
2392 }
2393 }
2394
2395 if (NeedsCLD) {
2396 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::CLD))
2397 .setMIFlag(MachineInstr::FrameSetup);
2398 }
2399 }
2400
2401 // At this point we know if the function has WinCFI or not.
2402 MF.setHasWinCFI(HasWinCFI);
2403}
2404
2405bool X86FrameLowering::canUseLEAForSPInEpilogue(
2406 const MachineFunction &MF) const {
2407 // We can't use LEA instructions for adjusting the stack pointer if we don't
2408 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2409 // to deallocate the stack.
2410 // This means that we can use LEA for SP in two situations:
2411 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2412 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2413 return !MF.getTarget().getMCAsmInfo().usesWindowsCFI() || hasFP(MF);
2414}
2415
2416static bool isFuncletReturnInstr(MachineInstr &MI) {
2417 switch (MI.getOpcode()) {
2418 case X86::CATCHRET:
2419 case X86::CLEANUPRET:
2420 return true;
2421 default:
2422 return false;
2423 }
2424 llvm_unreachable("impossible");
2425}
2426
2427// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2428// stack. It holds a pointer to the bottom of the root function frame. The
2429// establisher frame pointer passed to a nested funclet may point to the
2430// (mostly empty) frame of its parent funclet, but it will need to find
2431// the frame of the root function to access locals. To facilitate this,
2432// every funclet copies the pointer to the bottom of the root function
2433// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2434// same offset for the PSPSym in the root function frame that's used in the
2435// funclets' frames allows each funclet to dynamically accept any ancestor
2436// frame as its establisher argument (the runtime doesn't guarantee the
2437// immediate parent for some reason lost to history), and also allows the GC,
2438// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2439// frame with only a single offset reported for the entire method.
2440unsigned
2441X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2442 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2443 Register SPReg;
2444 int Offset = getFrameIndexReferencePreferSP(MF, FI: Info.PSPSymFrameIdx, FrameReg&: SPReg,
2445 /*IgnoreSPUpdates*/ true)
2446 .getFixed();
2447 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2448 return static_cast<unsigned>(Offset);
2449}
2450
2451unsigned
2452X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2453 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2454 // This is the size of the pushed CSRs.
2455 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2456 // This is the size of callee saved XMMs.
2457 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2458 unsigned XMMSize =
2459 WinEHXMMSlotInfo.size() * TRI->getSpillSize(RC: X86::VR128RegClass);
2460 // This is the amount of stack a funclet needs to allocate.
2461 unsigned UsedSize;
2462 EHPersonality Personality =
2463 classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn());
2464 if (Personality == EHPersonality::CoreCLR) {
2465 // CLR funclets need to hold enough space to include the PSPSym, at the
2466 // same offset from the stack pointer (immediately after the prolog) as it
2467 // resides at in the main function.
2468 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2469 } else {
2470 // Other funclets just need enough stack for outgoing call arguments.
2471 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2472 }
2473 // RBP is not included in the callee saved register block. After pushing RBP,
2474 // everything is 16 byte aligned. Everything we allocate before an outgoing
2475 // call must also be 16 byte aligned.
2476 unsigned FrameSizeMinusRBP = alignTo(Size: CSSize + UsedSize, A: getStackAlign());
2477 // Subtract out the size of the callee saved registers. This is how much stack
2478 // each funclet will allocate.
2479 return FrameSizeMinusRBP + XMMSize - CSSize;
2480}
2481
2482static bool isTailCallOpcode(unsigned Opc) {
2483 return Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
2484 Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
2485 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2486 Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
2487 Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64;
2488}
2489
/// Emit the epilogue for \p MBB. Everything is inserted in front of the
/// block's first terminator (or at the end of the block when it has none).
/// Depending on the function, this:
///  - restores the stack pointer from the argument base register when the
///    prologue recorded a stack-pointer save instruction;
///  - pops the frame pointer and releases the local stack area, either with an
///    SP adjustment or with an LEA/MOV from the frame pointer when the stack
///    was realigned or contains variable-sized objects;
///  - emits DWARF CFI directives or Win64 SEH epilogue pseudos as required;
///  - undoes the tail-call return-address delta on exits that are not tail
///    calls;
///  - emits TILERELEASE for the AMX ManagedRA programming model.
2490 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2491 MachineBasicBlock &MBB) const {
2492 const MachineFrameInfo &MFI = MF.getFrameInfo();
2493 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
// Terminator stays fixed at the first terminator; MBBI is the moving insertion
// point and is stepped backwards as instructions are emitted.
2494 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2495 MachineBasicBlock::iterator MBBI = Terminator;
2496 DebugLoc DL;
2497 if (MBBI != MBB.end())
2498 DL = MBBI->getDebugLoc();
2499 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
2500 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2501 Register FramePtr = TRI->getFrameRegister(MF);
2502 Register MachineFramePtr =
2503 Is64BitILP32 ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) : FramePtr;
2504
2505 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
2506 bool NeedsWin64CFI =
2507 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2508 // For V3 unwind, epilog SEH pseudos are emitted inline before each
2509 // unwind-effecting instruction.
2510 bool IsWin64UnwindV3 =
2511 NeedsWin64CFI && MF.hasWinCFI() &&
2512 MF.getFunction().getParent()->getWinX64EHUnwindMode() ==
2513 WinX64EHUnwindMode::V3;
// A block whose first terminator is CATCHRET/CLEANUPRET is a funclet epilogue.
2514 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(MI&: *MBBI);
2515
2516 // Get the number of bytes to allocate from the FrameInfo.
2517 uint64_t StackSize = MFI.getStackSize();
2518 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2519 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2520 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2521 bool HasFP = hasFP(MF);
2522 uint64_t NumBytes = 0;
2523
// DWARF CFI is only produced for targets that are not Darwin, Windows or UEFI,
// and only when the function needs frame moves.
2524 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2525 !MF.getTarget().getTargetTriple().isOSWindows() &&
2526 !MF.getTarget().getTargetTriple().isUEFI()) &&
2527 MF.needsFrameMoves();
2528
// If the prologue recorded a stack-pointer save instruction, rebuild SP from
// the argument base register (operand 0 of that instruction).
2529 Register ArgBaseReg;
2530 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2531 unsigned Opc = X86::LEA32r;
2532 Register StackReg = X86::ESP;
2533 ArgBaseReg = MI->getOperand(i: 0).getReg();
2534 if (STI.is64Bit()) {
2535 Opc = X86::LEA64r;
2536 StackReg = X86::RSP;
2537 }
2538 // leal -4(%basereg), %esp
2539 // .cfi_def_cfa %esp, 4
2540 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackReg)
2541 .addUse(RegNo: ArgBaseReg)
2542 .addImm(Val: 1)
2543 .addUse(RegNo: X86::NoRegister)
2544 .addImm(Val: -(int64_t)SlotSize)
2545 .addUse(RegNo: X86::NoRegister)
2546 .setMIFlag(MachineInstr::FrameDestroy);
2547 if (NeedsDwarfCFI) {
2548 unsigned DwarfStackPtr = TRI->getDwarfRegNum(Reg: StackReg, isEH: true);
2549 BuildCFI(MBB, MBBI, DL,
2550 CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: SlotSize),
2551 Flag: MachineInstr::FrameDestroy);
2552 --MBBI;
2553 }
// Step MBBI back onto the LEA just emitted so that the remaining epilogue
// code is inserted ahead of it.
2554 --MBBI;
2555 }
2556
// Work out how many bytes of local stack the epilogue has to release.
2557 if (IsFunclet) {
2558 assert(HasFP && "EH funclets without FP not yet implemented");
2559 NumBytes = getWinEHFuncletFrameSize(MF);
2560 } else if (HasFP) {
2561 // Calculate required stack adjustment.
2562 uint64_t FrameSize = StackSize - SlotSize;
2563 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2564
2565 // Callee-saved registers were pushed on stack before the stack was
2566 // realigned.
2567 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2568 NumBytes = alignTo(Value: FrameSize, Align: MaxAlign);
2569 } else {
2570 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2571 }
// Keep the unmerged amount for the SEH pseudos: NumBytes itself may be
// replaced by the result of mergeSPAdd further down.
2572 uint64_t SEHStackAllocAmt = NumBytes;
2573
2574 unsigned SEHFrameOffset = 0;
2575 if (IsWin64Prologue && HasFP)
2576 SEHFrameOffset = calculateSetFPREG(SPAdjust: SEHStackAllocAmt);
2577
2578 // AfterPop is the position to insert .cfi_restore.
2579 MachineBasicBlock::iterator AfterPop = MBBI;
2580 if (HasFP) {
2581 if (X86FI->hasSwiftAsyncContext()) {
2582 // Discard the context.
2583 int64_t Offset = mergeSPAdd(MBB, MBBI, AddOffset: 16, doMergeWithPrevious: true);
2584 emitSPUpdate(MBB, MBBI, DL, NumBytes: Offset, /*InEpilogue*/ true);
2585 }
2586 // Pop EBP.
2587 if (IsWin64UnwindV3)
2588 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg))
2589 .addImm(Val: FramePtr)
2590 .setMIFlag(MachineInstr::FrameDestroy);
2591 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
2592 MCID: TII.get(Opcode: getPOPOpcode(ST: MF.getSubtarget<X86Subtarget>())),
2593 DestReg: MachineFramePtr)
2594 .setMIFlag(MachineInstr::FrameDestroy);
2595
2596 // We need to reset FP to its untagged state on return. Bit 60 is currently
2597 // used to show the presence of an extended frame.
2598 if (X86FI->hasSwiftAsyncContext()) {
2599 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::BTR64ri8), DestReg: MachineFramePtr)
2600 .addUse(RegNo: MachineFramePtr)
2601 .addImm(Val: 60)
2602 .setMIFlag(MachineInstr::FrameDestroy);
2603 }
2604
2605 if (NeedsDwarfCFI) {
// When an argument base register is in use, the CFA was already redefined
// relative to SP by the LEA sequence above.
2606 if (!ArgBaseReg.isValid()) {
2607 unsigned DwarfStackPtr =
2608 TRI->getDwarfRegNum(Reg: Is64Bit ? X86::RSP : X86::ESP, isEH: true);
2609 BuildCFI(MBB, MBBI, DL,
2610 CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: SlotSize),
2611 Flag: MachineInstr::FrameDestroy);
2612 }
// A .cfi_restore for the frame pointer is only emitted for blocks that have
// successors and are not return blocks.
2613 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2614 unsigned DwarfFramePtr = TRI->getDwarfRegNum(Reg: MachineFramePtr, isEH: true);
2615 BuildCFI(MBB, MBBI: AfterPop, DL,
2616 CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfFramePtr),
2617 Flag: MachineInstr::FrameDestroy);
2618 --MBBI;
2619 --AfterPop;
2620 }
2621 --MBBI;
2622 }
2623 }
2624
// Scan backwards from MBBI. FirstCSPop is moved to each preceding instruction
// that carries the FrameDestroy flag and has one of the opcodes listed below;
// DBG_VALUEs and terminators are stepped over without moving it. The scan
// stops at the first instruction that does not qualify.
2625 MachineBasicBlock::iterator FirstCSPop = MBBI;
2626 // Skip the callee-saved pop instructions.
2627 while (MBBI != MBB.begin()) {
2628 MachineBasicBlock::iterator PI = std::prev(x: MBBI);
2629 unsigned Opc = PI->getOpcode();
2630
2631 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2632 if (!PI->getFlag(Flag: MachineInstr::FrameDestroy) ||
2633 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2634 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2635 Opc != X86::POP2P && Opc != X86::LEA64r && Opc != X86::SEH_PushReg &&
2636 Opc != X86::SEH_Push2Regs && Opc != X86::SEH_StackAlloc &&
2637 Opc != X86::ADD64ri32_NF))
2638 break;
2639 FirstCSPop = PI;
2640 }
2641
2642 --MBBI;
2643 }
2644 if (ArgBaseReg.isValid()) {
2645 // Restore argument base pointer.
2646 auto *MI = X86FI->getStackPtrSaveMI();
2647 int FI = MI->getOperand(i: 1).getIndex();
2648 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2649 // movl offset(%ebp), %basereg
2650 addFrameReference(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVrm), DestReg: ArgBaseReg), FI)
2651 .setMIFlag(MachineInstr::FrameDestroy);
2652 }
// From here on, stack-restore code is inserted in front of the first pop.
2653 MBBI = FirstCSPop;
2654
2655 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2656 emitCatchRetReturnValue(MBB, MBBI: FirstCSPop, CatchRet: &*Terminator);
2657
2658 if (MBBI != MBB.end())
2659 DL = MBBI->getDebugLoc();
2660 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2661 // instruction, merge the two instructions.
2662 if (NumBytes || MFI.hasVarSizedObjects())
2663 NumBytes = mergeSPAdd(MBB, MBBI, AddOffset: NumBytes, doMergeWithPrevious: true);
2664
2665 if (IsWin64UnwindV3 && NeedsWin64CFI && MF.hasWinCFI()) {
2666 // Find the XMM restores that were tagged with FrameDestroy, now that we
2667 // know the offset we can emit the SEH pseudos for them.
2668 auto EpilogStart = MBBI;
2669 {
2670 auto ScanIt = MBBI;
2671 while (ScanIt != MBB.begin()) {
2672 auto PI = std::prev(x: ScanIt);
2673 int FI;
2674 if (PI->getFlag(Flag: MachineInstr::FrameDestroy) &&
2675 TII.isLoadFromStackSlot(MI: *PI, FrameIndex&: FI)) {
2676 Register Reg = PI->getOperand(i: 0).getReg();
2677 if (X86::FR64RegClass.contains(Reg)) {
2678 Register IgnoredFrameReg;
2679 int Offset =
2680 getFrameIndexReference(MF, FI, FrameReg&: IgnoredFrameReg).getFixed() +
2681 SEHFrameOffset;
2682 BuildMI(BB&: MBB, I: PI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SaveXMM))
2683 .addImm(Val: Reg)
2684 .addImm(Val: Offset)
2685 .setMIFlag(MachineInstr::FrameDestroy);
2686 // std::prev(PI) is the SEH_SaveXMM we just inserted (before PI).
2687 // We start ScanIt from that point so that the next
2688 // std::prev(ScanIt) will examine the instruction before the pseudo,
2689 // i.e. the next potential XMM restore further up the block.
2690 EpilogStart = std::prev(x: PI);
2691 ScanIt = EpilogStart;
2692 continue;
2693 }
2694 }
// The first instruction that is not a FrameDestroy stack-slot reload of an
// FR64 register ends the scan.
2695 break;
2696 }
2697 }
2698
2699 // For V3, SEH_BeginEpilogue must be emitted before any epilog SEH pseudos.
2700 BuildMI(BB&: MBB, I: EpilogStart, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_BeginEpilogue));
2701 }
2702
2703 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2704 // slot before popping them off! Same applies for the case, when stack was
2705 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2706 // will not do realignment or dynamic stack allocation.
2707 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2708 !IsFunclet) {
2709 if (TRI->hasStackRealignment(MF))
2710 MBBI = FirstCSPop;
// NOTE(review): -CSSize is an unsigned negation stored in a uint64_t;
// presumably it is narrowed back to a negative displacement when passed as
// the Offset of addRegOffset below -- confirm against that helper's signature.
2711 uint64_t LEAAmount =
2712 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2713
2714 if (X86FI->hasSwiftAsyncContext())
2715 LEAAmount -= 16;
2716
2717 // There are only two legal forms of epilogue:
2718 // - add SEHAllocationSize, %rsp
2719 // - lea SEHAllocationSize(%FramePtr), %rsp
2720 //
2721 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2722 // However, we may use this sequence if we have a frame pointer because the
2723 // effects of the prologue can safely be undone.
2724 if (IsWin64UnwindV3) {
2725 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SetFrame))
2726 .addImm(Val: FramePtr)
2727 .addImm(Val: SEHFrameOffset)
2728 .setMIFlag(MachineInstr::FrameDestroy);
2729 if (SEHStackAllocAmt)
2730 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_StackAlloc))
2731 .addImm(Val: SEHStackAllocAmt)
2732 .setMIFlag(MachineInstr::FrameDestroy);
2733 }
// Recompute SP from the frame pointer: LEA when a displacement is needed,
// plain register MOV otherwise. MBBI is then stepped back onto the new
// instruction.
2734 if (LEAAmount != 0) {
2735 unsigned Opc = getLEArOpcode(IsLP64: Uses64BitFramePtr);
2736 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr), Reg: FramePtr,
2737 isKill: false, Offset: LEAAmount);
2738 --MBBI;
2739 } else {
2740 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2741 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr).addReg(RegNo: FramePtr);
2742 --MBBI;
2743 }
2744 } else if (NumBytes) {
2745 // Adjust stack pointer back: ESP += numbytes.
2746 if (IsWin64UnwindV3)
2747 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_StackAlloc))
2748 .addImm(Val: NumBytes)
2749 .setMIFlag(MachineInstr::FrameDestroy);
2750 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2751 if (!HasFP && NeedsDwarfCFI) {
2752 // Define the current CFA rule to use the provided offset.
2753 BuildCFI(MBB, MBBI, DL,
2754 CFIInst: MCCFIInstruction::cfiDefCfaOffset(
2755 L: nullptr, Offset: CSSize + TailCallArgReserveSize + SlotSize),
2756 Flag: MachineInstr::FrameDestroy);
2757 }
2758 --MBBI;
2759 }
2760
2761 // For V1/V2, emit SEH_BeginEpilogue after stack restore code.
2762 if (!IsWin64UnwindV3 && NeedsWin64CFI && MF.hasWinCFI())
2763 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_BeginEpilogue));
2764
// Without a frame pointer the CFA is SP-relative, so it has to be updated
// after every pop. Walk forward from the first pop and emit a
// .cfi_def_cfa_offset after each POP (one slot) or POP2 (two slots).
2765 if (!HasFP && NeedsDwarfCFI) {
2766 MBBI = FirstCSPop;
2767 int64_t Offset = -(int64_t)CSSize - SlotSize;
2768 // Mark callee-saved pop instruction.
2769 // Define the current CFA rule to use the provided offset.
2770 while (MBBI != MBB.end()) {
2771 MachineBasicBlock::iterator PI = MBBI;
2772 unsigned Opc = PI->getOpcode();
2773 ++MBBI;
2774 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2775 Opc == X86::POP2 || Opc == X86::POP2P) {
2776 Offset += SlotSize;
2777 // Compared to pop, pop2 introduces more stack offset (one more
2778 // register).
2779 if (Opc == X86::POP2 || Opc == X86::POP2P)
2780 Offset += SlotSize;
2781 BuildCFI(MBB, MBBI, DL,
2782 CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -Offset),
2783 Flag: MachineInstr::FrameDestroy);
2784 }
2785 }
2786 }
2787
2788 // Emit DWARF info specifying the restores of the callee-saved registers.
2789 // For epilogue with return inside or being other block without successor,
2790 // no need to generate .cfi_restore for callee-saved registers.
2791 if (NeedsDwarfCFI && !MBB.succ_empty())
2792 emitCalleeSavedFrameMoves(MBB, MBBI: AfterPop, DL, IsPrologue: false);
2793
2794 if (Terminator == MBB.end() || !isTailCallOpcode(Opc: Terminator->getOpcode())) {
2795 // Add the return addr area delta back since we are not tail calling.
2796 int64_t Delta = X86FI->getTCReturnAddrDelta();
2797 assert(Delta <= 0 && "TCDelta should never be positive");
2798 if (Delta) {
2799 // Check for possible merge with preceding ADD instruction.
2800 int64_t Offset = mergeSPAdd(MBB, MBBI&: Terminator, AddOffset: -Delta, doMergeWithPrevious: true);
2801 emitSPUpdate(MBB, MBBI&: Terminator, DL, NumBytes: Offset, /*InEpilogue=*/true);
2802 }
2803 }
2804
2805 // Emit tilerelease for AMX kernel.
2806 if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
2807 BuildMI(BB&: MBB, I: Terminator, MIMD: DL, MCID: TII.get(Opcode: X86::TILERELEASE));
2808
// Close the SEH epilogue region directly in front of the terminator.
2809 if (NeedsWin64CFI && MF.hasWinCFI())
2810 BuildMI(BB&: MBB, I: Terminator, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_EndEpilogue));
2811 }
2812
2813StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2814 int FI,
2815 Register &FrameReg) const {
2816 const MachineFrameInfo &MFI = MF.getFrameInfo();
2817
2818 bool IsFixed = MFI.isFixedObjectIndex(ObjectIdx: FI);
2819 // We can't calculate offset from frame pointer if the stack is realigned,
2820 // so enforce usage of stack/base pointer. The base pointer is used when we
2821 // have dynamic allocas in addition to dynamic realignment.
2822 if (TRI->hasBasePointer(MF))
2823 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2824 else if (TRI->hasStackRealignment(MF))
2825 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2826 else
2827 FrameReg = TRI->getFrameRegister(MF);
2828
2829 // Offset will hold the offset from the stack pointer at function entry to the
2830 // object.
2831 // We need to factor in additional offsets applied during the prologue to the
2832 // frame, base, and stack pointer depending on which is used.
2833 int64_t Offset = MFI.getObjectOffset(ObjectIdx: FI) - getOffsetOfLocalArea();
2834 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2835 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2836 uint64_t StackSize = MFI.getStackSize();
2837 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
2838 int64_t FPDelta = 0;
2839
2840 // In an x86 interrupt, remove the offset we added to account for the return
2841 // address from any stack object allocated in the caller's frame. Interrupts
2842 // do not have a standard return address. Fixed objects in the current frame,
2843 // such as SSE register spills, should not get this treatment.
2844 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2845 Offset >= 0) {
2846 Offset += getOffsetOfLocalArea();
2847 }
2848
2849 if (IsWin64Prologue) {
2850 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2851
2852 // Calculate required stack adjustment.
2853 uint64_t FrameSize = StackSize - SlotSize;
2854 // If required, include space for extra hidden slot for stashing base
2855 // pointer.
2856 if (X86FI->getRestoreBasePointer())
2857 FrameSize += SlotSize;
2858 uint64_t NumBytes = FrameSize - CSSize;
2859
2860 uint64_t SEHFrameOffset = calculateSetFPREG(SPAdjust: NumBytes);
2861 if (FI && FI == X86FI->getFAIndex())
2862 return StackOffset::getFixed(Fixed: -SEHFrameOffset);
2863
2864 // FPDelta is the offset from the "traditional" FP location of the old base
2865 // pointer followed by return address and the location required by the
2866 // restricted Win64 prologue.
2867 // Add FPDelta to all offsets below that go through the frame pointer.
2868 FPDelta = FrameSize - SEHFrameOffset;
2869 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2870 "FPDelta isn't aligned per the Win64 ABI!");
2871 }
2872
2873 if (FrameReg == TRI->getFramePtr()) {
2874 // Skip saved EBP/RBP
2875 Offset += SlotSize;
2876
2877 // Account for restricted Windows prologue.
2878 Offset += FPDelta;
2879
2880 // Skip the RETADDR move area
2881 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2882 if (TailCallReturnAddrDelta < 0)
2883 Offset -= TailCallReturnAddrDelta;
2884
2885 return StackOffset::getFixed(Fixed: Offset);
2886 }
2887
2888 // FrameReg is either the stack pointer or a base pointer. But the base is
2889 // located at the end of the statically known StackSize so the distinction
2890 // doesn't really matter.
2891 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2892 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2893 return StackOffset::getFixed(Fixed: Offset + StackSize);
2894}
2895
2896int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2897 Register &FrameReg) const {
2898 const MachineFrameInfo &MFI = MF.getFrameInfo();
2899 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2900 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2901 const auto it = WinEHXMMSlotInfo.find(Val: FI);
2902
2903 if (it == WinEHXMMSlotInfo.end())
2904 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2905
2906 FrameReg = TRI->getStackRegister();
2907 return alignDown(Value: MFI.getMaxCallFrameSize(), Align: getStackAlign().value()) +
2908 it->second;
2909}
2910
2911StackOffset
2912X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2913 Register &FrameReg,
2914 int Adjustment) const {
2915 const MachineFrameInfo &MFI = MF.getFrameInfo();
2916 FrameReg = TRI->getStackRegister();
2917 return StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FI) -
2918 getOffsetOfLocalArea() + Adjustment);
2919}
2920
2921StackOffset
2922X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2923 int FI, Register &FrameReg,
2924 bool IgnoreSPUpdates) const {
2925
2926 const MachineFrameInfo &MFI = MF.getFrameInfo();
2927 // Does not include any dynamic realign.
2928 const uint64_t StackSize = MFI.getStackSize();
2929 // LLVM arranges the stack as follows:
2930 // ...
2931 // ARG2
2932 // ARG1
2933 // RETADDR
2934 // PUSH RBP <-- RBP points here
2935 // PUSH CSRs
2936 // ~~~~~~~ <-- possible stack realignment (non-win64)
2937 // ...
2938 // STACK OBJECTS
2939 // ... <-- RSP after prologue points here
2940 // ~~~~~~~ <-- possible stack realignment (win64)
2941 //
2942 // if (hasVarSizedObjects()):
2943 // ... <-- "base pointer" (ESI/RBX) points here
2944 // DYNAMIC ALLOCAS
2945 // ... <-- RSP points here
2946 //
2947 // Case 1: In the simple case of no stack realignment and no dynamic
2948 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2949 // with fixed offsets from RSP.
2950 //
2951 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2952 // stack objects are addressed with RBP and regular stack objects with RSP.
2953 //
2954 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2955 // to address stack arguments for outgoing calls and nothing else. The "base
2956 // pointer" points to local variables, and RBP points to fixed objects.
2957 //
2958 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2959 // answer we give is relative to the SP after the prologue, and not the
2960 // SP in the middle of the function.
2961
2962 if (MFI.isFixedObjectIndex(ObjectIdx: FI) && TRI->hasStackRealignment(MF) &&
2963 !STI.isTargetWin64())
2964 return getFrameIndexReference(MF, FI, FrameReg);
2965
2966 // If !hasReservedCallFrame the function might have SP adjustement in the
2967 // body. So, even though the offset is statically known, it depends on where
2968 // we are in the function.
2969 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2970 return getFrameIndexReference(MF, FI, FrameReg);
2971
2972 // We don't handle tail calls, and shouldn't be seeing them either.
2973 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2974 "we don't handle this case!");
2975
2976 // This is how the math works out:
2977 //
2978 // %rsp grows (i.e. gets lower) left to right. Each box below is
2979 // one word (eight bytes). Obj0 is the stack slot we're trying to
2980 // get to.
2981 //
2982 // ----------------------------------
2983 // | BP | Obj0 | Obj1 | ... | ObjN |
2984 // ----------------------------------
2985 // ^ ^ ^ ^
2986 // A B C E
2987 //
2988 // A is the incoming stack pointer.
2989 // (B - A) is the local area offset (-8 for x86-64) [1]
2990 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2991 //
2992 // |(E - B)| is the StackSize (absolute value, positive). For a
2993 // stack that grown down, this works out to be (B - E). [3]
2994 //
2995 // E is also the value of %rsp after stack has been set up, and we
2996 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2997 // (C - E) == (C - A) - (B - A) + (B - E)
2998 // { Using [1], [2] and [3] above }
2999 // == getObjectOffset - LocalAreaOffset + StackSize
3000
3001 return getFrameIndexReferenceSP(MF, FI, FrameReg, Adjustment: StackSize);
3002}
3003
3004bool X86FrameLowering::assignCalleeSavedSpillSlots(
3005 MachineFunction &MF, const TargetRegisterInfo *TRI,
3006 std::vector<CalleeSavedInfo> &CSI) const {
3007 MachineFrameInfo &MFI = MF.getFrameInfo();
3008 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3009
3010 unsigned CalleeSavedFrameSize = 0;
3011 unsigned XMMCalleeSavedFrameSize = 0;
3012 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
3013 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
3014
3015 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
3016
3017 if (TailCallReturnAddrDelta < 0) {
3018 // create RETURNADDR area
3019 // arg
3020 // arg
3021 // RETADDR
3022 // { ...
3023 // RETADDR area
3024 // ...
3025 // }
3026 // [EBP]
3027 MFI.CreateFixedObject(Size: -TailCallReturnAddrDelta,
3028 SPOffset: TailCallReturnAddrDelta - SlotSize, IsImmutable: true);
3029 }
3030
3031 // Spill the BasePtr if it's used.
3032 if (this->TRI->hasBasePointer(MF)) {
3033 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
3034 if (MF.hasEHFunclets()) {
3035 int FI = MFI.CreateSpillStackObject(Size: SlotSize, Alignment: Align(SlotSize));
3036 X86FI->setHasSEHFramePtrSave(true);
3037 X86FI->setSEHFramePtrSaveIndex(FI);
3038 }
3039 }
3040
3041 bool IsFPRemovedFromCSI = false;
3042 if (hasFP(MF)) {
3043 // emitPrologue always spills frame register the first thing.
3044 SpillSlotOffset -= SlotSize;
3045 MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset);
3046
3047 // The async context lives directly before the frame pointer, and we
3048 // allocate a second slot to preserve stack alignment.
3049 if (X86FI->hasSwiftAsyncContext()) {
3050 SpillSlotOffset -= SlotSize;
3051 MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset);
3052 SpillSlotOffset -= SlotSize;
3053 }
3054
3055 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
3056 // the frame register, we can delete it from CSI list and not have to worry
3057 // about avoiding it later.
3058 Register FPReg = TRI->getFrameRegister(MF);
3059 for (unsigned i = 0; i < CSI.size(); ++i) {
3060 if (TRI->regsOverlap(RegA: CSI[i].getReg(), RegB: FPReg)) {
3061 CSI.erase(position: CSI.begin() + i);
3062 IsFPRemovedFromCSI = true;
3063 break;
3064 }
3065 }
3066 }
3067
3068 // Strategy:
3069 // 1. Use push2 when
3070 // a) number of CSR > 1 if no need padding
3071 // b) number of CSR > 2 if need padding
3072 // c) stack alignment >= 16 bytes
3073 // 2. When the number of CSR push is odd
3074 // a. Start to use push2 from the 1st push if stack is 16B aligned.
3075 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
3076 // 3. When the number of CSR push is even, start to use push2 from the 1st
3077 // push and make the stack 16B aligned before the push
3078 unsigned NumRegsForPush2 = 0;
3079 if (STI.hasPush2Pop2() && getStackAlignment() >= 16) {
3080 unsigned NumCSGPR = llvm::count_if(Range&: CSI, P: [](const CalleeSavedInfo &I) {
3081 return X86::GR64RegClass.contains(Reg: I.getReg());
3082 });
3083 bool UsePush2Pop2 = !IsFPRemovedFromCSI ? NumCSGPR > 2 : NumCSGPR > 1;
3084 NumRegsForPush2 =
3085 UsePush2Pop2
3086 ? alignDown(Value: IsFPRemovedFromCSI ? NumCSGPR : NumCSGPR - 1, Align: 2)
3087 : 0;
3088 }
3089
3090 // Assign slots for GPRs. It increases frame size.
3091 for (CalleeSavedInfo &I : llvm::reverse(C&: CSI)) {
3092 MCRegister Reg = I.getReg();
3093
3094 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3095 continue;
3096
    // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
    // or when the candidate list currently holds an odd number of registers.
3099 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
3100 (SpillSlotOffset % 16 == 0 ||
3101 X86FI->getNumCandidatesForPush2Pop2() % 2))
3102 X86FI->addCandidateForPush2Pop2(Reg);
3103
3104 SpillSlotOffset -= SlotSize;
3105 CalleeSavedFrameSize += SlotSize;
3106
3107 int SlotIndex = MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset);
3108 I.setFrameIdx(SlotIndex);
3109 }
3110
3111 // Adjust the offset of spill slot as we know the accurate callee saved frame
3112 // size.
3113 if (X86FI->getRestoreBasePointer()) {
3114 SpillSlotOffset -= SlotSize;
3115 CalleeSavedFrameSize += SlotSize;
3116
3117 MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset);
3118 // TODO: saving the slot index is better?
3119 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
3120 }
3121 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
3122 "Expect even candidates for push2/pop2");
3123 if (X86FI->getNumCandidatesForPush2Pop2())
3124 ++NumFunctionUsingPush2Pop2;
3125 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
3126 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
3127
3128 // Assign slots for XMMs.
3129 for (CalleeSavedInfo &I : llvm::reverse(C&: CSI)) {
3130 MCRegister Reg = I.getReg();
3131 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3132 continue;
3133
3134 const TargetRegisterClass *RC = getCalleeSavedSpillRC(Reg, STI, TRI: *TRI);
3135 unsigned Size = TRI->getSpillSize(RC: *RC);
3136 Align Alignment = TRI->getSpillAlign(RC: *RC);
3137 // ensure alignment
3138 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
3139 SpillSlotOffset = -alignTo(Size: -SpillSlotOffset, A: Alignment);
3140
3141 // spill into slot
3142 SpillSlotOffset -= Size;
3143 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SPOffset: SpillSlotOffset);
3144 I.setFrameIdx(SlotIndex);
3145 MFI.ensureMaxAlignment(Alignment);
3146
3147 // Save the start offset and size of XMM in stack frame for funclets.
3148 if (X86::VR128RegClass.contains(Reg)) {
3149 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
3150 XMMCalleeSavedFrameSize += Size;
3151 }
3152 }
3153
3154 return true;
3155}
3156
3157bool X86FrameLowering::spillCalleeSavedRegisters(
3158 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3159 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3160 DebugLoc DL = MBB.findDebugLoc(MBBI: MI);
3161
3162 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
3163 // for us, and there are no XMM CSRs on Win32.
3164 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
3165 return true;
3166
3167 // Push GPRs. It increases frame size.
3168 const MachineFunction &MF = *MBB.getParent();
3169 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3170
3171 // Update LiveIn of the basic block and decide whether we can add a kill flag
3172 // to the use.
3173 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
3174 const MachineRegisterInfo &MRI = MF.getRegInfo();
3175 // Do not set a kill flag on values that are also marked as live-in. This
3176 // happens with the @llvm-returnaddress intrinsic and with arguments
3177 // passed in callee saved registers.
3178 // Omitting the kill flags is conservatively correct even if the live-in
3179 // is not used after all.
3180 if (MRI.isLiveIn(Reg))
3181 return false;
3182 MBB.addLiveIn(PhysReg: Reg);
3183 // Check if any subregister is live-in
3184 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3185 if (MRI.isLiveIn(Reg: *AReg))
3186 return false;
3187 return true;
3188 };
3189 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3190 return getKillRegState(B: UpdateLiveInCheckCanKill(Reg));
3191 };
3192
3193 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3194 MCRegister Reg = RI->getReg();
3195 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3196 continue;
3197
3198 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3199 MCRegister Reg2 = (++RI)->getReg();
3200 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPUSH2Opcode(ST: STI)))
3201 .addReg(RegNo: Reg, Flags: UpdateLiveInGetKillRegState(Reg))
3202 .addReg(RegNo: Reg2, Flags: UpdateLiveInGetKillRegState(Reg2))
3203 .setMIFlag(MachineInstr::FrameSetup);
3204 } else {
3205 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPUSHOpcode(ST: STI)))
3206 .addReg(RegNo: Reg, Flags: UpdateLiveInGetKillRegState(Reg))
3207 .setMIFlag(MachineInstr::FrameSetup);
3208 }
3209 }
3210
3211 if (X86FI->getRestoreBasePointer()) {
3212 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3213 Register BaseReg = this->TRI->getBaseRegister();
3214 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc))
3215 .addReg(RegNo: BaseReg, Flags: getKillRegState(B: true))
3216 .setMIFlag(MachineInstr::FrameSetup);
3217 }
3218
3219 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
3220 // It can be done by spilling XMMs to stack frame.
3221 for (const CalleeSavedInfo &I : llvm::reverse(C&: CSI)) {
3222 MCRegister Reg = I.getReg();
3223 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3224 continue;
3225
3226 // Add the callee-saved register as live-in. It's killed at the spill.
3227 MBB.addLiveIn(PhysReg: Reg);
3228 const TargetRegisterClass *RC = getCalleeSavedSpillRC(Reg, STI, TRI: *TRI);
3229
3230 TII.storeRegToStackSlot(MBB, MI, SrcReg: Reg, isKill: true, FrameIndex: I.getFrameIdx(), RC, VReg: Register(),
3231 Flags: MachineInstr::FrameSetup);
3232 }
3233
3234 return true;
3235}
3236
3237void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3238 MachineBasicBlock::iterator MBBI,
3239 MachineInstr *CatchRet) const {
3240 // SEH shouldn't use catchret.
3241 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3242 MBB.getParent()->getFunction().getPersonalityFn())) &&
3243 "SEH should not use CATCHRET");
3244 const DebugLoc &DL = CatchRet->getDebugLoc();
3245 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(i: 0).getMBB();
3246
3247 // Fill EAX/RAX with the address of the target block.
3248 if (STI.is64Bit()) {
3249 // LEA64r CatchRetTarget(%rip), %rax
3250 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: X86::RAX)
3251 .addReg(RegNo: X86::RIP)
3252 .addImm(Val: 0)
3253 .addReg(RegNo: 0)
3254 .addMBB(MBB: CatchRetTarget)
3255 .addReg(RegNo: 0);
3256 } else {
3257 // MOV32ri $CatchRetTarget, %eax
3258 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: X86::EAX)
3259 .addMBB(MBB: CatchRetTarget);
3260 }
3261
3262 // Record that we've taken the address of CatchRetTarget and no longer just
3263 // reference it in a terminator.
3264 CatchRetTarget->setMachineBlockAddressTaken();
3265}
3266
3267bool X86FrameLowering::restoreCalleeSavedRegisters(
3268 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3269 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3270 if (CSI.empty())
3271 return false;
3272
3273 if (MI != MBB.end() && isFuncletReturnInstr(MI&: *MI) && STI.isOSWindows()) {
3274 // Don't restore CSRs in 32-bit EH funclets. Matches
3275 // spillCalleeSavedRegisters.
3276 if (STI.is32Bit())
3277 return true;
3278 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3279 // funclets. emitEpilogue transforms these to normal jumps.
3280 if (MI->getOpcode() == X86::CATCHRET) {
3281 const Function &F = MBB.getParent()->getFunction();
3282 bool IsSEH = isAsynchronousEHPersonality(
3283 Pers: classifyEHPersonality(Pers: F.getPersonalityFn()));
3284 if (IsSEH)
3285 return true;
3286 }
3287 }
3288
3289 DebugLoc DL = MBB.findDebugLoc(MBBI: MI);
3290 MachineFunction &MF = *MBB.getParent();
3291 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3292
3293 bool NeedsWin64CFI =
3294 isWin64Prologue(MF) && MF.getFunction().needsUnwindTableEntry();
3295 bool IsWin64UnwindV3 =
3296 NeedsWin64CFI && MF.getFunction().getParent()->getWinX64EHUnwindMode() ==
3297 WinX64EHUnwindMode::V3;
3298
3299 // Reload XMMs from stack frame.
3300 for (const CalleeSavedInfo &I : CSI) {
3301 MCRegister Reg = I.getReg();
3302 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3303 continue;
3304
3305 const TargetRegisterClass *RC = getCalleeSavedSpillRC(Reg, STI, TRI: *TRI);
3306 TII.loadRegFromStackSlot(MBB, MI, DestReg: Reg, FrameIndex: I.getFrameIdx(), RC, VReg: Register(), SubReg: 0,
3307 Flags: MachineInstr::FrameDestroy);
3308 }
3309
3310 // Clear the stack slot for spill base pointer register.
3311 if (X86FI->getRestoreBasePointer()) {
3312 if (IsWin64UnwindV3)
3313 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg))
3314 .addImm(Val: this->TRI->getBaseRegister())
3315 .setMIFlag(MachineInstr::FrameDestroy);
3316 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3317 Register BaseReg = this->TRI->getBaseRegister();
3318 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: BaseReg)
3319 .setMIFlag(MachineInstr::FrameDestroy);
3320 }
3321
3322 // POP GPRs.
3323 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3324 MCRegister Reg = I->getReg();
3325 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3326 continue;
3327
3328 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3329 MCRegister Reg2 = (++I)->getReg();
3330 if (IsWin64UnwindV3) {
3331 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_Push2Regs))
3332 .addImm(Val: Reg)
3333 .addImm(Val: Reg2)
3334 .setMIFlag(MachineInstr::FrameDestroy);
3335 }
3336 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPOP2Opcode(ST: STI)), DestReg: Reg)
3337 .addReg(RegNo: Reg2, Flags: RegState::Define)
3338 .setMIFlag(MachineInstr::FrameDestroy);
3339 } else {
3340 if (IsWin64UnwindV3)
3341 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg))
3342 .addImm(Val: Reg)
3343 .setMIFlag(MachineInstr::FrameDestroy);
3344 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPOPOpcode(ST: STI)), DestReg: Reg)
3345 .setMIFlag(MachineInstr::FrameDestroy);
3346 }
3347 }
3348
3349 return true;
3350}
3351
3352void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3353 BitVector &SavedRegs,
3354 RegScavenger *RS) const {
3355 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3356
3357 // Spill the BasePtr if it's used.
3358 if (TRI->hasBasePointer(MF)) {
3359 Register BasePtr = TRI->getBaseRegister();
3360 if (STI.isTarget64BitILP32())
3361 BasePtr = getX86SubSuperRegister(Reg: BasePtr, Size: 64);
3362 SavedRegs.set(BasePtr);
3363 }
3364 if (STI.hasUserReservedRegisters()) {
3365 for (int Reg = SavedRegs.find_first(); Reg != -1;
3366 Reg = SavedRegs.find_next(Prev: Reg)) {
3367 if (STI.isRegisterReservedByUser(i: Reg)) {
3368 SavedRegs.reset(Idx: Reg);
3369 }
3370 }
3371 }
3372}
3373
3374static bool HasNestArgument(const MachineFunction *MF) {
3375 const Function &F = MF->getFunction();
3376 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3377 I++) {
3378 if (I->hasNestAttr() && !I->use_empty())
3379 return true;
3380 }
3381 return false;
3382}
3383
3384/// GetScratchRegister - Get a temp register for performing work in the
3385/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3386/// and the properties of the function either one or two registers will be
3387/// needed. Set primary to true for the first register, false for the second.
3388static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3389 const MachineFunction &MF, bool Primary) {
3390 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3391
3392 // Erlang stuff.
3393 if (CallingConvention == CallingConv::HiPE) {
3394 if (Is64Bit)
3395 return Primary ? X86::R14 : X86::R13;
3396 else
3397 return Primary ? X86::EBX : X86::EDI;
3398 }
3399
3400 if (Is64Bit) {
3401 if (IsLP64)
3402 return Primary ? X86::R11 : X86::R12;
3403 else
3404 return Primary ? X86::R11D : X86::R12D;
3405 }
3406
3407 bool IsNested = HasNestArgument(MF: &MF);
3408
3409 if (CallingConvention == CallingConv::X86_FastCall ||
3410 CallingConvention == CallingConv::Fast ||
3411 CallingConvention == CallingConv::Tail) {
3412 if (IsNested)
3413 report_fatal_error(reason: "Segmented stacks does not support fastcall with "
3414 "nested function.");
3415 return Primary ? X86::EAX : X86::ECX;
3416 }
3417 if (IsNested)
3418 return Primary ? X86::EDX : X86::EAX;
3419 return Primary ? X86::ECX : X86::EAX;
3420}
3421
// Number of bytes by which the stack limit stored in the TCB sits above the
// actual stack limit.
static constexpr uint64_t kSplitStackAvailable = 256;
3425
3426void X86FrameLowering::adjustForSegmentedStacks(
3427 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3428 MachineFrameInfo &MFI = MF.getFrameInfo();
3429 uint64_t StackSize;
3430 unsigned TlsReg, TlsOffset;
3431 DebugLoc DL;
3432
3433 // To support shrink-wrapping we would need to insert the new blocks
3434 // at the right place and update the branches to PrologueMBB.
3435 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3436
3437 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true);
3438 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3439 "Scratch register is live-in");
3440
3441 if (MF.getFunction().isVarArg())
3442 report_fatal_error(reason: "Segmented stacks do not support vararg functions.");
3443 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3444 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3445 !STI.isTargetDragonFly())
3446 report_fatal_error(reason: "Segmented stacks not supported on this platform.");
3447
3448 // Eventually StackSize will be calculated by a link-time pass; which will
3449 // also decide whether checking code needs to be injected into this particular
3450 // prologue.
3451 StackSize = MFI.getStackSize();
3452
3453 if (!MFI.needsSplitStackProlog())
3454 return;
3455
3456 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3457 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3458 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3459 bool IsNested = false;
3460
3461 // We need to know if the function has a nest argument only in 64 bit mode.
3462 if (Is64Bit)
3463 IsNested = HasNestArgument(MF: &MF);
3464
3465 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3466 // allocMBB needs to be last (terminating) instruction.
3467
3468 for (const auto &LI : PrologueMBB.liveins()) {
3469 allocMBB->addLiveIn(RegMaskPair: LI);
3470 checkMBB->addLiveIn(RegMaskPair: LI);
3471 }
3472
3473 if (IsNested)
3474 allocMBB->addLiveIn(PhysReg: IsLP64 ? X86::R10 : X86::R10D);
3475
3476 MF.push_front(MBB: allocMBB);
3477 MF.push_front(MBB: checkMBB);
3478
3479 // When the frame size is less than 256 we just compare the stack
3480 // boundary directly to the value of the stack pointer, per gcc.
3481 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3482
3483 // Read the limit off the current stacklet off the stack_guard location.
3484 if (Is64Bit) {
3485 if (STI.isTargetLinux()) {
3486 TlsReg = X86::FS;
3487 TlsOffset = IsLP64 ? 0x70 : 0x40;
3488 } else if (STI.isTargetDarwin()) {
3489 TlsReg = X86::GS;
3490 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3491 } else if (STI.isTargetWin64()) {
3492 TlsReg = X86::GS;
3493 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3494 } else if (STI.isTargetFreeBSD()) {
3495 TlsReg = X86::FS;
3496 TlsOffset = 0x18;
3497 } else if (STI.isTargetDragonFly()) {
3498 TlsReg = X86::FS;
3499 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3500 } else {
3501 report_fatal_error(reason: "Segmented stacks not supported on this platform.");
3502 }
3503
3504 if (CompareStackPointer)
3505 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3506 else
3507 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3508 DestReg: ScratchReg)
3509 .addReg(RegNo: X86::RSP)
3510 .addImm(Val: 1)
3511 .addReg(RegNo: 0)
3512 .addImm(Val: -StackSize)
3513 .addReg(RegNo: 0);
3514
3515 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3516 .addReg(RegNo: ScratchReg)
3517 .addReg(RegNo: 0)
3518 .addImm(Val: 1)
3519 .addReg(RegNo: 0)
3520 .addImm(Val: TlsOffset)
3521 .addReg(RegNo: TlsReg);
3522 } else {
3523 if (STI.isTargetLinux()) {
3524 TlsReg = X86::GS;
3525 TlsOffset = 0x30;
3526 } else if (STI.isTargetDarwin()) {
3527 TlsReg = X86::GS;
3528 TlsOffset = 0x48 + 90 * 4;
3529 } else if (STI.isTargetWin32()) {
3530 TlsReg = X86::FS;
3531 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3532 } else if (STI.isTargetDragonFly()) {
3533 TlsReg = X86::FS;
3534 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3535 } else if (STI.isTargetFreeBSD()) {
3536 report_fatal_error(reason: "Segmented stacks not supported on FreeBSD i386.");
3537 } else {
3538 report_fatal_error(reason: "Segmented stacks not supported on this platform.");
3539 }
3540
3541 if (CompareStackPointer)
3542 ScratchReg = X86::ESP;
3543 else
3544 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::LEA32r), DestReg: ScratchReg)
3545 .addReg(RegNo: X86::ESP)
3546 .addImm(Val: 1)
3547 .addReg(RegNo: 0)
3548 .addImm(Val: -StackSize)
3549 .addReg(RegNo: 0);
3550
3551 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3552 STI.isTargetDragonFly()) {
3553 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP32rm))
3554 .addReg(RegNo: ScratchReg)
3555 .addReg(RegNo: 0)
3556 .addImm(Val: 0)
3557 .addReg(RegNo: 0)
3558 .addImm(Val: TlsOffset)
3559 .addReg(RegNo: TlsReg);
3560 } else if (STI.isTargetDarwin()) {
3561
3562 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3563 unsigned ScratchReg2;
3564 bool SaveScratch2;
3565 if (CompareStackPointer) {
3566 // The primary scratch register is available for holding the TLS offset.
3567 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true);
3568 SaveScratch2 = false;
3569 } else {
3570 // Need to use a second register to hold the TLS offset
3571 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: false);
3572
3573 // Unfortunately, with fastcc the second scratch register may hold an
3574 // argument.
3575 SaveScratch2 = MF.getRegInfo().isLiveIn(Reg: ScratchReg2);
3576 }
3577
3578 // If Scratch2 is live-in then it needs to be saved.
3579 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3580 "Scratch register is live-in and not saved");
3581
3582 if (SaveScratch2)
3583 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32r))
3584 .addReg(RegNo: ScratchReg2, Flags: RegState::Kill);
3585
3586 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: ScratchReg2)
3587 .addImm(Val: TlsOffset);
3588 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP32rm))
3589 .addReg(RegNo: ScratchReg)
3590 .addReg(RegNo: ScratchReg2)
3591 .addImm(Val: 1)
3592 .addReg(RegNo: 0)
3593 .addImm(Val: 0)
3594 .addReg(RegNo: TlsReg);
3595
3596 if (SaveScratch2)
3597 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::POP32r), DestReg: ScratchReg2);
3598 }
3599 }
3600
3601 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3602 // It jumps to normal execution of the function body.
3603 BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
3604 .addMBB(MBB: &PrologueMBB)
3605 .addImm(Val: X86::COND_A);
3606
3607 // On 32 bit we first push the arguments size and then the frame size. On 64
3608 // bit, we pass the stack frame size in r10 and the argument size in r11.
3609 if (Is64Bit) {
3610 // Functions with nested arguments use R10, so it needs to be saved across
3611 // the call to _morestack
3612
3613 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3614 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3615 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3616 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3617
3618 if (IsNested)
3619 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: MOVrr), DestReg: RegAX).addReg(RegNo: Reg10);
3620
3621 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::getMOVriOpcode(Use64BitReg: IsLP64, Imm: StackSize)),
3622 DestReg: Reg10)
3623 .addImm(Val: StackSize);
3624 BuildMI(BB: allocMBB, MIMD: DL,
3625 MCID: TII.get(Opcode: X86::getMOVriOpcode(Use64BitReg: IsLP64, Imm: X86FI->getArgumentStackSize())),
3626 DestReg: Reg11)
3627 .addImm(Val: X86FI->getArgumentStackSize());
3628 } else {
3629 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32i))
3630 .addImm(Val: X86FI->getArgumentStackSize());
3631 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32i)).addImm(Val: StackSize);
3632 }
3633
3634 // __morestack is in libgcc
3635 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3636 // Under the large code model, we cannot assume that __morestack lives
3637 // within 2^31 bytes of the call site, so we cannot use pc-relative
3638 // addressing. We cannot perform the call via a temporary register,
3639 // as the rax register may be used to store the static chain, and all
3640 // other suitable registers may be either callee-save or used for
3641 // parameter passing. We cannot use the stack at this point either
3642 // because __morestack manipulates the stack directly.
3643 //
3644 // To avoid these issues, perform an indirect call via a read-only memory
3645 // location containing the address.
3646 //
3647 // This solution is not perfect, as it assumes that the .rodata section
3648 // is laid out within 2^31 bytes of each function body, but this seems
3649 // to be sufficient for JIT.
3650 // FIXME: Add retpoline support and remove the error here..
3651 if (STI.useIndirectThunkCalls())
3652 report_fatal_error(reason: "Emitting morestack calls on 64-bit with the large "
3653 "code model and thunks not yet implemented.");
3654 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALL64m))
3655 .addReg(RegNo: X86::RIP)
3656 .addImm(Val: 0)
3657 .addReg(RegNo: 0)
3658 .addExternalSymbol(FnName: "__morestack_addr")
3659 .addReg(RegNo: 0);
3660 } else {
3661 if (Is64Bit)
3662 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALL64pcrel32))
3663 .addExternalSymbol(FnName: "__morestack");
3664 else
3665 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALLpcrel32))
3666 .addExternalSymbol(FnName: "__morestack");
3667 }
3668
3669 if (IsNested)
3670 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MORESTACK_RET_RESTORE_R10));
3671 else
3672 BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MORESTACK_RET));
3673
3674 allocMBB->addSuccessor(Succ: &PrologueMBB);
3675
3676 checkMBB->addSuccessor(Succ: allocMBB, Prob: BranchProbability::getZero());
3677 checkMBB->addSuccessor(Succ: &PrologueMBB, Prob: BranchProbability::getOne());
3678
3679#ifdef EXPENSIVE_CHECKS
3680 MF.verify();
3681#endif
3682}
3683
3684/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3685/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3686/// to fields it needs, through a named metadata node "hipe.literals" containing
3687/// name-value pairs.
3688static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3689 const StringRef LiteralName) {
3690 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3691 MDNode *Node = HiPELiteralsMD->getOperand(i);
3692 if (Node->getNumOperands() != 2)
3693 continue;
3694 MDString *NodeName = dyn_cast<MDString>(Val: Node->getOperand(I: 0));
3695 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Val: Node->getOperand(I: 1));
3696 if (!NodeName || !NodeVal)
3697 continue;
3698 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(Val: NodeVal->getValue());
3699 if (ValConst && NodeName->getString() == LiteralName) {
3700 return ValConst->getZExtValue();
3701 }
3702 }
3703
3704 report_fatal_error(reason: "HiPE literal " + LiteralName +
3705 " required but not provided");
3706}
3707
3708// Return true if there are no non-ehpad successors to MBB and there are no
3709// non-meta instructions between MBBI and MBB.end().
3710static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3711 MachineBasicBlock::const_iterator MBBI) {
3712 return llvm::all_of(
3713 Range: MBB.successors(),
3714 P: [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3715 std::all_of(first: MBBI, last: MBB.end(), pred: [](const MachineInstr &MI) {
3716 return MI.isMetaInstruction();
3717 });
3718}
3719
3720/// Erlang programs may need a special prologue to handle the stack size they
3721/// might need at runtime. That is because Erlang/OTP does not implement a C
3722/// stack but uses a custom implementation of hybrid stack/heap architecture.
3723/// (for more information see Eric Stenman's Ph.D. thesis:
3724/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3725///
3726/// CheckStack:
3727/// temp0 = sp - MaxStack
3728/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3729/// OldStart:
3730/// ...
3731/// IncStack:
3732/// call inc_stack # doubles the stack space
3733/// temp0 = sp - MaxStack
3734/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3735void X86FrameLowering::adjustForHiPEPrologue(
3736 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3737 MachineFrameInfo &MFI = MF.getFrameInfo();
3738 DebugLoc DL;
3739
3740 // To support shrink-wrapping we would need to insert the new blocks
3741 // at the right place and update the branches to PrologueMBB.
3742 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3743
3744 // HiPE-specific values
3745 NamedMDNode *HiPELiteralsMD =
3746 MF.getFunction().getParent()->getNamedMetadata(Name: "hipe.literals");
3747 if (!HiPELiteralsMD)
3748 report_fatal_error(
3749 reason: "Can't generate HiPE prologue without runtime parameters");
3750 const unsigned HipeLeafWords = getHiPELiteral(
3751 HiPELiteralsMD, LiteralName: Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3752 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3753 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3754 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3755 ? MF.getFunction().arg_size() - CCRegisteredArgs
3756 : 0;
3757 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3758
3759 assert(STI.isTargetLinux() &&
3760 "HiPE prologue is only supported on Linux operating systems.");
3761
3762 // Compute the largest caller's frame that is needed to fit the callees'
3763 // frames. This 'MaxStack' is computed from:
3764 //
3765 // a) the fixed frame size, which is the space needed for all spilled temps,
3766 // b) outgoing on-stack parameter areas, and
3767 // c) the minimum stack space this function needs to make available for the
3768 // functions it calls (a tunable ABI property).
3769 if (MFI.hasCalls()) {
3770 unsigned MoreStackForCalls = 0;
3771
3772 for (auto &MBB : MF) {
3773 for (auto &MI : MBB) {
3774 if (!MI.isCall())
3775 continue;
3776
3777 // Get callee operand.
3778 const MachineOperand &MO = MI.getOperand(i: 0);
3779
3780 // Only take account of global function calls (no closures etc.).
3781 if (!MO.isGlobal())
3782 continue;
3783
3784 const Function *F = dyn_cast<Function>(Val: MO.getGlobal());
3785 if (!F)
3786 continue;
3787
3788 // Do not update 'MaxStack' for primitive and built-in functions
3789 // (encoded with names either starting with "erlang."/"bif_" or not
3790 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3791 // "_", such as the BIF "suspend_0") as they are executed on another
3792 // stack.
3793 if (F->getName().contains(Other: "erlang.") || F->getName().contains(Other: "bif_") ||
3794 F->getName().find_first_of(Chars: "._") == StringRef::npos)
3795 continue;
3796
3797 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3798 ? F->arg_size() - CCRegisteredArgs
3799 : 0;
3800 if (HipeLeafWords - 1 > CalleeStkArity)
3801 MoreStackForCalls =
3802 std::max(a: MoreStackForCalls,
3803 b: (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3804 }
3805 }
3806 MaxStack += MoreStackForCalls;
3807 }
3808
3809 // If the stack frame needed is larger than the guaranteed then runtime checks
3810 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
3811 if (MaxStack > Guaranteed) {
3812 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3813 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3814
3815 for (const auto &LI : PrologueMBB.liveins()) {
3816 stackCheckMBB->addLiveIn(RegMaskPair: LI);
3817 incStackMBB->addLiveIn(RegMaskPair: LI);
3818 }
3819
3820 MF.push_front(MBB: incStackMBB);
3821 MF.push_front(MBB: stackCheckMBB);
3822
3823 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3824 unsigned LEAop, CMPop, CALLop;
3825 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, LiteralName: "P_NSP_LIMIT");
3826 if (Is64Bit) {
3827 SPReg = X86::RSP;
3828 PReg = X86::RBP;
3829 LEAop = X86::LEA64r;
3830 CMPop = X86::CMP64rm;
3831 CALLop = X86::CALL64pcrel32;
3832 } else {
3833 SPReg = X86::ESP;
3834 PReg = X86::EBP;
3835 LEAop = X86::LEA32r;
3836 CMPop = X86::CMP32rm;
3837 CALLop = X86::CALLpcrel32;
3838 }
3839
3840 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true);
3841 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3842 "HiPE prologue scratch register is live-in");
3843
3844 // Create new MBB for StackCheck:
3845 addRegOffset(MIB: BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: LEAop), DestReg: ScratchReg), Reg: SPReg,
3846 isKill: false, Offset: -MaxStack);
3847 // SPLimitOffset is in a fixed heap location (pointed by BP).
3848 addRegOffset(MIB: BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: CMPop)).addReg(RegNo: ScratchReg),
3849 Reg: PReg, isKill: false, Offset: SPLimitOffset);
3850 BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
3851 .addMBB(MBB: &PrologueMBB)
3852 .addImm(Val: X86::COND_AE);
3853
3854 // Create new MBB for IncStack:
3855 BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: CALLop)).addExternalSymbol(FnName: "inc_stack_0");
3856 addRegOffset(MIB: BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: LEAop), DestReg: ScratchReg), Reg: SPReg,
3857 isKill: false, Offset: -MaxStack);
3858 addRegOffset(MIB: BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: CMPop)).addReg(RegNo: ScratchReg),
3859 Reg: PReg, isKill: false, Offset: SPLimitOffset);
3860 BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1))
3861 .addMBB(MBB: incStackMBB)
3862 .addImm(Val: X86::COND_LE);
3863
3864 stackCheckMBB->addSuccessor(Succ: &PrologueMBB, Prob: {99, 100});
3865 stackCheckMBB->addSuccessor(Succ: incStackMBB, Prob: {1, 100});
3866 incStackMBB->addSuccessor(Succ: &PrologueMBB, Prob: {99, 100});
3867 incStackMBB->addSuccessor(Succ: incStackMBB, Prob: {1, 100});
3868 }
3869#ifdef EXPENSIVE_CHECKS
3870 MF.verify();
3871#endif
3872}
3873
3874bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3875 MachineBasicBlock::iterator MBBI,
3876 const DebugLoc &DL,
3877 int Offset) const {
3878 if (Offset <= 0)
3879 return false;
3880
3881 if (Offset % SlotSize)
3882 return false;
3883
3884 int NumPops = Offset / SlotSize;
3885 // This is only worth it if we have at most 2 pops.
3886 if (NumPops != 1 && NumPops != 2)
3887 return false;
3888
3889 // Handle only the trivial case where the adjustment directly follows
3890 // a call. This is the most common one, anyway.
3891 if (MBBI == MBB.begin())
3892 return false;
3893 MachineBasicBlock::iterator Prev = std::prev(x: MBBI);
3894 if (!Prev->isCall() || !Prev->getOperand(i: 1).isRegMask())
3895 return false;
3896
3897 unsigned Regs[2];
3898 unsigned FoundRegs = 0;
3899
3900 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3901 const MachineOperand &RegMask = Prev->getOperand(i: 1);
3902
3903 auto &RegClass =
3904 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3905 // Try to find up to NumPops free registers.
3906 for (auto Candidate : RegClass) {
3907 // Poor man's liveness:
3908 // Since we're immediately after a call, any register that is clobbered
3909 // by the call and not defined by it can be considered dead.
3910 if (!RegMask.clobbersPhysReg(PhysReg: Candidate))
3911 continue;
3912
3913 // Don't clobber reserved registers
3914 if (MRI.isReserved(PhysReg: Candidate))
3915 continue;
3916
3917 bool IsDef = false;
3918 for (const MachineOperand &MO : Prev->implicit_operands()) {
3919 if (MO.isReg() && MO.isDef() &&
3920 TRI->isSuperOrSubRegisterEq(RegA: MO.getReg(), RegB: Candidate)) {
3921 IsDef = true;
3922 break;
3923 }
3924 }
3925
3926 if (IsDef)
3927 continue;
3928
3929 Regs[FoundRegs++] = Candidate;
3930 if (FoundRegs == (unsigned)NumPops)
3931 break;
3932 }
3933
3934 if (FoundRegs == 0)
3935 return false;
3936
3937 // If we found only one free register, but need two, reuse the same one twice.
3938 while (FoundRegs < (unsigned)NumPops)
3939 Regs[FoundRegs++] = Regs[0];
3940
3941 for (int i = 0; i < NumPops; ++i)
3942 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: STI.is64Bit() ? X86::POP64r : X86::POP32r),
3943 DestReg: Regs[i]);
3944
3945 return true;
3946}
3947
3948MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3949 MachineFunction &MF, MachineBasicBlock &MBB,
3950 MachineBasicBlock::iterator I) const {
3951 bool reserveCallFrame = hasReservedCallFrame(MF);
3952 unsigned Opcode = I->getOpcode();
3953 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3954 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3955 uint64_t Amount = TII.getFrameSize(I: *I);
3956 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(I: *I) : 0;
3957 I = MBB.erase(I);
3958 auto InsertPos = skipDebugInstructionsForward(It: I, End: MBB.end());
3959
3960 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3961 // typically because the function is marked noreturn (abort, throw,
3962 // assert_fail, etc).
3963 if (isDestroy && blockEndIsUnreachable(MBB, MBBI: I))
3964 return I;
3965
3966 if (!reserveCallFrame) {
3967 // If the stack pointer can be changed after prologue, turn the
3968 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3969 // adjcallstackdown instruction into 'add ESP, <amt>'
3970
3971 // We need to keep the stack aligned properly. To do this, we round the
3972 // amount of space needed for the outgoing arguments up to the next
3973 // alignment boundary.
3974 Amount = alignTo(Size: Amount, A: getStackAlign());
3975
3976 const Function &F = MF.getFunction();
3977 bool WindowsCFI = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
3978 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3979
3980 // If we have any exception handlers in this function, and we adjust
3981 // the SP before calls, we may need to indicate this to the unwinder
3982 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3983 // Amount == 0, because the preceding function may have set a non-0
3984 // GNU_ARGS_SIZE.
3985 // TODO: We don't need to reset this between subsequent functions,
3986 // if it didn't change.
3987 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3988
3989 if (HasDwarfEHHandlers && !isDestroy &&
3990 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3991 BuildCFI(MBB, MBBI: InsertPos, DL,
3992 CFIInst: MCCFIInstruction::createGnuArgsSize(L: nullptr, Size: Amount));
3993
3994 if (Amount == 0)
3995 return I;
3996
3997 // Factor out the amount that gets handled inside the sequence
3998 // (Pushes of argument for frame setup, callee pops for frame destroy)
3999 Amount -= InternalAmt;
4000
4001 // TODO: This is needed only if we require precise CFA.
4002 // If this is a callee-pop calling convention, emit a CFA adjust for
4003 // the amount the callee popped.
4004 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
4005 BuildCFI(MBB, MBBI: InsertPos, DL,
4006 CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: -InternalAmt));
4007
4008 // Add Amount to SP to destroy a frame, or subtract to setup.
4009 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
4010 int64_t CfaAdjustment = StackAdjustment;
4011
4012 if (StackAdjustment) {
4013 // Merge with any previous or following adjustment instruction. Note: the
4014 // instructions merged with here do not have CFI, so their stack
4015 // adjustments do not feed into CfaAdjustment
4016
4017 auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI,
4018 int64_t Offset) {
4019 CfaAdjustment += Offset;
4020 };
4021 auto CalcNewOffset = [&StackAdjustment](int64_t Offset) {
4022 return StackAdjustment + Offset;
4023 };
4024 StackAdjustment =
4025 mergeSPUpdates(MBB, MBBI&: InsertPos, FoundStackAdjust: CalcCfaAdjust, CalcNewOffset, doMergeWithPrevious: true);
4026 StackAdjustment =
4027 mergeSPUpdates(MBB, MBBI&: InsertPos, FoundStackAdjust: CalcCfaAdjust, CalcNewOffset, doMergeWithPrevious: false);
4028
4029 if (StackAdjustment) {
4030 if (!(F.hasMinSize() &&
4031 adjustStackWithPops(MBB, MBBI: InsertPos, DL, Offset: StackAdjustment)))
4032 BuildStackAdjustment(MBB, MBBI: InsertPos, DL, Offset: StackAdjustment,
4033 /*InEpilogue=*/false);
4034 }
4035 }
4036
4037 if (DwarfCFI && !hasFP(MF) && CfaAdjustment) {
4038 // If we don't have FP, but need to generate unwind information,
4039 // we need to set the correct CFA offset after the stack adjustment.
4040 // How much we adjust the CFA offset depends on whether we're emitting
4041 // CFI only for EH purposes or for debugging. EH only requires the CFA
4042 // offset to be correct at each call site, while for debugging we want
4043 // it to be more precise.
4044
4045 // TODO: When not using precise CFA, we also need to adjust for the
4046 // InternalAmt here.
4047 BuildCFI(
4048 MBB, MBBI: InsertPos, DL,
4049 CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: -CfaAdjustment));
4050 }
4051
4052 return I;
4053 }
4054
4055 if (InternalAmt) {
4056 MachineBasicBlock::iterator CI = I;
4057 MachineBasicBlock::iterator B = MBB.begin();
4058 while (CI != B && !std::prev(x: CI)->isCall())
4059 --CI;
4060 BuildStackAdjustment(MBB, MBBI: CI, DL, Offset: -InternalAmt, /*InEpilogue=*/false);
4061 }
4062
4063 return I;
4064}
4065
4066bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
4067 assert(MBB.getParent() && "Block is not attached to a function!");
4068 const MachineFunction &MF = *MBB.getParent();
4069 if (!MBB.isLiveIn(Reg: X86::EFLAGS))
4070 return true;
4071
4072 // If stack probes have to loop inline or call, that will clobber EFLAGS.
4073 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
4074 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
4075 const X86TargetLowering &TLI = *STI.getTargetLowering();
4076 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
4077 return false;
4078
4079 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4080 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
4081}
4082
4083bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
4084 assert(MBB.getParent() && "Block is not attached to a function!");
4085
4086 // Win64 has strict requirements in terms of epilogue and we are
4087 // not taking a chance at messing with them.
4088 // I.e., unless this block is already an exit block, we can't use
4089 // it as an epilogue.
4090 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
4091 return false;
4092
4093 // Swift async context epilogue has a BTR instruction that clobbers parts of
4094 // EFLAGS.
4095 const MachineFunction &MF = *MBB.getParent();
4096 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
4097 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
4098
4099 if (canUseLEAForSPInEpilogue(MF: *MBB.getParent()))
4100 return true;
4101
4102 // If we cannot use LEA to adjust SP, we may need to use ADD, which
4103 // clobbers the EFLAGS. Check that we do not need to preserve it,
4104 // otherwise, conservatively assume this is not
4105 // safe to insert the epilogue here.
4106 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
4107}
4108
4109bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
4110 // If we may need to emit frameless compact unwind information, give
4111 // up as this is currently broken: PR25614.
4112 bool CompactUnwind =
4113 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
4114 return (MF.getFunction().hasFnAttribute(Kind: Attribute::NoUnwind) || hasFP(MF) ||
4115 !CompactUnwind) &&
4116 // The lowering of segmented stack and HiPE only support entry
4117 // blocks as prologue blocks: PR26107. This limitation may be
4118 // lifted if we fix:
4119 // - adjustForSegmentedStacks
4120 // - adjustForHiPEPrologue
4121 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
4122 !MF.shouldSplitStack();
4123}
4124
4125MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
4126 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
4127 const DebugLoc &DL, bool RestoreSP) const {
4128 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
4129 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
4130 assert(STI.is32Bit() && !Uses64BitFramePtr &&
4131 "restoring EBP/ESI on non-32-bit target");
4132
4133 MachineFunction &MF = *MBB.getParent();
4134 Register FramePtr = TRI->getFrameRegister(MF);
4135 Register BasePtr = TRI->getBaseRegister();
4136 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
4137 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4138 MachineFrameInfo &MFI = MF.getFrameInfo();
4139
4140 // FIXME: Don't set FrameSetup flag in catchret case.
4141
4142 int FI = FuncInfo.EHRegNodeFrameIndex;
4143 int EHRegSize = MFI.getObjectSize(ObjectIdx: FI);
4144
4145 if (RestoreSP) {
4146 // MOV32rm -EHRegSize(%ebp), %esp
4147 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32rm), DestReg: X86::ESP),
4148 Reg: X86::EBP, isKill: true, Offset: -EHRegSize)
4149 .setMIFlag(MachineInstr::FrameSetup);
4150 }
4151
4152 Register UsedReg;
4153 int EHRegOffset = getFrameIndexReference(MF, FI, FrameReg&: UsedReg).getFixed();
4154 int EndOffset = -EHRegOffset - EHRegSize;
4155 FuncInfo.EHRegNodeEndOffset = EndOffset;
4156
4157 if (UsedReg == FramePtr) {
4158 // ADD $offset, %ebp
4159 unsigned ADDri = getADDriOpcode(IsLP64: false);
4160 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ADDri), DestReg: FramePtr)
4161 .addReg(RegNo: FramePtr)
4162 .addImm(Val: EndOffset)
4163 .setMIFlag(MachineInstr::FrameSetup)
4164 ->getOperand(i: 3)
4165 .setIsDead();
4166 assert(EndOffset >= 0 &&
4167 "end of registration object above normal EBP position!");
4168 } else if (UsedReg == BasePtr) {
4169 // LEA offset(%ebp), %esi
4170 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA32r), DestReg: BasePtr),
4171 Reg: FramePtr, isKill: false, Offset: EndOffset)
4172 .setMIFlag(MachineInstr::FrameSetup);
4173 // MOV32rm SavedEBPOffset(%esi), %ebp
4174 assert(X86FI->getHasSEHFramePtrSave());
4175 int Offset =
4176 getFrameIndexReference(MF, FI: X86FI->getSEHFramePtrSaveIndex(), FrameReg&: UsedReg)
4177 .getFixed();
4178 assert(UsedReg == BasePtr);
4179 addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32rm), DestReg: FramePtr),
4180 Reg: UsedReg, isKill: true, Offset)
4181 .setMIFlag(MachineInstr::FrameSetup);
4182 } else {
4183 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
4184 }
4185 return MBBI;
4186}
4187
4188int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
4189 return TRI->getSlotSize();
4190}
4191
4192Register
4193X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
4194 return StackPtr;
4195}
4196
4197TargetFrameLowering::DwarfFrameBase
4198X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
4199 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
4200 Register FrameRegister = RI->getFrameRegister(MF);
4201 if (getInitialCFARegister(MF) == FrameRegister &&
4202 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
4203 DwarfFrameBase FrameBase;
4204 FrameBase.Kind = DwarfFrameBase::CFA;
4205 FrameBase.Location.Offset =
4206 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
4207 return FrameBase;
4208 }
4209
4210 return DwarfFrameBase{.Kind: DwarfFrameBase::Register, .Location: {.Reg: FrameRegister}};
4211}
4212
4213namespace {
4214// Struct used by orderFrameObjects to help sort the stack objects.
4215struct X86FrameSortingObject {
4216 bool IsValid = false; // true if we care about this Object.
4217 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4218 unsigned ObjectSize = 0; // Size of Object in bytes.
4219 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4220 unsigned ObjectNumUses = 0; // Object static number of uses.
4221};
4222
4223// The comparison function we use for std::sort to order our local
4224// stack symbols. The current algorithm is to use an estimated
4225// "density". This takes into consideration the size and number of
4226// uses each object has in order to roughly minimize code size.
4227// So, for example, an object of size 16B that is referenced 5 times
4228// will get higher priority than 4 4B objects referenced 1 time each.
4229// It's not perfect and we may be able to squeeze a few more bytes out of
4230// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4231// fringe end can have special consideration, given their size is less
4232// important, etc.), but the algorithmic complexity grows too much to be
4233// worth the extra gains we get. This gets us pretty close.
4234// The final order leaves us with objects with highest priority going
4235// at the end of our list.
4236struct X86FrameSortingComparator {
4237 inline bool operator()(const X86FrameSortingObject &A,
4238 const X86FrameSortingObject &B) const {
4239 uint64_t DensityAScaled, DensityBScaled;
4240
4241 // For consistency in our comparison, all invalid objects are placed
4242 // at the end. This also allows us to stop walking when we hit the
4243 // first invalid item after it's all sorted.
4244 if (!A.IsValid)
4245 return false;
4246 if (!B.IsValid)
4247 return true;
4248
4249 // The density is calculated by doing :
4250 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4251 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4252 // Since this approach may cause inconsistencies in
4253 // the floating point <, >, == comparisons, depending on the floating
4254 // point model with which the compiler was built, we're going
4255 // to scale both sides by multiplying with
4256 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4257 // the division and, with it, the need for any floating point
4258 // arithmetic.
4259 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4260 static_cast<uint64_t>(B.ObjectSize);
4261 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4262 static_cast<uint64_t>(A.ObjectSize);
4263
4264 // If the two densities are equal, prioritize highest alignment
4265 // objects. This allows for similar alignment objects
4266 // to be packed together (given the same density).
4267 // There's room for improvement here, also, since we can pack
4268 // similar alignment (different density) objects next to each
4269 // other to save padding. This will also require further
4270 // complexity/iterations, and the overall gain isn't worth it,
4271 // in general. Something to keep in mind, though.
4272 if (DensityAScaled == DensityBScaled)
4273 return A.ObjectAlignment < B.ObjectAlignment;
4274
4275 return DensityAScaled < DensityBScaled;
4276 }
4277};
4278} // namespace
4279
4280// Order the symbols in the local stack.
4281// We want to place the local stack objects in some sort of sensible order.
4282// The heuristic we use is to try and pack them according to static number
4283// of uses and size of object in order to minimize code size.
4284void X86FrameLowering::orderFrameObjects(
4285 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4286 const MachineFrameInfo &MFI = MF.getFrameInfo();
4287
4288 // Don't waste time if there's nothing to do.
4289 if (ObjectsToAllocate.empty())
4290 return;
4291
4292 // Create an array of all MFI objects. We won't need all of these
4293 // objects, but we're going to create a full array of them to make
4294 // it easier to index into when we're counting "uses" down below.
4295 // We want to be able to easily/cheaply access an object by simply
4296 // indexing into it, instead of having to search for it every time.
4297 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4298
4299 // Walk the objects we care about and mark them as such in our working
4300 // struct.
4301 for (auto &Obj : ObjectsToAllocate) {
4302 SortingObjects[Obj].IsValid = true;
4303 SortingObjects[Obj].ObjectIndex = Obj;
4304 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(ObjectIdx: Obj);
4305 // Set the size.
4306 int ObjectSize = MFI.getObjectSize(ObjectIdx: Obj);
4307 if (ObjectSize == 0)
4308 // Variable size. Just use 4.
4309 SortingObjects[Obj].ObjectSize = 4;
4310 else
4311 SortingObjects[Obj].ObjectSize = ObjectSize;
4312 }
4313
4314 // Count the number of uses for each object.
4315 for (auto &MBB : MF) {
4316 for (auto &MI : MBB) {
4317 if (MI.isDebugInstr())
4318 continue;
4319 for (const MachineOperand &MO : MI.operands()) {
4320 // Check to see if it's a local stack symbol.
4321 if (!MO.isFI())
4322 continue;
4323 int Index = MO.getIndex();
4324 // Check to see if it falls within our range, and is tagged
4325 // to require ordering.
4326 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4327 SortingObjects[Index].IsValid)
4328 SortingObjects[Index].ObjectNumUses++;
4329 }
4330 }
4331 }
4332
4333 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4334 // info).
4335 llvm::stable_sort(Range&: SortingObjects, C: X86FrameSortingComparator());
4336
4337 // Now modify the original list to represent the final order that
4338 // we want. The order will depend on whether we're going to access them
4339 // from the stack pointer or the frame pointer. For SP, the list should
4340 // end up with the END containing objects that we want with smaller offsets.
4341 // For FP, it should be flipped.
4342 int i = 0;
4343 for (auto &Obj : SortingObjects) {
4344 // All invalid items are sorted at the end, so it's safe to stop.
4345 if (!Obj.IsValid)
4346 break;
4347 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4348 }
4349
4350 // Flip it if we're accessing off of the FP.
4351 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4352 std::reverse(first: ObjectsToAllocate.begin(), last: ObjectsToAllocate.end());
4353}
4354
4355unsigned
4356X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4357 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4358 unsigned Offset = 16;
4359 // RBP is immediately pushed.
4360 Offset += SlotSize;
4361 // All callee-saved registers are then pushed.
4362 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4363 // Every funclet allocates enough stack space for the largest outgoing call.
4364 Offset += getWinEHFuncletFrameSize(MF);
4365 return Offset;
4366}
4367
4368void X86FrameLowering::processFunctionBeforeFrameFinalized(
4369 MachineFunction &MF, RegScavenger *RS) const {
4370 // Mark the function as not having WinCFI. We will set it back to true in
4371 // emitPrologue if it gets called and emits CFI.
4372 MF.setHasWinCFI(false);
4373
4374 MachineFrameInfo &MFI = MF.getFrameInfo();
4375 // If the frame is big enough that we might need to scavenge a register to
4376 // handle huge offsets, reserve a stack slot for that now.
4377 if (!isInt<32>(x: MFI.estimateStackSize(MF))) {
4378 int FI = MFI.CreateStackObject(Size: SlotSize, Alignment: Align(SlotSize), isSpillSlot: false);
4379 RS->addScavengingFrameIndex(FI);
4380 }
4381
4382 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4383 // aligned. The format doesn't support misaligned stack adjustments.
4384 if (MF.getTarget().getMCAsmInfo().usesWindowsCFI())
4385 MF.getFrameInfo().ensureMaxAlignment(Alignment: Align(SlotSize));
4386
4387 // If this function isn't doing Win64-style C++ EH, we don't need to do
4388 // anything.
4389 if (STI.is64Bit() && MF.hasEHFunclets() &&
4390 classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn()) ==
4391 EHPersonality::MSVC_CXX) {
4392 adjustFrameForMsvcCxxEh(MF);
4393 }
4394}
4395
4396void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4397 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4398 // relative to RSP after the prologue. Find the offset of the last fixed
4399 // object, so that we can allocate a slot immediately following it. If there
4400 // were no fixed objects, use offset -SlotSize, which is immediately after the
4401 // return address. Fixed objects have negative frame indices.
4402 MachineFrameInfo &MFI = MF.getFrameInfo();
4403 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4404 int64_t MinFixedObjOffset = -SlotSize;
4405 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4406 MinFixedObjOffset = std::min(a: MinFixedObjOffset, b: MFI.getObjectOffset(ObjectIdx: I));
4407
4408 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4409 for (WinEHHandlerType &H : TBME.HandlerArray) {
4410 int FrameIndex = H.CatchObj.FrameIndex;
4411 if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(ObjectIdx: FrameIndex) == 0) {
4412 // Ensure alignment.
4413 unsigned Align = MFI.getObjectAlign(ObjectIdx: FrameIndex).value();
4414 MinFixedObjOffset -= std::abs(i: MinFixedObjOffset) % Align;
4415 MinFixedObjOffset -= MFI.getObjectSize(ObjectIdx: FrameIndex);
4416 MFI.setObjectOffset(ObjectIdx: FrameIndex, SPOffset: MinFixedObjOffset);
4417 }
4418 }
4419 }
4420
4421 // Ensure alignment.
4422 MinFixedObjOffset -= std::abs(i: MinFixedObjOffset) % 8;
4423 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4424 int UnwindHelpFI =
4425 MFI.CreateFixedObject(Size: SlotSize, SPOffset: UnwindHelpOffset, /*IsImmutable=*/false);
4426 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4427
4428 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4429 // other frame setup instructions.
4430 MachineBasicBlock &MBB = MF.front();
4431 auto MBBI = MBB.begin();
4432 while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup))
4433 ++MBBI;
4434
4435 DebugLoc DL = MBB.findDebugLoc(MBBI);
4436 addFrameReference(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mi32)),
4437 FI: UnwindHelpFI)
4438 .addImm(Val: -2);
4439}
4440
4441void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4442 MachineFunction &MF, RegScavenger *RS) const {
4443 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4444
4445 if (STI.is32Bit() && MF.hasEHFunclets())
4446 restoreWinEHStackPointersInParent(MF);
4447 // We have emitted prolog and epilog. Don't need stack pointer saving
4448 // instruction any more.
4449 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4450 MI->eraseFromParent();
4451 X86FI->setStackPtrSaveMI(nullptr);
4452 }
4453}
4454
4455void X86FrameLowering::restoreWinEHStackPointersInParent(
4456 MachineFunction &MF) const {
4457 // 32-bit functions have to restore stack pointers when control is transferred
4458 // back to the parent function. These blocks are identified as eh pads that
4459 // are not funclet entries.
4460 bool IsSEH = isAsynchronousEHPersonality(
4461 Pers: classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn()));
4462 for (MachineBasicBlock &MBB : MF) {
4463 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4464 if (NeedsRestore)
4465 restoreWin32EHStackPointers(MBB, MBBI: MBB.begin(), DL: DebugLoc(),
4466 /*RestoreSP=*/IsSEH);
4467 }
4468}
4469
4470// Compute the alignment gap between current SP after spilling FP/BP and the
4471// next properly aligned stack offset.
4472static int computeFPBPAlignmentGap(MachineFunction &MF,
4473 const TargetRegisterClass *RC,
4474 unsigned NumSpilledRegs) {
4475 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
4476 unsigned AllocSize = TRI->getSpillSize(RC: *RC) * NumSpilledRegs;
4477 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4478 unsigned AlignedSize = alignTo(Size: AllocSize, A: StackAlign);
4479 return AlignedSize - AllocSize;
4480}
4481
4482void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4483 MachineBasicBlock::iterator BeforeMI,
4484 Register FP, Register BP,
4485 int SPAdjust) const {
4486 assert(FP.isValid() || BP.isValid());
4487
4488 MachineBasicBlock *MBB = BeforeMI->getParent();
4489 DebugLoc DL = BeforeMI->getDebugLoc();
4490
4491 // Spill FP.
4492 if (FP.isValid()) {
4493 BuildMI(BB&: *MBB, I: BeforeMI, MIMD: DL,
4494 MCID: TII.get(Opcode: getPUSHOpcode(ST: MF.getSubtarget<X86Subtarget>())))
4495 .addReg(RegNo: FP);
4496 }
4497
4498 // Spill BP.
4499 if (BP.isValid()) {
4500 BuildMI(BB&: *MBB, I: BeforeMI, MIMD: DL,
4501 MCID: TII.get(Opcode: getPUSHOpcode(ST: MF.getSubtarget<X86Subtarget>())))
4502 .addReg(RegNo: BP);
4503 }
4504
4505 // Make sure SP is aligned.
4506 if (SPAdjust)
4507 emitSPUpdate(MBB&: *MBB, MBBI&: BeforeMI, DL, NumBytes: -SPAdjust, InEpilogue: false);
4508
4509 // Emit unwinding information.
4510 if (FP.isValid() && needsDwarfCFI(MF)) {
4511 // Emit .cfi_remember_state to remember old frame.
4512 unsigned CFIIndex =
4513 MF.addFrameInst(Inst: MCCFIInstruction::createRememberState(L: nullptr));
4514 BuildMI(BB&: *MBB, I: BeforeMI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
4515 .addCFIIndex(CFIIndex);
4516
4517 // Setup new CFA value with DW_CFA_def_cfa_expression:
4518 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4519 SmallString<64> CfaExpr;
4520 uint8_t buffer[16];
4521 int Offset = SPAdjust;
4522 if (BP.isValid())
4523 Offset += TRI->getSpillSize(RC: *TRI->getMinimalPhysRegClass(Reg: BP));
4524 // If BeforeMI is a frame setup instruction, we need to adjust the position
4525 // and offset of the new cfi instruction.
4526 if (TII.isFrameSetup(I: *BeforeMI)) {
4527 Offset += alignTo(Size: TII.getFrameSize(I: *BeforeMI), A: getStackAlign());
4528 BeforeMI = std::next(x: BeforeMI);
4529 }
4530 Register StackPtr = TRI->getStackRegister();
4531 if (STI.isTarget64BitILP32())
4532 StackPtr = Register(getX86SubSuperRegister(Reg: StackPtr, Size: 64));
4533 unsigned DwarfStackPtr = TRI->getDwarfRegNum(Reg: StackPtr, isEH: true);
4534 CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4535 CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer));
4536 CfaExpr.push_back(Elt: dwarf::DW_OP_deref);
4537 CfaExpr.push_back(Elt: dwarf::DW_OP_consts);
4538 CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: SlotSize * 2, p: buffer));
4539 CfaExpr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
4540
4541 SmallString<64> DefCfaExpr;
4542 DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression);
4543 DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: CfaExpr.size(), p: buffer));
4544 DefCfaExpr.append(RHS: CfaExpr.str());
4545 BuildCFI(MBB&: *MBB, MBBI: BeforeMI, DL,
4546 CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str()),
4547 Flag: MachineInstr::FrameSetup);
4548 }
4549}
4550
4551void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4552 MachineBasicBlock::iterator AfterMI,
4553 Register FP, Register BP,
4554 int SPAdjust) const {
4555 assert(FP.isValid() || BP.isValid());
4556
4557 // Adjust SP so it points to spilled FP or BP.
4558 MachineBasicBlock *MBB = AfterMI->getParent();
4559 MachineBasicBlock::iterator Pos = std::next(x: AfterMI);
4560 DebugLoc DL = AfterMI->getDebugLoc();
4561 if (SPAdjust)
4562 emitSPUpdate(MBB&: *MBB, MBBI&: Pos, DL, NumBytes: SPAdjust, InEpilogue: false);
4563
4564 // Restore BP.
4565 if (BP.isValid()) {
4566 BuildMI(BB&: *MBB, I: Pos, MIMD: DL,
4567 MCID: TII.get(Opcode: getPOPOpcode(ST: MF.getSubtarget<X86Subtarget>())), DestReg: BP);
4568 }
4569
4570 // Restore FP.
4571 if (FP.isValid()) {
4572 BuildMI(BB&: *MBB, I: Pos, MIMD: DL,
4573 MCID: TII.get(Opcode: getPOPOpcode(ST: MF.getSubtarget<X86Subtarget>())), DestReg: FP);
4574
4575 // Emit unwinding information.
4576 if (needsDwarfCFI(MF)) {
4577 // Restore original frame with .cfi_restore_state.
4578 unsigned CFIIndex =
4579 MF.addFrameInst(Inst: MCCFIInstruction::createRestoreState(L: nullptr));
4580 BuildMI(BB&: *MBB, I: Pos, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
4581 .addCFIIndex(CFIIndex);
4582 }
4583 }
4584}
4585
4586void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4587 MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
4588 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4589 assert(SpillFP || SpillBP);
4590
4591 Register FP, BP;
4592 const TargetRegisterClass *RC;
4593 unsigned NumRegs = 0;
4594
4595 if (SpillFP) {
4596 FP = TRI->getFrameRegister(MF);
4597 if (STI.isTarget64BitILP32())
4598 FP = Register(getX86SubSuperRegister(Reg: FP, Size: 64));
4599 RC = TRI->getMinimalPhysRegClass(Reg: FP);
4600 ++NumRegs;
4601 }
4602 if (SpillBP) {
4603 BP = TRI->getBaseRegister();
4604 if (STI.isTarget64BitILP32())
4605 BP = Register(getX86SubSuperRegister(Reg: BP, Size: 64));
4606 RC = TRI->getMinimalPhysRegClass(Reg: BP);
4607 ++NumRegs;
4608 }
4609 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumSpilledRegs: NumRegs);
4610
4611 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4612 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4613}
4614
4615bool X86FrameLowering::skipSpillFPBP(
4616 MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
4617 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4618 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4619 // SaveRbx = COPY RBX
4620 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4621 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4622 // We should skip this instruction sequence.
4623 int FI;
4624 Register Reg;
4625 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4626 MI->getOperand(i: 1).getReg() == X86::RBX) &&
4627 !((Reg = TII.isStoreToStackSlot(MI: *MI, FrameIndex&: FI)) && Reg == X86::RBX))
4628 ++MI;
4629 return true;
4630 }
4631 return false;
4632}
4633
4634static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
4635 const TargetRegisterInfo *TRI, bool &AccessFP,
4636 bool &AccessBP) {
4637 AccessFP = AccessBP = false;
4638 if (FP) {
4639 if (MI.findRegisterUseOperandIdx(Reg: FP, TRI, isKill: false) != -1 ||
4640 MI.findRegisterDefOperandIdx(Reg: FP, TRI, isDead: false, Overlap: true) != -1)
4641 AccessFP = true;
4642 }
4643 if (BP) {
4644 if (MI.findRegisterUseOperandIdx(Reg: BP, TRI, isKill: false) != -1 ||
4645 MI.findRegisterDefOperandIdx(Reg: BP, TRI, isDead: false, Overlap: true) != -1)
4646 AccessBP = true;
4647 }
4648 return AccessFP || AccessBP;
4649}
4650
4651// Invoke instruction has been lowered to normal function call. We try to figure
4652// out if MI comes from Invoke.
4653// Do we have any better method?
4654static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4655 if (!MI.isCall())
4656 return false;
4657 if (InsideEHLabels)
4658 return true;
4659
4660 const MachineBasicBlock *MBB = MI.getParent();
4661 if (!MBB->hasEHPadSuccessor())
4662 return false;
4663
4664 // Check if there is another call instruction from MI to the end of MBB.
4665 MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end();
4666 for (++MBBI; MBBI != ME; ++MBBI)
4667 if (MBBI->isCall())
4668 return false;
4669 return true;
4670}
4671
4672/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4673/// interfered stack access in the range, usually generated by register spill.
4674void X86FrameLowering::checkInterferedAccess(
4675 MachineFunction &MF, MachineBasicBlock::reverse_iterator DefMI,
4676 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4677 bool SpillBP) const {
4678 if (DefMI == KillMI)
4679 return;
4680 if (TRI->hasBasePointer(MF)) {
4681 if (!SpillBP)
4682 return;
4683 } else {
4684 if (!SpillFP)
4685 return;
4686 }
4687
4688 auto MI = KillMI;
4689 while (MI != DefMI) {
4690 if (any_of(Range: MI->operands(),
4691 P: [](const MachineOperand &MO) { return MO.isFI(); }))
4692 MF.getContext().reportError(L: SMLoc(),
4693 Msg: "Interference usage of base pointer/frame "
4694 "pointer.");
4695 MI++;
4696 }
4697}
4698
4699/// If a function uses base pointer and the base pointer is clobbered by inline
4700/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
4701/// contains garbage value.
4702/// For example if a 32b x86 function uses base pointer esi, and esi is
4703/// clobbered by following inline asm
4704/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4705/// We need to save esi before the asm and restore it after the asm.
4706///
4707/// The problem can also occur to frame pointer if there is a function call, and
4708/// the callee uses a different calling convention and clobbers the fp.
4709///
4710/// Because normal frame objects (spill slots) are accessed through fp/bp
4711/// register, so we can't spill fp/bp to normal spill slots.
4712///
4713/// FIXME: There are 2 possible enhancements:
4714/// 1. In many cases there are different physical registers not clobbered by
4715/// inline asm, we can use one of them as base pointer. Or use a virtual
4716/// register as base pointer and let RA allocate a physical register to it.
4717/// 2. If there is no other instructions access stack with fp/bp from the
4718/// inline asm to the epilog, and no cfi requirement for a correct fp, we can
4719/// skip the save and restore operations.
4720void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
4721 Register FP, BP;
4722 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
4723 if (TFI.hasFP(MF))
4724 FP = TRI->getFrameRegister(MF);
4725 if (TRI->hasBasePointer(MF))
4726 BP = TRI->getBaseRegister();
4727
4728 // Currently only inline asm and function call can clobbers fp/bp. So we can
4729 // do some quick test and return early.
4730 if (!MF.hasInlineAsm()) {
4731 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4732 if (!X86FI->getFPClobberedByCall())
4733 FP = 0;
4734 if (!X86FI->getBPClobberedByCall())
4735 BP = 0;
4736 }
4737 if (!FP && !BP)
4738 return;
4739
4740 for (MachineBasicBlock &MBB : MF) {
4741 bool InsideEHLabels = false;
4742 auto MI = MBB.rbegin(), ME = MBB.rend();
4743 auto TermMI = MBB.getFirstTerminator();
4744 if (TermMI == MBB.begin())
4745 continue;
4746 MI = *(std::prev(x: TermMI));
4747
4748 while (MI != ME) {
4749 // Skip frame setup/destroy instructions.
4750 // Skip Invoke (call inside try block) instructions.
4751 // Skip instructions handled by target.
4752 if (MI->getFlag(Flag: MachineInstr::MIFlag::FrameSetup) ||
4753 MI->getFlag(Flag: MachineInstr::MIFlag::FrameDestroy) ||
4754 isInvoke(MI: *MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4755 ++MI;
4756 continue;
4757 }
4758
4759 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4760 InsideEHLabels = !InsideEHLabels;
4761 ++MI;
4762 continue;
4763 }
4764
4765 bool AccessFP, AccessBP;
4766 // Check if fp or bp is used in MI.
4767 if (!isFPBPAccess(MI: *MI, FP, BP, TRI, AccessFP, AccessBP)) {
4768 ++MI;
4769 continue;
4770 }
4771
4772 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4773 // used.
4774 bool FPLive = false, BPLive = false;
4775 bool SpillFP = false, SpillBP = false;
4776 auto DefMI = MI, KillMI = MI;
4777 do {
4778 SpillFP |= AccessFP;
4779 SpillBP |= AccessBP;
4780
4781 // Maintain FPLive and BPLive.
4782 if (FPLive && MI->findRegisterDefOperandIdx(Reg: FP, TRI, isDead: false, Overlap: true) != -1)
4783 FPLive = false;
4784 if (FP && MI->findRegisterUseOperandIdx(Reg: FP, TRI, isKill: false) != -1)
4785 FPLive = true;
4786 if (BPLive && MI->findRegisterDefOperandIdx(Reg: BP, TRI, isDead: false, Overlap: true) != -1)
4787 BPLive = false;
4788 if (BP && MI->findRegisterUseOperandIdx(Reg: BP, TRI, isKill: false) != -1)
4789 BPLive = true;
4790
4791 DefMI = MI++;
4792 } while ((MI != ME) &&
4793 (FPLive || BPLive ||
4794 isFPBPAccess(MI: *MI, FP, BP, TRI, AccessFP, AccessBP)));
4795
4796 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4797 if (FPLive && !SpillBP)
4798 continue;
4799
4800 // If the bp is clobbered by a call, we should save and restore outside of
4801 // the frame setup instructions.
4802 if (KillMI->isCall() && DefMI != ME) {
4803 auto FrameSetup = std::next(x: DefMI);
4804 // Look for frame setup instruction toward the start of the BB.
4805 // If we reach another call instruction, it means no frame setup
4806 // instruction for the current call instruction.
4807 while (FrameSetup != ME && !TII.isFrameSetup(I: *FrameSetup) &&
4808 !FrameSetup->isCall())
4809 ++FrameSetup;
4810 // If a frame setup instruction is found, we need to find out the
4811 // corresponding frame destroy instruction.
4812 if (FrameSetup != ME && TII.isFrameSetup(I: *FrameSetup) &&
4813 (TII.getFrameSize(I: *FrameSetup) ||
4814 TII.getFrameAdjustment(I: *FrameSetup))) {
4815 while (!TII.isFrameInstr(I: *KillMI))
4816 --KillMI;
4817 DefMI = FrameSetup;
4818 MI = DefMI;
4819 ++MI;
4820 }
4821 }
4822
4823 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4824
4825 // Call target function to spill and restore FP and BP registers.
4826 saveAndRestoreFPBPUsingSP(MF, BeforeMI: &(*DefMI), AfterMI: &(*KillMI), SpillFP, SpillBP);
4827 }
4828 }
4829}
4830