1 | //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the X86 implementation of TargetFrameLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "X86FrameLowering.h" |
14 | #include "MCTargetDesc/X86MCTargetDesc.h" |
15 | #include "X86InstrBuilder.h" |
16 | #include "X86InstrInfo.h" |
17 | #include "X86MachineFunctionInfo.h" |
18 | #include "X86Subtarget.h" |
19 | #include "X86TargetMachine.h" |
20 | #include "llvm/ADT/Statistic.h" |
21 | #include "llvm/CodeGen/LivePhysRegs.h" |
22 | #include "llvm/CodeGen/MachineFrameInfo.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | #include "llvm/CodeGen/MachineModuleInfo.h" |
26 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
27 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
28 | #include "llvm/IR/DataLayout.h" |
29 | #include "llvm/IR/EHPersonalities.h" |
30 | #include "llvm/IR/Function.h" |
31 | #include "llvm/IR/Module.h" |
32 | #include "llvm/MC/MCAsmInfo.h" |
33 | #include "llvm/MC/MCObjectFileInfo.h" |
34 | #include "llvm/MC/MCSymbol.h" |
35 | #include "llvm/Support/LEB128.h" |
36 | #include "llvm/Target/TargetOptions.h" |
37 | #include <cstdlib> |
38 | |
39 | #define DEBUG_TYPE "x86-fl" |
40 | |
STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
45 | |
46 | using namespace llvm; |
47 | |
48 | X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, |
49 | MaybeAlign StackAlignOverride) |
50 | : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), |
51 | STI.is64Bit() ? -8 : -4), |
52 | STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { |
53 | // Cache a bunch of frame-related predicates for this subtarget. |
54 | SlotSize = TRI->getSlotSize(); |
55 | Is64Bit = STI.is64Bit(); |
56 | IsLP64 = STI.isTarget64BitLP64(); |
// Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
58 | Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); |
59 | StackPtr = TRI->getStackRegister(); |
60 | } |
61 | |
62 | bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { |
63 | return !MF.getFrameInfo().hasVarSizedObjects() && |
64 | !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() && |
65 | !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall(); |
66 | } |
67 | |
68 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the |
69 | /// call frame pseudos can be simplified. Having a FP, as in the default |
70 | /// implementation, is not sufficient here since we can't always use it. |
71 | /// Use a more nuanced condition. |
72 | bool X86FrameLowering::canSimplifyCallFramePseudos( |
73 | const MachineFunction &MF) const { |
74 | return hasReservedCallFrame(MF) || |
75 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || |
76 | (hasFP(MF) && !TRI->hasStackRealignment(MF)) || |
77 | TRI->hasBasePointer(MF); |
78 | } |
79 | |
80 | // needsFrameIndexResolution - Do we need to perform FI resolution for |
81 | // this function. Normally, this is required only when the function |
82 | // has any stack objects. However, FI resolution actually has another job, |
83 | // not apparent from the title - it resolves callframesetup/destroy |
84 | // that were not simplified earlier. |
85 | // So, this is required for x86 functions that have push sequences even |
86 | // when there are no stack objects. |
87 | bool X86FrameLowering::needsFrameIndexResolution( |
88 | const MachineFunction &MF) const { |
89 | return MF.getFrameInfo().hasStackObjects() || |
90 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); |
91 | } |
92 | |
93 | /// hasFPImpl - Return true if the specified function should have a dedicated |
94 | /// frame pointer register. This is true if the function has variable sized |
95 | /// allocas or if frame pointer elimination is disabled. |
96 | bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const { |
97 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
98 | return (MF.getTarget().Options.DisableFramePointerElim(MF) || |
99 | TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || |
100 | MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() || |
101 | MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || |
102 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || |
103 | MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || |
104 | MFI.hasStackMap() || MFI.hasPatchPoint() || |
105 | (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment())); |
106 | } |
107 | |
108 | static unsigned getSUBriOpcode(bool IsLP64) { |
109 | return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri; |
110 | } |
111 | |
112 | static unsigned getADDriOpcode(bool IsLP64) { |
113 | return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri; |
114 | } |
115 | |
116 | static unsigned getSUBrrOpcode(bool IsLP64) { |
117 | return IsLP64 ? X86::SUB64rr : X86::SUB32rr; |
118 | } |
119 | |
120 | static unsigned getADDrrOpcode(bool IsLP64) { |
121 | return IsLP64 ? X86::ADD64rr : X86::ADD32rr; |
122 | } |
123 | |
124 | static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { |
125 | return IsLP64 ? X86::AND64ri32 : X86::AND32ri; |
126 | } |
127 | |
128 | static unsigned getLEArOpcode(bool IsLP64) { |
129 | return IsLP64 ? X86::LEA64r : X86::LEA32r; |
130 | } |
131 | |
132 | static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) { |
133 | if (Use64BitReg) { |
134 | if (isUInt<32>(x: Imm)) |
135 | return X86::MOV32ri64; |
136 | if (isInt<32>(x: Imm)) |
137 | return X86::MOV64ri32; |
138 | return X86::MOV64ri; |
139 | } |
140 | return X86::MOV32ri; |
141 | } |
142 | |
143 | // Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the |
144 | // value written by the PUSH from the stack. The processor tracks these marked |
145 | // instructions internally and fast-forwards register data between matching PUSH |
146 | // and POP instructions, without going through memory or through the training |
147 | // loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient |
148 | // memory-renaming optimization can be used. |
149 | // |
150 | // The PPX hint is purely a performance hint. Instructions with this hint have |
151 | // the same functional semantics as those without. PPX hints set by the |
152 | // compiler that violate the balancing rule may turn off the PPX optimization, |
153 | // but they will not affect program semantics. |
154 | // |
155 | // Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp |
156 | // are not considered). |
157 | // |
158 | // PUSH2 and POP2 are instructions for (respectively) pushing/popping 2 |
159 | // GPRs at a time to/from the stack. |
160 | static unsigned getPUSHOpcode(const X86Subtarget &ST) { |
161 | return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r) |
162 | : X86::PUSH32r; |
163 | } |
164 | static unsigned getPOPOpcode(const X86Subtarget &ST) { |
165 | return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r) |
166 | : X86::POP32r; |
167 | } |
168 | static unsigned getPUSH2Opcode(const X86Subtarget &ST) { |
169 | return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2; |
170 | } |
171 | static unsigned getPOP2Opcode(const X86Subtarget &ST) { |
172 | return ST.hasPPX() ? X86::POP2P : X86::POP2; |
173 | } |
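// For example, on a subtarget with PPX these helpers select PUSHP64r/POPP64r
// (and PUSH2P/POP2P when two registers are paired), while a plain 64-bit
// target uses PUSH64r/POP64r and a 32-bit target uses PUSH32r/POP32r.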
174 | |
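// Returns true if any alias of EAX (RAX/EAX/AX/AH/AL) is live into MBB, in
// which case EAX cannot be used as a scratch register there.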
175 | static bool isEAXLiveIn(MachineBasicBlock &MBB) { |
176 | for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { |
177 | MCRegister Reg = RegMask.PhysReg; |
178 | |
179 | if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || |
180 | Reg == X86::AH || Reg == X86::AL) |
181 | return true; |
182 | } |
183 | |
184 | return false; |
185 | } |
186 | |
187 | /// Check if the flags need to be preserved before the terminators. |
188 | /// This would be the case, if the eflags is live-in of the region |
189 | /// composed by the terminators or live-out of that region, without |
190 | /// being defined by a terminator. |
191 | static bool |
192 | flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { |
193 | for (const MachineInstr &MI : MBB.terminators()) { |
194 | bool BreakNext = false; |
195 | for (const MachineOperand &MO : MI.operands()) { |
196 | if (!MO.isReg()) |
197 | continue; |
198 | Register Reg = MO.getReg(); |
199 | if (Reg != X86::EFLAGS) |
200 | continue; |
201 | |
202 | // This terminator needs an eflags that is not defined |
// by a preceding terminator:
204 | // EFLAGS is live-in of the region composed by the terminators. |
205 | if (!MO.isDef()) |
206 | return true; |
207 | // This terminator defines the eflags, i.e., we don't need to preserve it. |
208 | // However, we still need to check this specific terminator does not |
209 | // read a live-in value. |
210 | BreakNext = true; |
211 | } |
212 | // We found a definition of the eflags, no need to preserve them. |
213 | if (BreakNext) |
214 | return false; |
215 | } |
216 | |
217 | // None of the terminators use or define the eflags. |
218 | // Check if they are live-out, that would imply we need to preserve them. |
219 | for (const MachineBasicBlock *Succ : MBB.successors()) |
220 | if (Succ->isLiveIn(Reg: X86::EFLAGS)) |
221 | return true; |
222 | |
223 | return false; |
224 | } |
225 | |
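// The largest stack adjustment that still fits in the 32-bit immediate of a
// single ADD/SUB instruction.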
226 | constexpr int64_t MaxSPChunk = (1LL << 31) - 1; |
227 | |
228 | /// emitSPUpdate - Emit a series of instructions to increment / decrement the |
229 | /// stack pointer by a constant value. |
230 | void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, |
231 | MachineBasicBlock::iterator &MBBI, |
232 | const DebugLoc &DL, int64_t NumBytes, |
233 | bool InEpilogue) const { |
234 | bool isSub = NumBytes < 0; |
235 | uint64_t Offset = isSub ? -NumBytes : NumBytes; |
236 | MachineInstr::MIFlag Flag = |
237 | isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy; |
238 | |
239 | if (!Uses64BitFramePtr && !isUInt<32>(x: Offset)) { |
240 | // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more. |
241 | // This might be unreachable code, so don't complain now; just trap if |
242 | // it's reached at runtime. |
243 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::TRAP)); |
244 | return; |
245 | } |
246 | |
247 | uint64_t Chunk = MaxSPChunk; |
248 | |
249 | MachineFunction &MF = *MBB.getParent(); |
250 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
251 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
252 | const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); |
253 | |
254 | // It's ok to not take into account large chunks when probing, as the |
255 | // allocation is split in smaller chunks anyway. |
256 | if (EmitInlineStackProbe && !InEpilogue) { |
257 | |
258 | // This pseudo-instruction is going to be expanded, potentially using a |
259 | // loop, by inlineStackProbe(). |
260 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::STACKALLOC_W_PROBING)).addImm(Val: Offset); |
261 | return; |
262 | } else if (Offset > Chunk) { |
263 | // Rather than emit a long series of instructions for large offsets, |
264 | // load the offset into a register and do one sub/add |
265 | unsigned Reg = 0; |
266 | unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX); |
267 | |
268 | if (isSub && !isEAXLiveIn(MBB)) |
269 | Reg = Rax; |
270 | else |
271 | Reg = getX86SubSuperRegister(Reg: TRI->findDeadCallerSavedReg(MBB, MBBI), |
272 | Size: Uses64BitFramePtr ? 64 : 32); |
273 | |
274 | unsigned AddSubRROpc = isSub ? getSUBrrOpcode(IsLP64: Uses64BitFramePtr) |
275 | : getADDrrOpcode(IsLP64: Uses64BitFramePtr); |
276 | if (Reg) { |
277 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: Uses64BitFramePtr, Imm: Offset)), |
278 | DestReg: Reg) |
279 | .addImm(Val: Offset) |
280 | .setMIFlag(Flag); |
281 | MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: AddSubRROpc), DestReg: StackPtr) |
282 | .addReg(RegNo: StackPtr) |
283 | .addReg(RegNo: Reg); |
284 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
285 | return; |
286 | } else if (Offset > 8 * Chunk) { |
287 | // If we would need more than 8 add or sub instructions (a >16GB stack |
288 | // frame), it's worth spilling RAX to materialize this immediate. |
289 | // pushq %rax |
290 | // movabsq +-$Offset+-SlotSize, %rax |
291 | // addq %rsp, %rax |
292 | // xchg %rax, (%rsp) |
293 | // movq (%rsp), %rsp |
294 | assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame" ); |
295 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
296 | .addReg(RegNo: Rax, flags: RegState::Kill) |
297 | .setMIFlag(Flag); |
298 | // Subtract is not commutative, so negate the offset and always use add. |
299 | // Subtract 8 less and add 8 more to account for the PUSH we just did. |
300 | if (isSub) |
301 | Offset = -(Offset - SlotSize); |
302 | else |
303 | Offset = Offset + SlotSize; |
304 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: Uses64BitFramePtr, Imm: Offset)), |
305 | DestReg: Rax) |
306 | .addImm(Val: Offset) |
307 | .setMIFlag(Flag); |
308 | MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::ADD64rr), DestReg: Rax) |
309 | .addReg(RegNo: Rax) |
310 | .addReg(RegNo: StackPtr); |
311 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
312 | // Exchange the new SP in RAX with the top of the stack. |
313 | addRegOffset( |
314 | MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::XCHG64rm), DestReg: Rax).addReg(RegNo: Rax), |
315 | Reg: StackPtr, isKill: false, Offset: 0); |
316 | // Load new SP from the top of the stack into RSP. |
317 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: StackPtr), |
318 | Reg: StackPtr, isKill: false, Offset: 0); |
319 | return; |
320 | } |
321 | } |
322 | |
323 | while (Offset) { |
324 | uint64_t ThisVal = std::min(a: Offset, b: Chunk); |
325 | if (ThisVal == SlotSize) { |
326 | // Use push / pop for slot sized adjustments as a size optimization. We |
327 | // need to find a dead register when using pop. |
328 | unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) |
329 | : TRI->findDeadCallerSavedReg(MBB, MBBI); |
330 | if (Reg) { |
331 | unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) |
332 | : (Is64Bit ? X86::POP64r : X86::POP32r); |
333 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc)) |
334 | .addReg(RegNo: Reg, flags: getDefRegState(B: !isSub) | getUndefRegState(B: isSub)) |
335 | .setMIFlag(Flag); |
336 | Offset -= ThisVal; |
337 | continue; |
338 | } |
339 | } |
340 | |
341 | BuildStackAdjustment(MBB, MBBI, DL, Offset: isSub ? -ThisVal : ThisVal, InEpilogue) |
342 | .setMIFlag(Flag); |
343 | |
344 | Offset -= ThisVal; |
345 | } |
346 | } |
347 | |
348 | MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( |
349 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
350 | const DebugLoc &DL, int64_t Offset, bool InEpilogue) const { |
351 | assert(Offset != 0 && "zero offset stack adjustment requested" ); |
352 | |
353 | // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue |
354 | // is tricky. |
355 | bool UseLEA; |
356 | if (!InEpilogue) { |
357 | // Check if inserting the prologue at the beginning |
358 | // of MBB would require to use LEA operations. |
359 | // We need to use LEA operations if EFLAGS is live in, because |
360 | // it means an instruction will read it before it gets defined. |
361 | UseLEA = STI.useLeaForSP() || MBB.isLiveIn(Reg: X86::EFLAGS); |
362 | } else { |
363 | // If we can use LEA for SP but we shouldn't, check that none |
364 | // of the terminators uses the eflags. Otherwise we will insert |
// an ADD that will redefine the eflags and break the condition.
366 | // Alternatively, we could move the ADD, but this may not be possible |
367 | // and is an optimization anyway. |
368 | UseLEA = canUseLEAForSPInEpilogue(MF: *MBB.getParent()); |
369 | if (UseLEA && !STI.useLeaForSP()) |
370 | UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB); |
371 | // If that assert breaks, that means we do not do the right thing |
372 | // in canUseAsEpilogue. |
373 | assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) && |
374 | "We shouldn't have allowed this insertion point" ); |
375 | } |
376 | |
377 | MachineInstrBuilder MI; |
378 | if (UseLEA) { |
379 | MI = addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, |
380 | MCID: TII.get(Opcode: getLEArOpcode(IsLP64: Uses64BitFramePtr)), |
381 | DestReg: StackPtr), |
382 | Reg: StackPtr, isKill: false, Offset); |
383 | } else { |
384 | bool IsSub = Offset < 0; |
385 | uint64_t AbsOffset = IsSub ? -Offset : Offset; |
386 | const unsigned Opc = IsSub ? getSUBriOpcode(IsLP64: Uses64BitFramePtr) |
387 | : getADDriOpcode(IsLP64: Uses64BitFramePtr); |
388 | MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr) |
389 | .addReg(RegNo: StackPtr) |
390 | .addImm(Val: AbsOffset); |
391 | MI->getOperand(i: 3).setIsDead(); // The EFLAGS implicit def is dead. |
392 | } |
393 | return MI; |
394 | } |
395 | |
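// Scan backward (or forward) from MBBI for an ADD/SUB/LEA that adjusts the
// stack pointer, combine its immediate with the requested adjustment via
// CalcNewOffset, and erase it (together with a following CFA-offset CFI
// directive, if any). Returns the combined offset, or CalcNewOffset(0) if no
// mergeable instruction is found.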
396 | template <typename FoundT, typename CalcT> |
397 | int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, |
398 | MachineBasicBlock::iterator &MBBI, |
399 | FoundT FoundStackAdjust, |
400 | CalcT CalcNewOffset, |
401 | bool doMergeWithPrevious) const { |
402 | if ((doMergeWithPrevious && MBBI == MBB.begin()) || |
403 | (!doMergeWithPrevious && MBBI == MBB.end())) |
404 | return CalcNewOffset(0); |
405 | |
406 | MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(x: MBBI) : MBBI; |
407 | |
408 | PI = skipDebugInstructionsBackward(It: PI, Begin: MBB.begin()); |
// It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
410 | // instruction, and that there are no DBG_VALUE or other instructions between |
411 | // ADD/SUB/LEA and its corresponding CFI instruction. |
412 | /* TODO: Add support for the case where there are multiple CFI instructions |
413 | below the ADD/SUB/LEA, e.g.: |
414 | ... |
415 | add |
416 | cfi_def_cfa_offset |
417 | cfi_offset |
418 | ... |
419 | */ |
420 | if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction()) |
421 | PI = std::prev(x: PI); |
422 | |
423 | int64_t Offset = 0; |
424 | for (;;) { |
425 | unsigned Opc = PI->getOpcode(); |
426 | |
427 | if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) && |
428 | PI->getOperand(i: 0).getReg() == StackPtr) { |
429 | assert(PI->getOperand(1).getReg() == StackPtr); |
430 | Offset = PI->getOperand(i: 2).getImm(); |
431 | } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) && |
432 | PI->getOperand(i: 0).getReg() == StackPtr && |
433 | PI->getOperand(i: 1).getReg() == StackPtr && |
434 | PI->getOperand(i: 2).getImm() == 1 && |
435 | PI->getOperand(i: 3).getReg() == X86::NoRegister && |
436 | PI->getOperand(i: 5).getReg() == X86::NoRegister) { |
437 | // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg. |
438 | Offset = PI->getOperand(i: 4).getImm(); |
439 | } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) && |
440 | PI->getOperand(i: 0).getReg() == StackPtr) { |
441 | assert(PI->getOperand(1).getReg() == StackPtr); |
442 | Offset = -PI->getOperand(i: 2).getImm(); |
443 | } else |
444 | return CalcNewOffset(0); |
445 | |
446 | FoundStackAdjust(PI, Offset); |
447 | if (std::abs(i: (int64_t)CalcNewOffset(Offset)) < MaxSPChunk) |
448 | break; |
449 | |
450 | if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end())) |
451 | return CalcNewOffset(0); |
452 | |
453 | PI = doMergeWithPrevious ? std::prev(x: PI) : std::next(x: PI); |
454 | } |
455 | |
456 | PI = MBB.erase(I: PI); |
457 | if (PI != MBB.end() && PI->isCFIInstruction()) { |
458 | auto CIs = MBB.getParent()->getFrameInstructions(); |
459 | MCCFIInstruction CI = CIs[PI->getOperand(i: 0).getCFIIndex()]; |
460 | if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset || |
461 | CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) |
462 | PI = MBB.erase(I: PI); |
463 | } |
464 | if (!doMergeWithPrevious) |
465 | MBBI = skipDebugInstructionsForward(It: PI, End: MBB.end()); |
466 | |
467 | return CalcNewOffset(Offset); |
468 | } |
469 | |
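// Convenience wrapper around mergeSPUpdates that folds a plain additive
// adjustment (AddOffset) into a neighbouring stack-pointer update.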
470 | int64_t X86FrameLowering::mergeSPAdd(MachineBasicBlock &MBB, |
471 | MachineBasicBlock::iterator &MBBI, |
472 | int64_t AddOffset, |
473 | bool doMergeWithPrevious) const { |
474 | return mergeSPUpdates( |
475 | MBB, MBBI, CalcNewOffset: [AddOffset](int64_t Offset) { return AddOffset + Offset; }, |
476 | doMergeWithPrevious); |
477 | } |
478 | |
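// Emit a CFI_INSTRUCTION wrapping CFIInst, and record when an
// OpAdjustCfaOffset directive has been emitted for this function.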
479 | void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, |
480 | MachineBasicBlock::iterator MBBI, |
481 | const DebugLoc &DL, |
482 | const MCCFIInstruction &CFIInst, |
483 | MachineInstr::MIFlag Flag) const { |
484 | MachineFunction &MF = *MBB.getParent(); |
485 | unsigned CFIIndex = MF.addFrameInst(Inst: CFIInst); |
486 | |
487 | if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) |
488 | MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); |
489 | |
490 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
491 | .addCFIIndex(CFIIndex) |
492 | .setMIFlag(Flag); |
493 | } |
494 | |
495 | /// Emits Dwarf Info specifying offsets of callee saved registers and |
496 | /// frame pointer. This is called only when basic block sections are enabled. |
497 | void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA( |
498 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { |
499 | MachineFunction &MF = *MBB.getParent(); |
500 | if (!hasFP(MF)) { |
501 | emitCalleeSavedFrameMoves(MBB, MBBI, DL: DebugLoc{}, IsPrologue: true); |
502 | return; |
503 | } |
504 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
505 | const Register FramePtr = TRI->getFrameRegister(MF); |
506 | const Register MachineFramePtr = |
507 | STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
508 | : FramePtr; |
509 | unsigned DwarfReg = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
510 | // Offset = space for return address + size of the frame pointer itself. |
511 | int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4); |
512 | BuildCFI(MBB, MBBI, DL: DebugLoc{}, |
513 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset: -Offset)); |
514 | emitCalleeSavedFrameMoves(MBB, MBBI, DL: DebugLoc{}, IsPrologue: true); |
515 | } |
516 | |
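// Emit CFI directives describing where the callee-saved registers were spilled
// (offset/expression directives in the prologue, restore directives in the
// epilogue), plus a DW_CFA_def_cfa_expression when a stack-pointer save slot
// is used.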
517 | void X86FrameLowering::emitCalleeSavedFrameMoves( |
518 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
519 | const DebugLoc &DL, bool IsPrologue) const { |
520 | MachineFunction &MF = *MBB.getParent(); |
521 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
522 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
523 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
524 | |
525 | // Add callee saved registers to move list. |
526 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
527 | |
528 | // Calculate offsets. |
529 | for (const CalleeSavedInfo &I : CSI) { |
530 | int64_t Offset = MFI.getObjectOffset(ObjectIdx: I.getFrameIdx()); |
531 | MCRegister Reg = I.getReg(); |
532 | unsigned DwarfReg = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
533 | |
534 | if (IsPrologue) { |
535 | if (X86FI->getStackPtrSaveMI()) { |
536 | // +2*SlotSize because there is return address and ebp at the bottom |
537 | // of the stack. |
538 | // | retaddr | |
539 | // | ebp | |
540 | // | |<--ebp |
541 | Offset += 2 * SlotSize; |
542 | SmallString<64> CfaExpr; |
543 | CfaExpr.push_back(Elt: dwarf::DW_CFA_expression); |
544 | uint8_t buffer[16]; |
545 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer)); |
546 | CfaExpr.push_back(Elt: 2); |
547 | Register FramePtr = TRI->getFrameRegister(MF); |
548 | const Register MachineFramePtr = |
549 | STI.isTarget64BitILP32() |
550 | ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
551 | : FramePtr; |
552 | unsigned DwarfFramePtr = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
553 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr)); |
554 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer)); |
555 | BuildCFI(MBB, MBBI, DL, |
556 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str()), |
557 | Flag: MachineInstr::FrameSetup); |
558 | } else { |
559 | BuildCFI(MBB, MBBI, DL, |
560 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset)); |
561 | } |
562 | } else { |
563 | BuildCFI(MBB, MBBI, DL, |
564 | CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfReg)); |
565 | } |
566 | } |
567 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
568 | int FI = MI->getOperand(i: 1).getIndex(); |
569 | int64_t Offset = MFI.getObjectOffset(ObjectIdx: FI) + 2 * SlotSize; |
570 | SmallString<64> CfaExpr; |
571 | Register FramePtr = TRI->getFrameRegister(MF); |
572 | const Register MachineFramePtr = |
573 | STI.isTarget64BitILP32() |
574 | ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
575 | : FramePtr; |
576 | unsigned DwarfFramePtr = MRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
577 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr)); |
578 | uint8_t buffer[16]; |
579 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer)); |
580 | CfaExpr.push_back(Elt: dwarf::DW_OP_deref); |
581 | |
582 | SmallString<64> DefCfaExpr; |
583 | DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression); |
584 | DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: CfaExpr.size(), p: buffer)); |
585 | DefCfaExpr.append(RHS: CfaExpr.str()); |
586 | // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref |
587 | BuildCFI(MBB, MBBI, DL, |
588 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str()), |
589 | Flag: MachineInstr::FrameSetup); |
590 | } |
591 | } |
592 | |
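// Zero the requested call-used registers before returning: the x87 stack is
// cleared with LD_F0/ST_FPrr pairs, GPRs are cleared through their 32-bit
// aliases, and anything left is handed to buildClearRegister.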
593 | void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, |
594 | MachineBasicBlock &MBB) const { |
595 | const MachineFunction &MF = *MBB.getParent(); |
596 | |
597 | // Insertion point. |
598 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
599 | |
600 | // Fake a debug loc. |
601 | DebugLoc DL; |
602 | if (MBBI != MBB.end()) |
603 | DL = MBBI->getDebugLoc(); |
604 | |
605 | // Zero out FP stack if referenced. Do this outside of the loop below so that |
606 | // it's done only once. |
607 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
608 | for (MCRegister Reg : RegsToZero.set_bits()) { |
609 | if (!X86::RFP80RegClass.contains(Reg)) |
610 | continue; |
611 | |
612 | unsigned NumFPRegs = ST.is64Bit() ? 8 : 7; |
613 | for (unsigned i = 0; i != NumFPRegs; ++i) |
614 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LD_F0)); |
615 | |
616 | for (unsigned i = 0; i != NumFPRegs; ++i) |
617 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::ST_FPrr)).addReg(RegNo: X86::ST0); |
618 | break; |
619 | } |
620 | |
621 | // For GPRs, we only care to clear out the 32-bit register. |
622 | BitVector GPRsToZero(TRI->getNumRegs()); |
623 | for (MCRegister Reg : RegsToZero.set_bits()) |
624 | if (TRI->isGeneralPurposeRegister(MF, Reg)) { |
625 | GPRsToZero.set(getX86SubSuperRegister(Reg, Size: 32)); |
626 | RegsToZero.reset(Idx: Reg); |
627 | } |
628 | |
629 | // Zero out the GPRs first. |
630 | for (MCRegister Reg : GPRsToZero.set_bits()) |
631 | TII.buildClearRegister(Reg, MBB, Iter: MBBI, DL); |
632 | |
633 | // Zero out the remaining registers. |
634 | for (MCRegister Reg : RegsToZero.set_bits()) |
635 | TII.buildClearRegister(Reg, MBB, Iter: MBBI, DL); |
636 | } |
637 | |
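// Emit a stack probe for a large or variable-sized allocation: Windows CoreCLR
// gets the inline expansion (deferred via STACKALLOC_W_PROBING when in the
// prologue), everything else calls the target's stack-probe function.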
638 | void X86FrameLowering::emitStackProbe( |
639 | MachineFunction &MF, MachineBasicBlock &MBB, |
640 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, |
641 | std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const { |
642 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
643 | if (STI.isTargetWindowsCoreCLR()) { |
644 | if (InProlog) { |
645 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::STACKALLOC_W_PROBING)) |
646 | .addImm(Val: 0 /* no explicit stack size */); |
647 | } else { |
648 | emitStackProbeInline(MF, MBB, MBBI, DL, InProlog: false); |
649 | } |
650 | } else { |
651 | emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum); |
652 | } |
653 | } |
654 | |
655 | bool X86FrameLowering::stackProbeFunctionModifiesSP() const { |
656 | return STI.isOSWindows() && !STI.isTargetWin64(); |
657 | } |
658 | |
659 | void X86FrameLowering::inlineStackProbe(MachineFunction &MF, |
660 | MachineBasicBlock &PrologMBB) const { |
661 | auto Where = llvm::find_if(Range&: PrologMBB, P: [](MachineInstr &MI) { |
662 | return MI.getOpcode() == X86::STACKALLOC_W_PROBING; |
663 | }); |
664 | if (Where != PrologMBB.end()) { |
665 | DebugLoc DL = PrologMBB.findDebugLoc(MBBI: Where); |
666 | emitStackProbeInline(MF, MBB&: PrologMBB, MBBI: Where, DL, InProlog: true); |
667 | Where->eraseFromParent(); |
668 | } |
669 | } |
670 | |
671 | void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, |
672 | MachineBasicBlock &MBB, |
673 | MachineBasicBlock::iterator MBBI, |
674 | const DebugLoc &DL, |
675 | bool InProlog) const { |
676 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
677 | if (STI.isTargetWindowsCoreCLR() && STI.is64Bit()) |
678 | emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog); |
679 | else |
680 | emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog); |
681 | } |
682 | |
683 | void X86FrameLowering::emitStackProbeInlineGeneric( |
684 | MachineFunction &MF, MachineBasicBlock &MBB, |
685 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { |
686 | MachineInstr &AllocWithProbe = *MBBI; |
687 | uint64_t Offset = AllocWithProbe.getOperand(i: 0).getImm(); |
688 | |
689 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
690 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
691 | assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) && |
692 | "different expansion expected for CoreCLR 64 bit" ); |
693 | |
694 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
695 | uint64_t ProbeChunk = StackProbeSize * 8; |
696 | |
697 | uint64_t MaxAlign = |
698 | TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0; |
699 | |
700 | // Synthesize a loop or unroll it, depending on the number of iterations. |
// BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bytes are
// left between the unaligned rsp and the current rsp.
703 | if (Offset > ProbeChunk) { |
704 | emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset, |
705 | Align: MaxAlign % StackProbeSize); |
706 | } else { |
707 | emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset, |
708 | Align: MaxAlign % StackProbeSize); |
709 | } |
710 | } |
711 | |
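// Unrolled form of the generic inline probe: allocate and touch one
// StackProbeSize page at a time, leaving the sub-page tail unprobed.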
712 | void X86FrameLowering::emitStackProbeInlineGenericBlock( |
713 | MachineFunction &MF, MachineBasicBlock &MBB, |
714 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, |
715 | uint64_t AlignOffset) const { |
716 | |
717 | const bool NeedsDwarfCFI = needsDwarfCFI(MF); |
718 | const bool HasFP = hasFP(MF); |
719 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
720 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
721 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
722 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
723 | |
724 | uint64_t CurrentOffset = 0; |
725 | |
726 | assert(AlignOffset < StackProbeSize); |
727 | |
728 | // If the offset is so small it fits within a page, there's nothing to do. |
729 | if (StackProbeSize < Offset + AlignOffset) { |
730 | |
731 | uint64_t StackAdjustment = StackProbeSize - AlignOffset; |
732 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -StackAdjustment, /*InEpilogue=*/false) |
733 | .setMIFlag(MachineInstr::FrameSetup); |
734 | if (!HasFP && NeedsDwarfCFI) { |
735 | BuildCFI( |
736 | MBB, MBBI, DL, |
737 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: StackAdjustment)); |
738 | } |
739 | |
740 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
741 | .setMIFlag(MachineInstr::FrameSetup), |
742 | Reg: StackPtr, isKill: false, Offset: 0) |
743 | .addImm(Val: 0) |
744 | .setMIFlag(MachineInstr::FrameSetup); |
745 | NumFrameExtraProbe++; |
746 | CurrentOffset = StackProbeSize - AlignOffset; |
747 | } |
748 | |
// For the next N - 1 pages, just probe. I tried to take advantage of
// natural probes but it implies much more logic and there were very few
// interesting natural probes to interleave.
752 | while (CurrentOffset + StackProbeSize < Offset) { |
753 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -StackProbeSize, /*InEpilogue=*/false) |
754 | .setMIFlag(MachineInstr::FrameSetup); |
755 | |
756 | if (!HasFP && NeedsDwarfCFI) { |
757 | BuildCFI( |
758 | MBB, MBBI, DL, |
759 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: StackProbeSize)); |
760 | } |
761 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
762 | .setMIFlag(MachineInstr::FrameSetup), |
763 | Reg: StackPtr, isKill: false, Offset: 0) |
764 | .addImm(Val: 0) |
765 | .setMIFlag(MachineInstr::FrameSetup); |
766 | NumFrameExtraProbe++; |
767 | CurrentOffset += StackProbeSize; |
768 | } |
769 | |
770 | // No need to probe the tail, it is smaller than a Page. |
771 | uint64_t ChunkSize = Offset - CurrentOffset; |
772 | if (ChunkSize == SlotSize) { |
773 | // Use push for slot sized adjustments as a size optimization, |
774 | // like emitSPUpdate does when not probing. |
775 | unsigned Reg = Is64Bit ? X86::RAX : X86::EAX; |
776 | unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r; |
777 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc)) |
778 | .addReg(RegNo: Reg, flags: RegState::Undef) |
779 | .setMIFlag(MachineInstr::FrameSetup); |
780 | } else { |
781 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -ChunkSize, /*InEpilogue=*/false) |
782 | .setMIFlag(MachineInstr::FrameSetup); |
783 | } |
// No need to adjust the DWARF CFA offset here; the final stack position has
// already been defined.
786 | } |
787 | |
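// Loop form of the generic inline probe, used when unrolling would take too
// many probes. A rough sketch of what is emitted (assuming a 4 KiB probe
// size, with the loop bound kept in a scratch register):
//   bound = SP - align_down(Offset, 4096)
//   loop: SP -= 4096; store 0 to (SP); if (SP != bound) goto loop
// followed by an unprobed allocation of the remaining Offset % 4096 bytes.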
788 | void X86FrameLowering::emitStackProbeInlineGenericLoop( |
789 | MachineFunction &MF, MachineBasicBlock &MBB, |
790 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, |
791 | uint64_t AlignOffset) const { |
792 | assert(Offset && "null offset" ); |
793 | |
794 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
795 | MachineBasicBlock::LQR_Live && |
796 | "Inline stack probe loop will clobber live EFLAGS." ); |
797 | |
798 | const bool NeedsDwarfCFI = needsDwarfCFI(MF); |
799 | const bool HasFP = hasFP(MF); |
800 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
801 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
802 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
803 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
804 | |
805 | if (AlignOffset) { |
806 | if (AlignOffset < StackProbeSize) { |
807 | // Perform a first smaller allocation followed by a probe. |
808 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -AlignOffset, /*InEpilogue=*/false) |
809 | .setMIFlag(MachineInstr::FrameSetup); |
810 | |
811 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
812 | .setMIFlag(MachineInstr::FrameSetup), |
813 | Reg: StackPtr, isKill: false, Offset: 0) |
814 | .addImm(Val: 0) |
815 | .setMIFlag(MachineInstr::FrameSetup); |
816 | NumFrameExtraProbe++; |
817 | Offset -= AlignOffset; |
818 | } |
819 | } |
820 | |
821 | // Synthesize a loop |
822 | NumFrameLoopProbe++; |
823 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); |
824 | |
825 | MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
826 | MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
827 | |
828 | MachineFunction::iterator MBBIter = ++MBB.getIterator(); |
829 | MF.insert(MBBI: MBBIter, MBB: testMBB); |
830 | MF.insert(MBBI: MBBIter, MBB: tailMBB); |
831 | |
832 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 |
833 | : Is64Bit ? X86::R11D |
834 | : X86::EAX; |
835 | |
836 | // save loop bound |
837 | { |
838 | const uint64_t BoundOffset = alignDown(Value: Offset, Align: StackProbeSize); |
839 | |
840 | // Can we calculate the loop bound using SUB with a 32-bit immediate? |
841 | // Note that the immediate gets sign-extended when used with a 64-bit |
842 | // register, so in that case we only have 31 bits to work with. |
843 | bool canUseSub = |
844 | Uses64BitFramePtr ? isUInt<31>(x: BoundOffset) : isUInt<32>(x: BoundOffset); |
845 | |
846 | if (canUseSub) { |
847 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
848 | |
849 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: FinalStackProbed) |
850 | .addReg(RegNo: StackPtr) |
851 | .setMIFlag(MachineInstr::FrameSetup); |
852 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: FinalStackProbed) |
853 | .addReg(RegNo: FinalStackProbed) |
854 | .addImm(Val: BoundOffset) |
855 | .setMIFlag(MachineInstr::FrameSetup); |
856 | } else if (Uses64BitFramePtr) { |
857 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64ri), DestReg: FinalStackProbed) |
858 | .addImm(Val: -BoundOffset) |
859 | .setMIFlag(MachineInstr::FrameSetup); |
860 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::ADD64rr), DestReg: FinalStackProbed) |
861 | .addReg(RegNo: FinalStackProbed) |
862 | .addReg(RegNo: StackPtr) |
863 | .setMIFlag(MachineInstr::FrameSetup); |
864 | } else { |
865 | llvm_unreachable("Offset too large for 32-bit stack pointer" ); |
866 | } |
867 | |
868 | // while in the loop, use loop-invariant reg for CFI, |
869 | // instead of the stack pointer, which changes during the loop |
870 | if (!HasFP && NeedsDwarfCFI) { |
871 | // x32 uses the same DWARF register numbers as x86-64, |
872 | // so there isn't a register number for r11d, we must use r11 instead |
873 | const Register DwarfFinalStackProbed = |
874 | STI.isTarget64BitILP32() |
875 | ? Register(getX86SubSuperRegister(Reg: FinalStackProbed, Size: 64)) |
876 | : FinalStackProbed; |
877 | |
878 | BuildCFI(MBB, MBBI, DL, |
879 | CFIInst: MCCFIInstruction::createDefCfaRegister( |
880 | L: nullptr, Register: TRI->getDwarfRegNum(RegNum: DwarfFinalStackProbed, isEH: true))); |
881 | BuildCFI(MBB, MBBI, DL, |
882 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: BoundOffset)); |
883 | } |
884 | } |
885 | |
886 | // allocate a page |
887 | BuildStackAdjustment(MBB&: *testMBB, MBBI: testMBB->end(), DL, Offset: -StackProbeSize, |
888 | /*InEpilogue=*/false) |
889 | .setMIFlag(MachineInstr::FrameSetup); |
890 | |
891 | // touch the page |
892 | addRegOffset(MIB: BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
893 | .setMIFlag(MachineInstr::FrameSetup), |
894 | Reg: StackPtr, isKill: false, Offset: 0) |
895 | .addImm(Val: 0) |
896 | .setMIFlag(MachineInstr::FrameSetup); |
897 | |
898 | // cmp with stack pointer bound |
899 | BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
900 | .addReg(RegNo: StackPtr) |
901 | .addReg(RegNo: FinalStackProbed) |
902 | .setMIFlag(MachineInstr::FrameSetup); |
903 | |
904 | // jump |
905 | BuildMI(BB: testMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
906 | .addMBB(MBB: testMBB) |
907 | .addImm(Val: X86::COND_NE) |
908 | .setMIFlag(MachineInstr::FrameSetup); |
909 | testMBB->addSuccessor(Succ: testMBB); |
910 | testMBB->addSuccessor(Succ: tailMBB); |
911 | |
912 | // BB management |
913 | tailMBB->splice(Where: tailMBB->end(), Other: &MBB, From: MBBI, To: MBB.end()); |
914 | tailMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
915 | MBB.addSuccessor(Succ: testMBB); |
916 | |
917 | // handle tail |
918 | const uint64_t TailOffset = Offset % StackProbeSize; |
919 | MachineBasicBlock::iterator TailMBBIter = tailMBB->begin(); |
920 | if (TailOffset) { |
921 | BuildStackAdjustment(MBB&: *tailMBB, MBBI: TailMBBIter, DL, Offset: -TailOffset, |
922 | /*InEpilogue=*/false) |
923 | .setMIFlag(MachineInstr::FrameSetup); |
924 | } |
925 | |
926 | // after the loop, switch back to stack pointer for CFI |
927 | if (!HasFP && NeedsDwarfCFI) { |
928 | // x32 uses the same DWARF register numbers as x86-64, |
929 | // so there isn't a register number for esp, we must use rsp instead |
930 | const Register DwarfStackPtr = |
931 | STI.isTarget64BitILP32() |
932 | ? Register(getX86SubSuperRegister(Reg: StackPtr, Size: 64)) |
933 | : Register(StackPtr); |
934 | |
935 | BuildCFI(MBB&: *tailMBB, MBBI: TailMBBIter, DL, |
936 | CFIInst: MCCFIInstruction::createDefCfaRegister( |
937 | L: nullptr, Register: TRI->getDwarfRegNum(RegNum: DwarfStackPtr, isEH: true))); |
938 | } |
939 | |
940 | // Update Live In information |
941 | fullyRecomputeLiveIns(MBBs: {tailMBB, testMBB}); |
942 | } |
943 | |
944 | void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64( |
945 | MachineFunction &MF, MachineBasicBlock &MBB, |
946 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { |
947 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
948 | assert(STI.is64Bit() && "different expansion needed for 32 bit" ); |
949 | assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR" ); |
950 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
951 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); |
952 | |
953 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
954 | MachineBasicBlock::LQR_Live && |
955 | "Inline stack probe loop will clobber live EFLAGS." ); |
956 | |
957 | // RAX contains the number of bytes of desired stack adjustment. |
958 | // The handling here assumes this value has already been updated so as to |
959 | // maintain stack alignment. |
960 | // |
961 | // We need to exit with RSP modified by this amount and execute suitable |
962 | // page touches to notify the OS that we're growing the stack responsibly. |
963 | // All stack probing must be done without modifying RSP. |
964 | // |
965 | // MBB: |
966 | // SizeReg = RAX; |
967 | // ZeroReg = 0 |
968 | // CopyReg = RSP |
969 | // Flags, TestReg = CopyReg - SizeReg |
970 | // FinalReg = !Flags.Ovf ? TestReg : ZeroReg |
971 | // LimitReg = gs magic thread env access |
972 | // if FinalReg >= LimitReg goto ContinueMBB |
973 | // RoundBB: |
974 | // RoundReg = page address of FinalReg |
975 | // LoopMBB: |
976 | // LoopReg = PHI(LimitReg,ProbeReg) |
977 | // ProbeReg = LoopReg - PageSize |
978 | // [ProbeReg] = 0 |
979 | // if (ProbeReg > RoundReg) goto LoopMBB |
980 | // ContinueMBB: |
981 | // RSP = RSP - RAX |
982 | // [rest of original MBB] |
983 | |
984 | // Set up the new basic blocks |
985 | MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
986 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
987 | MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(BB: LLVM_BB); |
988 | |
989 | MachineFunction::iterator MBBIter = std::next(x: MBB.getIterator()); |
990 | MF.insert(MBBI: MBBIter, MBB: RoundMBB); |
991 | MF.insert(MBBI: MBBIter, MBB: LoopMBB); |
992 | MF.insert(MBBI: MBBIter, MBB: ContinueMBB); |
993 | |
994 | // Split MBB and move the tail portion down to ContinueMBB. |
995 | MachineBasicBlock::iterator BeforeMBBI = std::prev(x: MBBI); |
996 | ContinueMBB->splice(Where: ContinueMBB->begin(), Other: &MBB, From: MBBI, To: MBB.end()); |
997 | ContinueMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
998 | |
999 | // Some useful constants |
1000 | const int64_t ThreadEnvironmentStackLimit = 0x10; |
1001 | const int64_t PageSize = 0x1000; |
1002 | const int64_t PageMask = ~(PageSize - 1); |
1003 | |
1004 | // Registers we need. For the normal case we use virtual |
1005 | // registers. For the prolog expansion we use RAX, RCX and RDX. |
1006 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
1007 | const TargetRegisterClass *RegClass = &X86::GR64RegClass; |
1008 | const Register |
1009 | SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass), |
1010 | ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
1011 | CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
1012 | TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
1013 | FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
1014 | RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass), |
1015 | LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
1016 | JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass), |
1017 | ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass); |
1018 | |
1019 | // SP-relative offsets where we can save RCX and RDX. |
1020 | int64_t RCXShadowSlot = 0; |
1021 | int64_t RDXShadowSlot = 0; |
1022 | |
1023 | // If inlining in the prolog, save RCX and RDX. |
1024 | if (InProlog) { |
1025 | // Compute the offsets. We need to account for things already |
1026 | // pushed onto the stack at this point: return address, frame |
1027 | // pointer (if used), and callee saves. |
1028 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
1029 | const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); |
1030 | const bool HasFP = hasFP(MF); |
1031 | |
1032 | // Check if we need to spill RCX and/or RDX. |
1033 | // Here we assume that no earlier prologue instruction changes RCX and/or |
1034 | // RDX, so checking the block live-ins is enough. |
1035 | const bool IsRCXLiveIn = MBB.isLiveIn(Reg: X86::RCX); |
1036 | const bool IsRDXLiveIn = MBB.isLiveIn(Reg: X86::RDX); |
1037 | int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); |
1038 | // Assign the initial slot to both registers, then change RDX's slot if both |
1039 | // need to be spilled. |
1040 | if (IsRCXLiveIn) |
1041 | RCXShadowSlot = InitSlot; |
1042 | if (IsRDXLiveIn) |
1043 | RDXShadowSlot = InitSlot; |
1044 | if (IsRDXLiveIn && IsRCXLiveIn) |
1045 | RDXShadowSlot += 8; |
1046 | // Emit the saves if needed. |
1047 | if (IsRCXLiveIn) |
1048 | addRegOffset(MIB: BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: X86::RSP, isKill: false, |
1049 | Offset: RCXShadowSlot) |
1050 | .addReg(RegNo: X86::RCX); |
1051 | if (IsRDXLiveIn) |
1052 | addRegOffset(MIB: BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: X86::RSP, isKill: false, |
1053 | Offset: RDXShadowSlot) |
1054 | .addReg(RegNo: X86::RDX); |
1055 | } else { |
1056 | // Not in the prolog. Copy RAX to a virtual reg. |
1057 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: SizeReg).addReg(RegNo: X86::RAX); |
1058 | } |
1059 | |
1060 | // Add code to MBB to check for overflow and set the new target stack pointer |
1061 | // to zero if so. |
1062 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::XOR64rr), DestReg: ZeroReg) |
1063 | .addReg(RegNo: ZeroReg, flags: RegState::Undef) |
1064 | .addReg(RegNo: ZeroReg, flags: RegState::Undef); |
1065 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: CopyReg).addReg(RegNo: X86::RSP); |
1066 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64rr), DestReg: TestReg) |
1067 | .addReg(RegNo: CopyReg) |
1068 | .addReg(RegNo: SizeReg); |
1069 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMOV64rr), DestReg: FinalReg) |
1070 | .addReg(RegNo: TestReg) |
1071 | .addReg(RegNo: ZeroReg) |
1072 | .addImm(Val: X86::COND_B); |
1073 | |
1074 | // FinalReg now holds final stack pointer value, or zero if |
1075 | // allocation would overflow. Compare against the current stack |
1076 | // limit from the thread environment block. Note this limit is the |
1077 | // lowest touched page on the stack, not the point at which the OS |
1078 | // will cause an overflow exception, so this is just an optimization |
1079 | // to avoid unnecessarily touching pages that are below the current |
1080 | // SP but already committed to the stack by the OS. |
1081 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: LimitReg) |
1082 | .addReg(RegNo: 0) |
1083 | .addImm(Val: 1) |
1084 | .addReg(RegNo: 0) |
1085 | .addImm(Val: ThreadEnvironmentStackLimit) |
1086 | .addReg(RegNo: X86::GS); |
1087 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP64rr)).addReg(RegNo: FinalReg).addReg(RegNo: LimitReg); |
1088 | // Jump if the desired stack pointer is at or above the stack limit. |
1089 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1090 | .addMBB(MBB: ContinueMBB) |
1091 | .addImm(Val: X86::COND_AE); |
1092 | |
1093 | // Add code to roundMBB to round the final stack pointer to a page boundary. |
1094 | if (InProlog) |
1095 | RoundMBB->addLiveIn(PhysReg: FinalReg); |
1096 | BuildMI(BB: RoundMBB, MIMD: DL, MCID: TII.get(Opcode: X86::AND64ri32), DestReg: RoundedReg) |
1097 | .addReg(RegNo: FinalReg) |
1098 | .addImm(Val: PageMask); |
1099 | BuildMI(BB: RoundMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JMP_1)).addMBB(MBB: LoopMBB); |
1100 | |
// LimitReg now holds the current stack limit and RoundedReg the page-rounded
// final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1103 | // and probe until we reach RoundedReg. |
1104 | if (!InProlog) { |
1105 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PHI), DestReg: JoinReg) |
1106 | .addReg(RegNo: LimitReg) |
1107 | .addMBB(MBB: RoundMBB) |
1108 | .addReg(RegNo: ProbeReg) |
1109 | .addMBB(MBB: LoopMBB); |
1110 | } |
1111 | |
1112 | if (InProlog) |
1113 | LoopMBB->addLiveIn(PhysReg: JoinReg); |
1114 | addRegOffset(MIB: BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: ProbeReg), Reg: JoinReg, |
1115 | isKill: false, Offset: -PageSize); |
1116 | |
1117 | // Probe by storing a byte onto the stack. |
1118 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV8mi)) |
1119 | .addReg(RegNo: ProbeReg) |
1120 | .addImm(Val: 1) |
1121 | .addReg(RegNo: 0) |
1122 | .addImm(Val: 0) |
1123 | .addReg(RegNo: 0) |
1124 | .addImm(Val: 0); |
1125 | |
1126 | if (InProlog) |
1127 | LoopMBB->addLiveIn(PhysReg: RoundedReg); |
1128 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP64rr)) |
1129 | .addReg(RegNo: RoundedReg) |
1130 | .addReg(RegNo: ProbeReg); |
1131 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1132 | .addMBB(MBB: LoopMBB) |
1133 | .addImm(Val: X86::COND_NE); |
1134 | |
1135 | MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI(); |
1136 | |
1137 | // If in prolog, restore RDX and RCX. |
1138 | if (InProlog) { |
1139 | if (RCXShadowSlot) // It means we spilled RCX in the prologue. |
1140 | addRegOffset(MIB: BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL, |
1141 | MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RCX), |
1142 | Reg: X86::RSP, isKill: false, Offset: RCXShadowSlot); |
1143 | if (RDXShadowSlot) // It means we spilled RDX in the prologue. |
1144 | addRegOffset(MIB: BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL, |
1145 | MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RDX), |
1146 | Reg: X86::RSP, isKill: false, Offset: RDXShadowSlot); |
1147 | } |
1148 | |
1149 | // Now that the probing is done, add code to continueMBB to update |
1150 | // the stack pointer for real. |
1151 | BuildMI(BB&: *ContinueMBB, I: ContinueMBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64rr), DestReg: X86::RSP) |
1152 | .addReg(RegNo: X86::RSP) |
1153 | .addReg(RegNo: SizeReg); |
1154 | |
1155 | // Add the control flow edges we need. |
1156 | MBB.addSuccessor(Succ: ContinueMBB); |
1157 | MBB.addSuccessor(Succ: RoundMBB); |
1158 | RoundMBB->addSuccessor(Succ: LoopMBB); |
1159 | LoopMBB->addSuccessor(Succ: ContinueMBB); |
1160 | LoopMBB->addSuccessor(Succ: LoopMBB); |
1161 | |
1162 | if (InProlog) { |
1163 | LivePhysRegs LiveRegs; |
1164 | computeAndAddLiveIns(LiveRegs, MBB&: *ContinueMBB); |
1165 | } |
1166 | |
1167 | // Mark all the instructions added to the prolog as frame setup. |
1168 | if (InProlog) { |
1169 | for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) { |
1170 | BeforeMBBI->setFlag(MachineInstr::FrameSetup); |
1171 | } |
1172 | for (MachineInstr &MI : *RoundMBB) { |
1173 | MI.setFlag(MachineInstr::FrameSetup); |
1174 | } |
1175 | for (MachineInstr &MI : *LoopMBB) { |
1176 | MI.setFlag(MachineInstr::FrameSetup); |
1177 | } |
1178 | for (MachineInstr &MI : |
1179 | llvm::make_range(x: ContinueMBB->begin(), y: ContinueMBBI)) { |
1180 | MI.setFlag(MachineInstr::FrameSetup); |
1181 | } |
1182 | } |
1183 | } |
1184 | |
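// Emit a call to the target's stack probe function (e.g. __chkstk), which
// takes the allocation size in [ER]AX, and then subtract the probed amount
// from the stack pointer on targets whose probe does not adjust it itself.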
1185 | void X86FrameLowering::emitStackProbeCall( |
1186 | MachineFunction &MF, MachineBasicBlock &MBB, |
1187 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, |
1188 | std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const { |
1189 | bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; |
1190 | |
1191 | // FIXME: Add indirect thunk support and remove this. |
1192 | if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls()) |
1193 | report_fatal_error(reason: "Emitting stack probe calls on 64-bit with the large " |
1194 | "code model and indirect thunks not yet implemented." ); |
1195 | |
1196 | assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) != |
1197 | MachineBasicBlock::LQR_Live && |
1198 | "Stack probe calls will clobber live EFLAGS." ); |
1199 | |
1200 | unsigned CallOp; |
1201 | if (Is64Bit) |
1202 | CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; |
1203 | else |
1204 | CallOp = X86::CALLpcrel32; |
1205 | |
1206 | StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF); |
1207 | |
1208 | MachineInstrBuilder CI; |
1209 | MachineBasicBlock::iterator ExpansionMBBI = std::prev(x: MBBI); |
1210 | |
1211 | // All current stack probes take AX and SP as input, clobber flags, and |
1212 | // preserve all registers. x86_64 probes leave RSP unmodified. |
1213 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { |
1214 | // For the large code model, we have to call through a register. Use R11, |
1215 | // as it is scratch in all supported calling conventions. |
1216 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64ri), DestReg: X86::R11) |
1217 | .addExternalSymbol(FnName: MF.createExternalSymbolName(Name: Symbol)); |
1218 | CI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: CallOp)).addReg(RegNo: X86::R11); |
1219 | } else { |
1220 | CI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: CallOp)) |
1221 | .addExternalSymbol(FnName: MF.createExternalSymbolName(Name: Symbol)); |
1222 | } |
1223 | |
1224 | unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX; |
1225 | unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP; |
1226 | CI.addReg(RegNo: AX, flags: RegState::Implicit) |
1227 | .addReg(RegNo: SP, flags: RegState::Implicit) |
1228 | .addReg(RegNo: AX, flags: RegState::Define | RegState::Implicit) |
1229 | .addReg(RegNo: SP, flags: RegState::Define | RegState::Implicit) |
1230 | .addReg(RegNo: X86::EFLAGS, flags: RegState::Define | RegState::Implicit); |
1231 | |
1232 | MachineInstr *ModInst = CI; |
1233 | if (STI.isTargetWin64() || !STI.isOSWindows()) { |
1234 | // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. |
1235 | // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp |
// themselves. They also do not clobber %rax, so we can reuse it when
1237 | // adjusting %rsp. |
1238 | // All other platforms do not specify a particular ABI for the stack probe |
1239 | // function, so we arbitrarily define it to not adjust %esp/%rsp itself. |
1240 | ModInst = |
1241 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getSUBrrOpcode(IsLP64: Uses64BitFramePtr)), DestReg: SP) |
1242 | .addReg(RegNo: SP) |
1243 | .addReg(RegNo: AX); |
1244 | } |
1245 | |
1246 | // DebugInfo variable locations -- if there's an instruction number for the |
1247 | // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that |
1248 | // modifies SP. |
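// Illustrative example (not taken from the original comments): if the
// DYN_ALLOC pseudo carried debug instruction number 7, the substitution below
// records 7 -> (number of the instruction defining SP, index of that def), so
// instruction-referencing debug values keep tracking the new SP value.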
1249 | if (InstrNum) { |
1250 | if (STI.isTargetWin64() || !STI.isOSWindows()) { |
1251 | // Label destination operand of the subtract. |
1252 | MF.makeDebugValueSubstitution(*InstrNum, |
1253 | {ModInst->getDebugInstrNum(), 0}); |
1254 | } else { |
1255 | // Label the call. The operand number is the penultimate operand, zero |
1256 | // based. |
1257 | unsigned SPDefOperand = ModInst->getNumOperands() - 2; |
1258 | MF.makeDebugValueSubstitution( |
1259 | *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand}); |
1260 | } |
1261 | } |
1262 | |
1263 | if (InProlog) { |
1264 | // Apply the frame setup flag to all inserted instrs. |
1265 | for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI) |
1266 | ExpansionMBBI->setFlag(MachineInstr::FrameSetup); |
1267 | } |
1268 | } |
1269 | |
1270 | static unsigned calculateSetFPREG(uint64_t SPAdjust) { |
1271 | // Win64 ABI has a less restrictive limitation of 240; 128 works equally well |
1272 | // and might require smaller successive adjustments. |
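// Worked example (illustrative): SPAdjust = 300 clamps to 128, and
// 128 & -16 == 128; SPAdjust = 100 stays at 100, and 100 & -16 == 96, which
// satisfies the 16-byte alignment required below.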
1273 | const uint64_t Win64MaxSEHOffset = 128; |
1274 | uint64_t SEHFrameOffset = std::min(a: SPAdjust, b: Win64MaxSEHOffset); |
1275 | // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. |
1276 | return SEHFrameOffset & -16; |
1277 | } |
1278 | |
1279 | // If we're forcing a stack realignment we can't rely on just the frame |
1280 | // info, we need to know the ABI stack alignment as well in case we |
1281 | // have a call out. Otherwise just make sure we have some alignment - we'll |
1282 | // go with the minimum SlotSize. |
1283 | uint64_t |
1284 | X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { |
1285 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1286 | Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment. |
1287 | Align StackAlign = getStackAlign(); |
1288 | bool HasRealign = MF.getFunction().hasFnAttribute(Kind: "stackrealign" ); |
1289 | if (HasRealign) { |
1290 | if (MFI.hasCalls()) |
1291 | MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; |
1292 | else if (MaxAlign < SlotSize) |
1293 | MaxAlign = Align(SlotSize); |
1294 | } |
1295 | |
1296 | if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) { |
1297 | if (HasRealign) |
1298 | MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16); |
1299 | else |
1300 | MaxAlign = Align(16); |
1301 | } |
1302 | return MaxAlign.value(); |
1303 | } |
1304 | |
1305 | void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, |
1306 | MachineBasicBlock::iterator MBBI, |
1307 | const DebugLoc &DL, Register Reg, |
1308 | uint64_t MaxAlign) const { |
1309 | uint64_t Val = -MaxAlign; |
1310 | unsigned AndOp = getANDriOpcode(IsLP64: Uses64BitFramePtr, Imm: Val); |
1311 | |
1312 | MachineFunction &MF = *MBB.getParent(); |
1313 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
1314 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
1315 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); |
1316 | const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); |
1317 | |
// We want to ensure that, in the worst case, fewer than StackProbeSize bytes
// remain unprobed after the AND. This assumption is used in
// emitStackProbeInlineGeneric.
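// Rough shape of the probing loop emitted below (a sketch, not literal
// output; r11 is the scratch register used on x86-64):
//   entry: r11 = rsp & -MaxAlign; cmp r11, rsp; je <tail>
//   head:  rsp -= StackProbeSize;  cmp rsp, r11; jb foot
//   body:  mov dword ptr [rsp], 0; rsp -= StackProbeSize; cmp r11, rsp; jb body
//   foot:  rsp = r11; mov dword ptr [rsp], 0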
1321 | if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) { |
1322 | { |
1323 | NumFrameLoopProbe++; |
1324 | MachineBasicBlock *entryMBB = |
1325 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1326 | MachineBasicBlock *headMBB = |
1327 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1328 | MachineBasicBlock *bodyMBB = |
1329 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
MachineBasicBlock *footMBB =
1331 | MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock()); |
1332 | |
1333 | MachineFunction::iterator MBBIter = MBB.getIterator(); |
1334 | MF.insert(MBBI: MBBIter, MBB: entryMBB); |
1335 | MF.insert(MBBI: MBBIter, MBB: headMBB); |
1336 | MF.insert(MBBI: MBBIter, MBB: bodyMBB); |
1337 | MF.insert(MBBI: MBBIter, MBB: footMBB); |
1338 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; |
1339 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 |
1340 | : Is64Bit ? X86::R11D |
1341 | : X86::EAX; |
1342 | |
1343 | // Setup entry block |
1344 | { |
1345 | |
1346 | entryMBB->splice(Where: entryMBB->end(), Other: &MBB, From: MBB.begin(), To: MBBI); |
1347 | BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: FinalStackProbed) |
1348 | .addReg(RegNo: StackPtr) |
1349 | .setMIFlag(MachineInstr::FrameSetup); |
1350 | MachineInstr *MI = |
1351 | BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: AndOp), DestReg: FinalStackProbed) |
1352 | .addReg(RegNo: FinalStackProbed) |
1353 | .addImm(Val) |
1354 | .setMIFlag(MachineInstr::FrameSetup); |
1355 | |
1356 | // The EFLAGS implicit def is dead. |
1357 | MI->getOperand(i: 3).setIsDead(); |
1358 | |
1359 | BuildMI(BB: entryMBB, MIMD: DL, |
1360 | MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1361 | .addReg(RegNo: FinalStackProbed) |
1362 | .addReg(RegNo: StackPtr) |
1363 | .setMIFlag(MachineInstr::FrameSetup); |
1364 | BuildMI(BB: entryMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1365 | .addMBB(MBB: &MBB) |
1366 | .addImm(Val: X86::COND_E) |
1367 | .setMIFlag(MachineInstr::FrameSetup); |
1368 | entryMBB->addSuccessor(Succ: headMBB); |
1369 | entryMBB->addSuccessor(Succ: &MBB); |
1370 | } |
1371 | |
1372 | // Loop entry block |
1373 | |
1374 | { |
1375 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
1376 | BuildMI(BB: headMBB, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: StackPtr) |
1377 | .addReg(RegNo: StackPtr) |
1378 | .addImm(Val: StackProbeSize) |
1379 | .setMIFlag(MachineInstr::FrameSetup); |
1380 | |
1381 | BuildMI(BB: headMBB, MIMD: DL, |
1382 | MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1383 | .addReg(RegNo: StackPtr) |
1384 | .addReg(RegNo: FinalStackProbed) |
1385 | .setMIFlag(MachineInstr::FrameSetup); |
1386 | |
1387 | // jump to the footer if StackPtr < FinalStackProbed |
1388 | BuildMI(BB: headMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1389 | .addMBB(MBB: footMBB) |
1390 | .addImm(Val: X86::COND_B) |
1391 | .setMIFlag(MachineInstr::FrameSetup); |
1392 | |
1393 | headMBB->addSuccessor(Succ: bodyMBB); |
1394 | headMBB->addSuccessor(Succ: footMBB); |
1395 | } |
1396 | |
1397 | // setup loop body |
1398 | { |
1399 | addRegOffset(MIB: BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
1400 | .setMIFlag(MachineInstr::FrameSetup), |
1401 | Reg: StackPtr, isKill: false, Offset: 0) |
1402 | .addImm(Val: 0) |
1403 | .setMIFlag(MachineInstr::FrameSetup); |
1404 | |
1405 | const unsigned SUBOpc = getSUBriOpcode(IsLP64: Uses64BitFramePtr); |
1406 | BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: SUBOpc), DestReg: StackPtr) |
1407 | .addReg(RegNo: StackPtr) |
1408 | .addImm(Val: StackProbeSize) |
1409 | .setMIFlag(MachineInstr::FrameSetup); |
1410 | |
1411 | // cmp with stack pointer bound |
1412 | BuildMI(BB: bodyMBB, MIMD: DL, |
1413 | MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
1414 | .addReg(RegNo: FinalStackProbed) |
1415 | .addReg(RegNo: StackPtr) |
1416 | .setMIFlag(MachineInstr::FrameSetup); |
1417 | |
1418 | // jump back while FinalStackProbed < StackPtr |
1419 | BuildMI(BB: bodyMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
1420 | .addMBB(MBB: bodyMBB) |
1421 | .addImm(Val: X86::COND_B) |
1422 | .setMIFlag(MachineInstr::FrameSetup); |
1423 | bodyMBB->addSuccessor(Succ: bodyMBB); |
1424 | bodyMBB->addSuccessor(Succ: footMBB); |
1425 | } |
1426 | |
1427 | // setup loop footer |
1428 | { |
1429 | BuildMI(BB: footMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: StackPtr) |
1430 | .addReg(RegNo: FinalStackProbed) |
1431 | .setMIFlag(MachineInstr::FrameSetup); |
1432 | addRegOffset(MIB: BuildMI(BB: footMBB, MIMD: DL, MCID: TII.get(Opcode: MovMIOpc)) |
1433 | .setMIFlag(MachineInstr::FrameSetup), |
1434 | Reg: StackPtr, isKill: false, Offset: 0) |
1435 | .addImm(Val: 0) |
1436 | .setMIFlag(MachineInstr::FrameSetup); |
1437 | footMBB->addSuccessor(Succ: &MBB); |
1438 | } |
1439 | |
1440 | fullyRecomputeLiveIns(MBBs: {footMBB, bodyMBB, headMBB, &MBB}); |
1441 | } |
1442 | } else { |
1443 | MachineInstr *MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: AndOp), DestReg: Reg) |
1444 | .addReg(RegNo: Reg) |
1445 | .addImm(Val) |
1446 | .setMIFlag(MachineInstr::FrameSetup); |
1447 | |
1448 | // The EFLAGS implicit def is dead. |
1449 | MI->getOperand(i: 3).setIsDead(); |
1450 | } |
1451 | } |
1452 | |
1453 | bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const { |
1454 | // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be |
1455 | // clobbered by any interrupt handler. |
1456 | assert(&STI == &MF.getSubtarget<X86Subtarget>() && |
1457 | "MF used frame lowering for wrong subtarget" ); |
1458 | const Function &Fn = MF.getFunction(); |
1459 | const bool IsWin64CC = STI.isCallingConvWin64(CC: Fn.getCallingConv()); |
1460 | return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Kind: Attribute::NoRedZone); |
1461 | } |
1462 | |
1463 | /// Return true if we need to use the restricted Windows x64 prologue and |
1464 | /// epilogue code patterns that can be described with WinCFI (.seh_* |
1465 | /// directives). |
1466 | bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { |
1467 | return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
1468 | } |
1469 | |
1470 | bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const { |
1471 | return !isWin64Prologue(MF) && MF.needsFrameMoves(); |
1472 | } |
1473 | |
1474 | /// Return true if an opcode is part of the REP group of instructions |
1475 | static bool isOpcodeRep(unsigned Opcode) { |
1476 | switch (Opcode) { |
1477 | case X86::REPNE_PREFIX: |
1478 | case X86::REP_MOVSB_32: |
1479 | case X86::REP_MOVSB_64: |
1480 | case X86::REP_MOVSD_32: |
1481 | case X86::REP_MOVSD_64: |
1482 | case X86::REP_MOVSQ_32: |
1483 | case X86::REP_MOVSQ_64: |
1484 | case X86::REP_MOVSW_32: |
1485 | case X86::REP_MOVSW_64: |
1486 | case X86::REP_PREFIX: |
1487 | case X86::REP_STOSB_32: |
1488 | case X86::REP_STOSB_64: |
1489 | case X86::REP_STOSD_32: |
1490 | case X86::REP_STOSD_64: |
1491 | case X86::REP_STOSQ_32: |
1492 | case X86::REP_STOSQ_64: |
1493 | case X86::REP_STOSW_32: |
1494 | case X86::REP_STOSW_64: |
1495 | return true; |
1496 | default: |
1497 | break; |
1498 | } |
1499 | return false; |
1500 | } |
1501 | |
1502 | /// emitPrologue - Push callee-saved registers onto the stack, which |
/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
1504 | /// space for local variables. Also emit labels used by the exception handler to |
1505 | /// generate the exception handling frames. |
1506 | |
1507 | /* |
1508 | Here's a gist of what gets emitted: |
1509 | |
1510 | ; Establish frame pointer, if needed |
1511 | [if needs FP] |
1512 | push %rbp |
1513 | .cfi_def_cfa_offset 16 |
1514 | .cfi_offset %rbp, -16 |
.seh_pushreg %rbp
1516 | mov %rsp, %rbp |
1517 | .cfi_def_cfa_register %rbp |
1518 | |
1519 | ; Spill general-purpose registers |
1520 | [for all callee-saved GPRs] |
1521 | pushq %<reg> |
1522 | [if not needs FP] |
1523 | .cfi_def_cfa_offset (offset from RETADDR) |
1524 | .seh_pushreg %<reg> |
1525 | |
1526 | ; If the required stack alignment > default stack alignment |
1527 | ; rsp needs to be re-aligned. This creates a "re-alignment gap" |
1528 | ; of unknown size in the stack frame. |
1529 | [if stack needs re-alignment] |
1530 | and $MASK, %rsp |
1531 | |
1532 | ; Allocate space for locals |
1533 | [if target is Windows and allocated space > 4096 bytes] |
1534 | ; Windows needs special care for allocations larger |
1535 | ; than one page. |
1536 | mov $NNN, %rax |
1537 | call ___chkstk_ms/___chkstk |
1538 | sub %rax, %rsp |
1539 | [else] |
1540 | sub $NNN, %rsp |
1541 | |
1542 | [if needs FP] |
1543 | .seh_stackalloc (size of XMM spill slots) |
1544 | .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots |
1545 | [else] |
1546 | .seh_stackalloc NNN |
1547 | |
1548 | ; Spill XMMs |
; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
1550 | ; they may get spilled on any platform, if the current function |
1551 | ; calls @llvm.eh.unwind.init |
1552 | [if needs FP] |
1553 | [for all callee-saved XMM registers] |
1554 | movaps %<xmm reg>, -MMM(%rbp) |
1555 | [for all callee-saved XMM registers] |
1556 | .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) |
1557 | ; i.e. the offset relative to (%rbp - SEHFrameOffset) |
1558 | [else] |
1559 | [for all callee-saved XMM registers] |
1560 | movaps %<xmm reg>, KKK(%rsp) |
1561 | [for all callee-saved XMM registers] |
1562 | .seh_savexmm %<xmm reg>, KKK |
1563 | |
1564 | .seh_endprologue |
1565 | |
1566 | [if needs base pointer] |
1567 | mov %rsp, %rbx |
1568 | [if needs to restore base pointer] |
1569 | mov %rsp, -MMM(%rbp) |
1570 | |
1571 | ; Emit CFI info |
1572 | [if needs FP] |
1573 | [for all callee-saved registers] |
1574 | .cfi_offset %<reg>, (offset from %rbp) |
1575 | [else] |
1576 | .cfi_def_cfa_offset (offset from RETADDR) |
1577 | [for all callee-saved registers] |
1578 | .cfi_offset %<reg>, (offset from %rsp) |
1579 | |
1580 | Notes: |
1581 | - .seh directives are emitted only for Windows 64 ABI |
1582 | - .cv_fpo directives are emitted on win32 when emitting CodeView |
1583 | - .cfi directives are emitted for all other ABIs |
1584 | - for 32-bit code, substitute %e?? registers for %r?? |
1585 | */ |
1586 | |
1587 | void X86FrameLowering::emitPrologue(MachineFunction &MF, |
1588 | MachineBasicBlock &MBB) const { |
1589 | assert(&STI == &MF.getSubtarget<X86Subtarget>() && |
1590 | "MF used frame lowering for wrong subtarget" ); |
1591 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
1592 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1593 | const Function &Fn = MF.getFunction(); |
1594 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
1595 | uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. |
1596 | uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. |
1597 | bool IsFunclet = MBB.isEHFuncletEntry(); |
1598 | EHPersonality Personality = EHPersonality::Unknown; |
1599 | if (Fn.hasPersonalityFn()) |
1600 | Personality = classifyEHPersonality(Pers: Fn.getPersonalityFn()); |
1601 | bool FnHasClrFunclet = |
1602 | MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; |
1603 | bool IsClrFunclet = IsFunclet && FnHasClrFunclet; |
1604 | bool HasFP = hasFP(MF); |
1605 | bool IsWin64Prologue = isWin64Prologue(MF); |
1606 | bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); |
1607 | // FIXME: Emit FPO data for EH funclets. |
1608 | bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() && |
1609 | MF.getFunction().getParent()->getCodeViewFlag(); |
1610 | bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; |
1611 | bool NeedsDwarfCFI = needsDwarfCFI(MF); |
1612 | Register FramePtr = TRI->getFrameRegister(MF); |
1613 | const Register MachineFramePtr = |
1614 | STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) |
1615 | : FramePtr; |
1616 | Register BasePtr = TRI->getBaseRegister(); |
1617 | bool HasWinCFI = false; |
1618 | |
1619 | // Debug location must be unknown since the first debug location is used |
1620 | // to determine the end of the prologue. |
1621 | DebugLoc DL; |
1622 | Register ArgBaseReg; |
1623 | |
1624 | // Emit extra prolog for argument stack slot reference. |
1625 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
// MI is the LEA instruction created in X86ArgumentStackSlotPass.
// Create an extra prolog for stack realignment.
1628 | ArgBaseReg = MI->getOperand(i: 0).getReg(); |
1629 | // leal 4(%esp), %basereg |
1630 | // .cfi_def_cfa %basereg, 0 |
1631 | // andl $-128, %esp |
1632 | // pushl -4(%basereg) |
1633 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Is64Bit ? X86::LEA64r : X86::LEA32r), |
1634 | DestReg: ArgBaseReg) |
1635 | .addUse(RegNo: StackPtr) |
1636 | .addImm(Val: 1) |
1637 | .addUse(RegNo: X86::NoRegister) |
1638 | .addImm(Val: SlotSize) |
1639 | .addUse(RegNo: X86::NoRegister) |
1640 | .setMIFlag(MachineInstr::FrameSetup); |
1641 | if (NeedsDwarfCFI) { |
1642 | // .cfi_def_cfa %basereg, 0 |
1643 | unsigned DwarfStackPtr = TRI->getDwarfRegNum(RegNum: ArgBaseReg, isEH: true); |
1644 | BuildCFI(MBB, MBBI, DL, |
1645 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: 0), |
1646 | Flag: MachineInstr::FrameSetup); |
1647 | } |
1648 | BuildStackAlignAND(MBB, MBBI, DL, Reg: StackPtr, MaxAlign); |
1649 | int64_t Offset = -(int64_t)SlotSize; |
1650 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm)) |
1651 | .addReg(RegNo: ArgBaseReg) |
1652 | .addImm(Val: 1) |
1653 | .addReg(RegNo: X86::NoRegister) |
1654 | .addImm(Val: Offset) |
1655 | .addReg(RegNo: X86::NoRegister) |
1656 | .setMIFlag(MachineInstr::FrameSetup); |
1657 | } |
1658 | |
1659 | // Space reserved for stack-based arguments when making a (ABI-guaranteed) |
1660 | // tail call. |
1661 | unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); |
1662 | if (TailCallArgReserveSize && IsWin64Prologue) |
1663 | report_fatal_error(reason: "Can't handle guaranteed tail call under win64 yet" ); |
1664 | |
1665 | const bool EmitStackProbeCall = |
1666 | STI.getTargetLowering()->hasStackProbeSymbol(MF); |
1667 | unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); |
1668 | |
1669 | if (HasFP && X86FI->hasSwiftAsyncContext()) { |
1670 | switch (MF.getTarget().Options.SwiftAsyncFramePointer) { |
1671 | case SwiftAsyncFramePointerMode::DeploymentBased: |
1672 | if (STI.swiftAsyncContextIsDynamicallySet()) { |
1673 | // The special symbol below is absolute and has a *value* suitable to be |
1674 | // combined with the frame pointer directly. |
1675 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::OR64rm), DestReg: MachineFramePtr) |
1676 | .addUse(RegNo: MachineFramePtr) |
1677 | .addUse(RegNo: X86::RIP) |
1678 | .addImm(Val: 1) |
1679 | .addUse(RegNo: X86::NoRegister) |
1680 | .addExternalSymbol(FnName: "swift_async_extendedFramePointerFlags" , |
1681 | TargetFlags: X86II::MO_GOTPCREL) |
1682 | .addUse(RegNo: X86::NoRegister); |
1683 | break; |
1684 | } |
1685 | [[fallthrough]]; |
1686 | |
1687 | case SwiftAsyncFramePointerMode::Always: |
1688 | assert( |
1689 | !IsWin64Prologue && |
1690 | "win64 prologue does not set the bit 60 in the saved frame pointer" ); |
1691 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::BTS64ri8), DestReg: MachineFramePtr) |
1692 | .addUse(RegNo: MachineFramePtr) |
1693 | .addImm(Val: 60) |
1694 | .setMIFlag(MachineInstr::FrameSetup); |
1695 | break; |
1696 | |
1697 | case SwiftAsyncFramePointerMode::Never: |
1698 | break; |
1699 | } |
1700 | } |
1701 | |
1702 | // Re-align the stack on 64-bit if the x86-interrupt calling convention is |
1703 | // used and an error code was pushed, since the x86-64 ABI requires a 16-byte |
1704 | // stack alignment. |
1705 | if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit && |
1706 | Fn.arg_size() == 2) { |
1707 | StackSize += 8; |
1708 | MFI.setStackSize(StackSize); |
1709 | |
1710 | // Update the stack pointer by pushing a register. This is the instruction |
// that would end up being emitted by a call to `emitSPUpdate`.
1712 | // Hard-coding the update to a push avoids emitting a second |
1713 | // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack |
// probing isn't needed anyway for an 8-byte update.
1715 | // Pushing a register leaves us in a similar situation to a regular |
1716 | // function call where we know that the address at (rsp-8) is writeable. |
1717 | // That way we avoid any off-by-ones with stack probing for additional |
1718 | // stack pointer updates later on. |
1719 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
1720 | .addReg(RegNo: X86::RAX, flags: RegState::Undef) |
1721 | .setMIFlag(MachineInstr::FrameSetup); |
1722 | } |
1723 | |
1724 | // If this is x86-64 and the Red Zone is not disabled, if we are a leaf |
1725 | // function, and use up to 128 bytes of stack space, don't have a frame |
1726 | // pointer, calls, or dynamic alloca then we do not need to adjust the |
1727 | // stack pointer (we fit in the Red Zone). We also check that we don't |
1728 | // push and pop from the stack. |
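// For example (illustrative), a SysV x86-64 leaf function needing 40 bytes of
// locals can address them below %rsp (e.g. at -40(%rsp)) with no stack-pointer
// adjustment; the clamping below shrinks StackSize accordingly.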
1729 | if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) && |
1730 | !MFI.hasVarSizedObjects() && // No dynamic alloca. |
1731 | !MFI.adjustsStack() && // No calls. |
1732 | !EmitStackProbeCall && // No stack probes. |
1733 | !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. |
1734 | !MF.shouldSplitStack()) { // Regular stack |
1735 | uint64_t MinSize = |
1736 | X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta(); |
1737 | if (HasFP) |
1738 | MinSize += SlotSize; |
1739 | X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); |
1740 | StackSize = std::max(a: MinSize, b: StackSize > 128 ? StackSize - 128 : 0); |
1741 | MFI.setStackSize(StackSize); |
1742 | } |
1743 | |
1744 | // Insert stack pointer adjustment for later moving of return addr. Only |
1745 | // applies to tail call optimized functions where the callee argument stack |
// size is bigger than the caller's.
1747 | if (TailCallArgReserveSize != 0) { |
1748 | BuildStackAdjustment(MBB, MBBI, DL, Offset: -(int)TailCallArgReserveSize, |
1749 | /*InEpilogue=*/false) |
1750 | .setMIFlag(MachineInstr::FrameSetup); |
1751 | } |
1752 | |
1753 | // Mapping for machine moves: |
1754 | // |
1755 | // DST: VirtualFP AND |
1756 | // SRC: VirtualFP => DW_CFA_def_cfa_offset |
1757 | // ELSE => DW_CFA_def_cfa |
1758 | // |
1759 | // SRC: VirtualFP AND |
1760 | // DST: Register => DW_CFA_def_cfa_register |
1761 | // |
1762 | // ELSE |
1763 | // OFFSET < 0 => DW_CFA_offset_extended_sf |
1764 | // REG < 64 => DW_CFA_offset + Reg |
1765 | // ELSE => DW_CFA_offset_extended |
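// Illustrative lowering of the mapping above: a move with both DST and SRC
// being VirtualFP and offset 16 becomes .cfi_def_cfa_offset 16, while a move
// of VirtualFP into %rbp becomes .cfi_def_cfa_register %rbp.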
1766 | |
1767 | uint64_t NumBytes = 0; |
1768 | int stackGrowth = -SlotSize; |
1769 | |
1770 | // Find the funclet establisher parameter |
1771 | MCRegister Establisher; |
1772 | if (IsClrFunclet) |
1773 | Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX; |
1774 | else if (IsFunclet) |
1775 | Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; |
1776 | |
1777 | if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { |
1778 | // Immediately spill establisher into the home slot. |
1779 | // The runtime cares about this. |
1780 | // MOV64mr %rdx, 16(%rsp) |
1781 | unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
1782 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVmr)), Reg: StackPtr, isKill: true, Offset: 16) |
1783 | .addReg(RegNo: Establisher) |
1784 | .setMIFlag(MachineInstr::FrameSetup); |
1785 | MBB.addLiveIn(PhysReg: Establisher); |
1786 | } |
1787 | |
1788 | if (HasFP) { |
1789 | assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved" ); |
1790 | |
1791 | // Calculate required stack adjustment. |
1792 | uint64_t FrameSize = StackSize - SlotSize; |
1793 | NumBytes = |
1794 | FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); |
1795 | |
1796 | // Callee-saved registers are pushed on stack before the stack is realigned. |
1797 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) |
1798 | NumBytes = alignTo(Value: NumBytes, Align: MaxAlign); |
1799 | |
1800 | // Save EBP/RBP into the appropriate stack slot. |
1801 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, |
1802 | MCID: TII.get(Opcode: getPUSHOpcode(ST: MF.getSubtarget<X86Subtarget>()))) |
1803 | .addReg(RegNo: MachineFramePtr, flags: RegState::Kill) |
1804 | .setMIFlag(MachineInstr::FrameSetup); |
1805 | |
1806 | if (NeedsDwarfCFI && !ArgBaseReg.isValid()) { |
1807 | // Mark the place where EBP/RBP was saved. |
1808 | // Define the current CFA rule to use the provided offset. |
1809 | assert(StackSize); |
1810 | BuildCFI(MBB, MBBI, DL, |
1811 | CFIInst: MCCFIInstruction::cfiDefCfaOffset( |
1812 | L: nullptr, Offset: -2 * stackGrowth + (int)TailCallArgReserveSize), |
1813 | Flag: MachineInstr::FrameSetup); |
1814 | |
1815 | // Change the rule for the FramePtr to be an "offset" rule. |
1816 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
1817 | BuildCFI(MBB, MBBI, DL, |
1818 | CFIInst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfFramePtr, |
1819 | Offset: 2 * stackGrowth - |
1820 | (int)TailCallArgReserveSize), |
1821 | Flag: MachineInstr::FrameSetup); |
1822 | } |
1823 | |
1824 | if (NeedsWinCFI) { |
1825 | HasWinCFI = true; |
1826 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg)) |
1827 | .addImm(Val: FramePtr) |
1828 | .setMIFlag(MachineInstr::FrameSetup); |
1829 | } |
1830 | |
1831 | if (!IsFunclet) { |
1832 | if (X86FI->hasSwiftAsyncContext()) { |
1833 | assert(!IsWin64Prologue && |
1834 | "win64 prologue does not store async context right below rbp" ); |
1835 | const auto &Attrs = MF.getFunction().getAttributes(); |
1836 | |
1837 | // Before we update the live frame pointer we have to ensure there's a |
1838 | // valid (or null) asynchronous context in its slot just before FP in |
1839 | // the frame record, so store it now. |
1840 | if (Attrs.hasAttrSomewhere(Kind: Attribute::SwiftAsync)) { |
1841 | // We have an initial context in r14, store it just before the frame |
1842 | // pointer. |
1843 | MBB.addLiveIn(PhysReg: X86::R14); |
1844 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
1845 | .addReg(RegNo: X86::R14) |
1846 | .setMIFlag(MachineInstr::FrameSetup); |
1847 | } else { |
1848 | // No initial context, store null so that there's no pointer that |
1849 | // could be misused. |
1850 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64i32)) |
1851 | .addImm(Val: 0) |
1852 | .setMIFlag(MachineInstr::FrameSetup); |
1853 | } |
1854 | |
1855 | if (NeedsWinCFI) { |
1856 | HasWinCFI = true; |
1857 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg)) |
1858 | .addImm(Val: X86::R14) |
1859 | .setMIFlag(MachineInstr::FrameSetup); |
1860 | } |
1861 | |
1862 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: FramePtr) |
1863 | .addUse(RegNo: X86::RSP) |
1864 | .addImm(Val: 1) |
1865 | .addUse(RegNo: X86::NoRegister) |
1866 | .addImm(Val: 8) |
1867 | .addUse(RegNo: X86::NoRegister) |
1868 | .setMIFlag(MachineInstr::FrameSetup); |
1869 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SUB64ri32), DestReg: X86::RSP) |
1870 | .addUse(RegNo: X86::RSP) |
1871 | .addImm(Val: 8) |
1872 | .setMIFlag(MachineInstr::FrameSetup); |
1873 | } |
1874 | |
1875 | if (!IsWin64Prologue && !IsFunclet) { |
1876 | // Update EBP with the new base value. |
1877 | if (!X86FI->hasSwiftAsyncContext()) |
1878 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, |
1879 | MCID: TII.get(Opcode: Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), |
1880 | DestReg: FramePtr) |
1881 | .addReg(RegNo: StackPtr) |
1882 | .setMIFlag(MachineInstr::FrameSetup); |
1883 | |
1884 | if (NeedsDwarfCFI) { |
1885 | if (ArgBaseReg.isValid()) { |
1886 | SmallString<64> CfaExpr; |
1887 | CfaExpr.push_back(Elt: dwarf::DW_CFA_expression); |
1888 | uint8_t buffer[16]; |
1889 | unsigned DwarfReg = TRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
1890 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer)); |
1891 | CfaExpr.push_back(Elt: 2); |
1892 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); |
1893 | CfaExpr.push_back(Elt: 0); |
1894 | // DW_CFA_expression: reg5 DW_OP_breg5 +0 |
1895 | BuildCFI(MBB, MBBI, DL, |
1896 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str()), |
1897 | Flag: MachineInstr::FrameSetup); |
1898 | } else { |
1899 | // Mark effective beginning of when frame pointer becomes valid. |
1900 | // Define the current CFA to use the EBP/RBP register. |
1901 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
1902 | BuildCFI( |
1903 | MBB, MBBI, DL, |
1904 | CFIInst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: DwarfFramePtr), |
1905 | Flag: MachineInstr::FrameSetup); |
1906 | } |
1907 | } |
1908 | |
1909 | if (NeedsWinFPO) { |
1910 | // .cv_fpo_setframe $FramePtr |
1911 | HasWinCFI = true; |
1912 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SetFrame)) |
1913 | .addImm(Val: FramePtr) |
1914 | .addImm(Val: 0) |
1915 | .setMIFlag(MachineInstr::FrameSetup); |
1916 | } |
1917 | } |
1918 | } |
1919 | } else { |
1920 | assert(!IsFunclet && "funclets without FPs not yet implemented" ); |
1921 | NumBytes = |
1922 | StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); |
1923 | } |
1924 | |
1925 | // Update the offset adjustment, which is mainly used by codeview to translate |
1926 | // from ESP to VFRAME relative local variable offsets. |
1927 | if (!IsFunclet) { |
1928 | if (HasFP && TRI->hasStackRealignment(MF)) |
1929 | MFI.setOffsetAdjustment(-NumBytes); |
1930 | else |
1931 | MFI.setOffsetAdjustment(-StackSize); |
1932 | } |
1933 | |
1934 | // For EH funclets, only allocate enough space for outgoing calls. Save the |
1935 | // NumBytes value that we would've used for the parent frame. |
unsigned ParentFrameNumBytes = NumBytes;
1937 | if (IsFunclet) |
1938 | NumBytes = getWinEHFuncletFrameSize(MF); |
1939 | |
1940 | // Skip the callee-saved push instructions. |
1941 | bool PushedRegs = false; |
1942 | int StackOffset = 2 * stackGrowth; |
1943 | MachineBasicBlock::const_iterator LastCSPush = MBBI; |
1944 | auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) { |
1945 | if (MBBI == MBB.end() || !MBBI->getFlag(Flag: MachineInstr::FrameSetup)) |
1946 | return false; |
1947 | unsigned Opc = MBBI->getOpcode(); |
1948 | return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r || |
1949 | Opc == X86::PUSH2 || Opc == X86::PUSH2P; |
1950 | }; |
1951 | |
1952 | while (IsCSPush(MBBI)) { |
1953 | PushedRegs = true; |
1954 | Register Reg = MBBI->getOperand(i: 0).getReg(); |
1955 | LastCSPush = MBBI; |
1956 | ++MBBI; |
1957 | unsigned Opc = LastCSPush->getOpcode(); |
1958 | |
1959 | if (!HasFP && NeedsDwarfCFI) { |
1960 | // Mark callee-saved push instruction. |
1961 | // Define the current CFA rule to use the provided offset. |
1962 | assert(StackSize); |
1963 | // Compared to push, push2 introduces more stack offset (one more |
1964 | // register). |
1965 | if (Opc == X86::PUSH2 || Opc == X86::PUSH2P) |
1966 | StackOffset += stackGrowth; |
1967 | BuildCFI(MBB, MBBI, DL, |
1968 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -StackOffset), |
1969 | Flag: MachineInstr::FrameSetup); |
1970 | StackOffset += stackGrowth; |
1971 | } |
1972 | |
1973 | if (NeedsWinCFI) { |
1974 | HasWinCFI = true; |
1975 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg)) |
1976 | .addImm(Val: Reg) |
1977 | .setMIFlag(MachineInstr::FrameSetup); |
1978 | if (Opc == X86::PUSH2 || Opc == X86::PUSH2P) |
1979 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_PushReg)) |
1980 | .addImm(Val: LastCSPush->getOperand(i: 1).getReg()) |
1981 | .setMIFlag(MachineInstr::FrameSetup); |
1982 | } |
1983 | } |
1984 | |
1985 | // Realign stack after we pushed callee-saved registers (so that we'll be |
1986 | // able to calculate their offsets from the frame pointer). |
1987 | // Don't do this for Win64, it needs to realign the stack after the prologue. |
1988 | if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) && |
1989 | !ArgBaseReg.isValid()) { |
1990 | assert(HasFP && "There should be a frame pointer if stack is realigned." ); |
1991 | BuildStackAlignAND(MBB, MBBI, DL, Reg: StackPtr, MaxAlign); |
1992 | |
1993 | if (NeedsWinCFI) { |
1994 | HasWinCFI = true; |
1995 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_StackAlign)) |
1996 | .addImm(Val: MaxAlign) |
1997 | .setMIFlag(MachineInstr::FrameSetup); |
1998 | } |
1999 | } |
2000 | |
// If there is a SUB32ri of ESP immediately before this instruction, merge
2002 | // the two. This can be the case when tail call elimination is enabled and |
2003 | // the callee has more arguments than the caller. |
2004 | NumBytes = mergeSPUpdates( |
2005 | MBB, MBBI, CalcNewOffset: [NumBytes](int64_t Offset) { return NumBytes - Offset; }, |
2006 | doMergeWithPrevious: true); |
2007 | |
2008 | // Adjust stack pointer: ESP -= numbytes. |
2009 | |
2010 | // Windows and cygwin/mingw require a prologue helper routine when allocating |
2011 | // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw |
2012 | // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the |
2013 | // stack and adjust the stack pointer in one go. The 64-bit version of |
2014 | // __chkstk is only responsible for probing the stack. The 64-bit prologue is |
2015 | // responsible for adjusting the stack pointer. Touching the stack at 4K |
2016 | // increments is necessary to ensure that the guard pages used by the OS |
2017 | // virtual memory manager are allocated in correct sequence. |
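// Illustrative call sequences (see also the prologue gist above):
//   32-bit Windows: mov $NNN, %eax ; call _chkstk            (probes and adjusts %esp)
//   64-bit Windows: mov $NNN, %rax ; call __chkstk ; sub %rax, %rsp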
2018 | uint64_t AlignedNumBytes = NumBytes; |
2019 | if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) |
2020 | AlignedNumBytes = alignTo(Value: AlignedNumBytes, Align: MaxAlign); |
2021 | if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) { |
2022 | assert(!X86FI->getUsesRedZone() && |
2023 | "The Red Zone is not accounted for in stack probes" ); |
2024 | |
2025 | // Check whether EAX is livein for this block. |
2026 | bool isEAXAlive = isEAXLiveIn(MBB); |
2027 | |
2028 | if (isEAXAlive) { |
2029 | if (Is64Bit) { |
2030 | // Save RAX |
2031 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
2032 | .addReg(RegNo: X86::RAX, flags: RegState::Kill) |
2033 | .setMIFlag(MachineInstr::FrameSetup); |
2034 | } else { |
2035 | // Save EAX |
2036 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32r)) |
2037 | .addReg(RegNo: X86::EAX, flags: RegState::Kill) |
2038 | .setMIFlag(MachineInstr::FrameSetup); |
2039 | } |
2040 | } |
2041 | |
2042 | if (Is64Bit) { |
2043 | // Handle the 64-bit Windows ABI case where we need to call __chkstk. |
2044 | // Function prologue is responsible for adjusting the stack pointer. |
2045 | int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; |
2046 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: Is64Bit, Imm: Alloc)), DestReg: X86::RAX) |
2047 | .addImm(Val: Alloc) |
2048 | .setMIFlag(MachineInstr::FrameSetup); |
2049 | } else { |
2050 | // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. |
2051 | // We'll also use 4 already allocated bytes for EAX. |
2052 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: X86::EAX) |
2053 | .addImm(Val: isEAXAlive ? NumBytes - 4 : NumBytes) |
2054 | .setMIFlag(MachineInstr::FrameSetup); |
2055 | } |
2056 | |
2057 | // Call __chkstk, __chkstk_ms, or __alloca. |
2058 | emitStackProbe(MF, MBB, MBBI, DL, InProlog: true); |
2059 | |
2060 | if (isEAXAlive) { |
2061 | // Restore RAX/EAX |
2062 | MachineInstr *MI; |
2063 | if (Is64Bit) |
2064 | MI = addRegOffset(MIB: BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: X86::RAX), |
2065 | Reg: StackPtr, isKill: false, Offset: NumBytes - 8); |
2066 | else |
2067 | MI = addRegOffset(MIB: BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32rm), DestReg: X86::EAX), |
2068 | Reg: StackPtr, isKill: false, Offset: NumBytes - 4); |
2069 | MI->setFlag(MachineInstr::FrameSetup); |
2070 | MBB.insert(I: MBBI, MI); |
2071 | } |
2072 | } else if (NumBytes) { |
2073 | emitSPUpdate(MBB, MBBI, DL, NumBytes: -(int64_t)NumBytes, /*InEpilogue=*/false); |
2074 | } |
2075 | |
2076 | if (NeedsWinCFI && NumBytes) { |
2077 | HasWinCFI = true; |
2078 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_StackAlloc)) |
2079 | .addImm(Val: NumBytes) |
2080 | .setMIFlag(MachineInstr::FrameSetup); |
2081 | } |
2082 | |
2083 | int SEHFrameOffset = 0; |
2084 | Register SPOrEstablisher; |
2085 | if (IsFunclet) { |
2086 | if (IsClrFunclet) { |
2087 | // The establisher parameter passed to a CLR funclet is actually a pointer |
2088 | // to the (mostly empty) frame of its nearest enclosing funclet; we have |
2089 | // to find the root function establisher frame by loading the PSPSym from |
2090 | // the intermediate frame. |
2091 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); |
2092 | MachinePointerInfo NoInfo; |
2093 | MBB.addLiveIn(PhysReg: Establisher); |
2094 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rm), DestReg: Establisher), |
2095 | Reg: Establisher, isKill: false, Offset: PSPSlotOffset) |
2096 | .addMemOperand(MMO: MF.getMachineMemOperand( |
2097 | PtrInfo: NoInfo, F: MachineMemOperand::MOLoad, Size: SlotSize, BaseAlignment: Align(SlotSize))); |
2099 | // Save the root establisher back into the current funclet's (mostly |
2100 | // empty) frame, in case a sub-funclet or the GC needs it. |
2101 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: StackPtr, |
2102 | isKill: false, Offset: PSPSlotOffset) |
2103 | .addReg(RegNo: Establisher) |
2104 | .addMemOperand(MMO: MF.getMachineMemOperand( |
2105 | PtrInfo: NoInfo, |
2106 | F: MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, |
2107 | Size: SlotSize, BaseAlignment: Align(SlotSize))); |
2108 | } |
2109 | SPOrEstablisher = Establisher; |
2110 | } else { |
2111 | SPOrEstablisher = StackPtr; |
2112 | } |
2113 | |
2114 | if (IsWin64Prologue && HasFP) { |
2115 | // Set RBP to a small fixed offset from RSP. In the funclet case, we base |
2116 | // this calculation on the incoming establisher, which holds the value of |
2117 | // RSP from the parent frame at the end of the prologue. |
2118 | SEHFrameOffset = calculateSetFPREG(SPAdjust: ParentFrameNumBytes); |
2119 | if (SEHFrameOffset) |
2120 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: FramePtr), |
2121 | Reg: SPOrEstablisher, isKill: false, Offset: SEHFrameOffset); |
2122 | else |
2123 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64rr), DestReg: FramePtr) |
2124 | .addReg(RegNo: SPOrEstablisher); |
2125 | |
2126 | // If this is not a funclet, emit the CFI describing our frame pointer. |
2127 | if (NeedsWinCFI && !IsFunclet) { |
2128 | assert(!NeedsWinFPO && "this setframe incompatible with FPO data" ); |
2129 | HasWinCFI = true; |
2130 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SetFrame)) |
2131 | .addImm(Val: FramePtr) |
2132 | .addImm(Val: SEHFrameOffset) |
2133 | .setMIFlag(MachineInstr::FrameSetup); |
2134 | if (isAsynchronousEHPersonality(Pers: Personality)) |
2135 | MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; |
2136 | } |
2137 | } else if (IsFunclet && STI.is32Bit()) { |
2138 | // Reset EBP / ESI to something good for funclets. |
2139 | MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); |
2140 | // If we're a catch funclet, we can be returned to via catchret. Save ESP |
2141 | // into the registration node so that the runtime will restore it for us. |
2142 | if (!MBB.isCleanupFuncletEntry()) { |
2143 | assert(Personality == EHPersonality::MSVC_CXX); |
2144 | Register FrameReg; |
2145 | int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; |
2146 | int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed(); |
2147 | // ESP is the first field, so no extra displacement is needed. |
2148 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32mr)), Reg: FrameReg, |
2149 | isKill: false, Offset: EHRegOffset) |
2150 | .addReg(RegNo: X86::ESP); |
2151 | } |
2152 | } |
2153 | |
2154 | while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup)) { |
2155 | const MachineInstr &FrameInstr = *MBBI; |
2156 | ++MBBI; |
2157 | |
2158 | if (NeedsWinCFI) { |
2159 | int FI; |
2160 | if (Register Reg = TII.isStoreToStackSlot(MI: FrameInstr, FrameIndex&: FI)) { |
2161 | if (X86::FR64RegClass.contains(Reg)) { |
2162 | int Offset; |
2163 | Register IgnoredFrameReg; |
2164 | if (IsWin64Prologue && IsFunclet) |
2165 | Offset = getWin64EHFrameIndexRef(MF, FI, SPReg&: IgnoredFrameReg); |
2166 | else |
2167 | Offset = |
2168 | getFrameIndexReference(MF, FI, FrameReg&: IgnoredFrameReg).getFixed() + |
2169 | SEHFrameOffset; |
2170 | |
2171 | HasWinCFI = true; |
2172 | assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data" ); |
2173 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_SaveXMM)) |
2174 | .addImm(Val: Reg) |
2175 | .addImm(Val: Offset) |
2176 | .setMIFlag(MachineInstr::FrameSetup); |
2177 | } |
2178 | } |
2179 | } |
2180 | } |
2181 | |
2182 | if (NeedsWinCFI && HasWinCFI) |
2183 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_EndPrologue)) |
2184 | .setMIFlag(MachineInstr::FrameSetup); |
2185 | |
2186 | if (FnHasClrFunclet && !IsFunclet) { |
2187 | // Save the so-called Initial-SP (i.e. the value of the stack pointer |
2188 | // immediately after the prolog) into the PSPSlot so that funclets |
2189 | // and the GC can recover it. |
2190 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); |
2191 | auto PSPInfo = MachinePointerInfo::getFixedStack( |
2192 | MF, FI: MF.getWinEHFuncInfo()->PSPSymFrameIdx); |
2193 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mr)), Reg: StackPtr, isKill: false, |
2194 | Offset: PSPSlotOffset) |
2195 | .addReg(RegNo: StackPtr) |
2196 | .addMemOperand(MMO: MF.getMachineMemOperand( |
2197 | PtrInfo: PSPInfo, F: MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, |
2198 | Size: SlotSize, BaseAlignment: Align(SlotSize))); |
2199 | } |
2200 | |
2201 | // Realign stack after we spilled callee-saved registers (so that we'll be |
2202 | // able to calculate their offsets from the frame pointer). |
2203 | // Win64 requires aligning the stack after the prologue. |
2204 | if (IsWin64Prologue && TRI->hasStackRealignment(MF)) { |
2205 | assert(HasFP && "There should be a frame pointer if stack is realigned." ); |
2206 | BuildStackAlignAND(MBB, MBBI, DL, Reg: SPOrEstablisher, MaxAlign); |
2207 | } |
2208 | |
2209 | // We already dealt with stack realignment and funclets above. |
2210 | if (IsFunclet && STI.is32Bit()) |
2211 | return; |
2212 | |
2213 | // If we need a base pointer, set it up here. It's whatever the value |
2214 | // of the stack pointer is at this point. Any variable size objects |
2215 | // will be allocated after this, so we can still use the base pointer |
2216 | // to reference locals. |
2217 | if (TRI->hasBasePointer(MF)) { |
2218 | // Update the base pointer with the current stack pointer. |
2219 | unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; |
2220 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: BasePtr) |
2221 | .addReg(RegNo: SPOrEstablisher) |
2222 | .setMIFlag(MachineInstr::FrameSetup); |
2223 | if (X86FI->getRestoreBasePointer()) { |
2224 | // Stash value of base pointer. Saving RSP instead of EBP shortens |
2225 | // dependence chain. Used by SjLj EH. |
2226 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
2227 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opm)), Reg: FramePtr, isKill: true, |
2228 | Offset: X86FI->getRestoreBasePointerOffset()) |
2229 | .addReg(RegNo: SPOrEstablisher) |
2230 | .setMIFlag(MachineInstr::FrameSetup); |
2231 | } |
2232 | |
2233 | if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { |
2234 | // Stash the value of the frame pointer relative to the base pointer for |
2235 | // Win32 EH. This supports Win32 EH, which does the inverse of the above: |
2236 | // it recovers the frame pointer from the base pointer rather than the |
2237 | // other way around. |
2238 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; |
2239 | Register UsedReg; |
2240 | int Offset = |
2241 | getFrameIndexReference(MF, FI: X86FI->getSEHFramePtrSaveIndex(), FrameReg&: UsedReg) |
2242 | .getFixed(); |
2243 | assert(UsedReg == BasePtr); |
2244 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opm)), Reg: UsedReg, isKill: true, Offset) |
2245 | .addReg(RegNo: FramePtr) |
2246 | .setMIFlag(MachineInstr::FrameSetup); |
2247 | } |
2248 | } |
2249 | if (ArgBaseReg.isValid()) { |
2250 | // Save argument base pointer. |
2251 | auto *MI = X86FI->getStackPtrSaveMI(); |
2252 | int FI = MI->getOperand(i: 1).getIndex(); |
2253 | unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr; |
2254 | // movl %basereg, offset(%ebp) |
2255 | addFrameReference(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVmr)), FI) |
2256 | .addReg(RegNo: ArgBaseReg) |
2257 | .setMIFlag(MachineInstr::FrameSetup); |
2258 | } |
2259 | |
2260 | if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { |
2261 | // Mark end of stack pointer adjustment. |
2262 | if (!HasFP && NumBytes) { |
2263 | // Define the current CFA rule to use the provided offset. |
2264 | assert(StackSize); |
2265 | BuildCFI( |
2266 | MBB, MBBI, DL, |
2267 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: StackSize - stackGrowth), |
2268 | Flag: MachineInstr::FrameSetup); |
2269 | } |
2270 | |
2271 | // Emit DWARF info specifying the offsets of the callee-saved registers. |
2272 | emitCalleeSavedFrameMoves(MBB, MBBI, DL, IsPrologue: true); |
2273 | } |
2274 | |
// An x86 interrupt handling function cannot assume anything about the
// direction flag (DF in the EFLAGS register). Clear this flag by emitting a
// "cld" instruction in the prologue of each interrupt handler function.
2278 | // |
2279 | // Create "cld" instruction only in these cases: |
2280 | // 1. The interrupt handling function uses any of the "rep" instructions. |
// 2. The interrupt handling function calls another function.
// 3. There are inline asm blocks, as we do not know what they do.
2283 | // |
2284 | // TODO: We should also emit cld if we detect the use of std, but as of now, |
// the compiler does not even emit that instruction or define it, so in
2286 | // practice, this would only happen with inline asm, which we cover anyway. |
2287 | if (Fn.getCallingConv() == CallingConv::X86_INTR) { |
2288 | bool NeedsCLD = false; |
2289 | |
2290 | for (const MachineBasicBlock &B : MF) { |
2291 | for (const MachineInstr &MI : B) { |
2292 | if (MI.isCall()) { |
2293 | NeedsCLD = true; |
2294 | break; |
2295 | } |
2296 | |
2297 | if (isOpcodeRep(Opcode: MI.getOpcode())) { |
2298 | NeedsCLD = true; |
2299 | break; |
2300 | } |
2301 | |
2302 | if (MI.isInlineAsm()) { |
2303 | // TODO: Parse asm for rep instructions or call sites? |
2304 | // For now, let's play it safe and emit a cld instruction |
2305 | // just in case. |
2306 | NeedsCLD = true; |
2307 | break; |
2308 | } |
2309 | } |
2310 | } |
2311 | |
2312 | if (NeedsCLD) { |
2313 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::CLD)) |
2314 | .setMIFlag(MachineInstr::FrameSetup); |
2315 | } |
2316 | } |
2317 | |
2318 | // At this point we know if the function has WinCFI or not. |
2319 | MF.setHasWinCFI(HasWinCFI); |
2320 | } |
2321 | |
2322 | bool X86FrameLowering::canUseLEAForSPInEpilogue( |
2323 | const MachineFunction &MF) const { |
2324 | // We can't use LEA instructions for adjusting the stack pointer if we don't |
2325 | // have a frame pointer in the Win64 ABI. Only ADD instructions may be used |
2326 | // to deallocate the stack. |
2327 | // This means that we can use LEA for SP in two situations: |
2328 | // 1. We *aren't* using the Win64 ABI which means we are free to use LEA. |
2329 | // 2. We *have* a frame pointer which means we are permitted to use LEA. |
2330 | return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); |
2331 | } |
2332 | |
2333 | static bool isFuncletReturnInstr(MachineInstr &MI) { |
2334 | switch (MI.getOpcode()) { |
2335 | case X86::CATCHRET: |
2336 | case X86::CLEANUPRET: |
2337 | return true; |
2338 | default: |
2339 | return false; |
2340 | } |
2341 | llvm_unreachable("impossible" ); |
2342 | } |
2343 | |
2344 | // CLR funclets use a special "Previous Stack Pointer Symbol" slot on the |
2345 | // stack. It holds a pointer to the bottom of the root function frame. The |
2346 | // establisher frame pointer passed to a nested funclet may point to the |
2347 | // (mostly empty) frame of its parent funclet, but it will need to find |
2348 | // the frame of the root function to access locals. To facilitate this, |
2349 | // every funclet copies the pointer to the bottom of the root function |
2350 | // frame into a PSPSym slot in its own (mostly empty) stack frame. Using the |
2351 | // same offset for the PSPSym in the root function frame that's used in the |
2352 | // funclets' frames allows each funclet to dynamically accept any ancestor |
2353 | // frame as its establisher argument (the runtime doesn't guarantee the |
2354 | // immediate parent for some reason lost to history), and also allows the GC, |
2355 | // which uses the PSPSym for some bookkeeping, to find it in any funclet's |
2356 | // frame with only a single offset reported for the entire method. |
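// Illustrative layout (offset chosen arbitrarily): if the PSPSym sits at
// SP + 40 right after the prologue, the root function stores its Initial-SP
// there, and each CLR funclet reads the root Initial-SP through its
// establisher at the same +40 offset and re-stores it into its own frame.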
2357 | unsigned |
2358 | X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { |
2359 | const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo(); |
2360 | Register SPReg; |
2361 | int Offset = getFrameIndexReferencePreferSP(MF, FI: Info.PSPSymFrameIdx, FrameReg&: SPReg, |
2362 | /*IgnoreSPUpdates*/ true) |
2363 | .getFixed(); |
2364 | assert(Offset >= 0 && SPReg == TRI->getStackRegister()); |
2365 | return static_cast<unsigned>(Offset); |
2366 | } |
2367 | |
2368 | unsigned |
2369 | X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { |
2370 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2371 | // This is the size of the pushed CSRs. |
2372 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2373 | // This is the size of callee saved XMMs. |
2374 | const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2375 | unsigned XMMSize = |
2376 | WinEHXMMSlotInfo.size() * TRI->getSpillSize(RC: X86::VR128RegClass); |
2377 | // This is the amount of stack a funclet needs to allocate. |
2378 | unsigned UsedSize; |
2379 | EHPersonality Personality = |
2380 | classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn()); |
2381 | if (Personality == EHPersonality::CoreCLR) { |
2382 | // CLR funclets need to hold enough space to include the PSPSym, at the |
2383 | // same offset from the stack pointer (immediately after the prolog) as it |
2384 | // resides at in the main function. |
2385 | UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize; |
2386 | } else { |
2387 | // Other funclets just need enough stack for outgoing call arguments. |
2388 | UsedSize = MF.getFrameInfo().getMaxCallFrameSize(); |
2389 | } |
2390 | // RBP is not included in the callee saved register block. After pushing RBP, |
2391 | // everything is 16 byte aligned. Everything we allocate before an outgoing |
2392 | // call must also be 16 byte aligned. |
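// Worked example (illustrative numbers): CSSize = 16, UsedSize = 36 and
// XMMSize = 0 give alignTo(52, 16) = 64, so the funclet allocates
// 64 + 0 - 16 = 48 bytes on top of the pushed CSRs.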
2393 | unsigned FrameSizeMinusRBP = alignTo(Size: CSSize + UsedSize, A: getStackAlign()); |
2394 | // Subtract out the size of the callee saved registers. This is how much stack |
2395 | // each funclet will allocate. |
2396 | return FrameSizeMinusRBP + XMMSize - CSSize; |
2397 | } |
2398 | |
2399 | static bool isTailCallOpcode(unsigned Opc) { |
2400 | return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi || |
2401 | Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 || |
2402 | Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 || |
2403 | Opc == X86::TCRETURNmi64; |
2404 | } |
2405 | |
2406 | void X86FrameLowering::emitEpilogue(MachineFunction &MF, |
2407 | MachineBasicBlock &MBB) const { |
2408 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2409 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2410 | MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator(); |
2411 | MachineBasicBlock::iterator MBBI = Terminator; |
2412 | DebugLoc DL; |
2413 | if (MBBI != MBB.end()) |
2414 | DL = MBBI->getDebugLoc(); |
2415 | // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. |
2416 | const bool Is64BitILP32 = STI.isTarget64BitILP32(); |
2417 | Register FramePtr = TRI->getFrameRegister(MF); |
2418 | Register MachineFramePtr = |
2419 | Is64BitILP32 ? Register(getX86SubSuperRegister(Reg: FramePtr, Size: 64)) : FramePtr; |
2420 | |
2421 | bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
2422 | bool NeedsWin64CFI = |
2423 | IsWin64Prologue && MF.getFunction().needsUnwindTableEntry(); |
2424 | bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(MI&: *MBBI); |
2425 | |
2426 | // Get the number of bytes to allocate from the FrameInfo. |
2427 | uint64_t StackSize = MFI.getStackSize(); |
2428 | uint64_t MaxAlign = calculateMaxStackAlign(MF); |
2429 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2430 | unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); |
2431 | bool HasFP = hasFP(MF); |
2432 | uint64_t NumBytes = 0; |
2433 | |
2434 | bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() && |
2435 | !MF.getTarget().getTargetTriple().isOSWindows() && |
2436 | !MF.getTarget().getTargetTriple().isUEFI()) && |
2437 | MF.needsFrameMoves(); |
2438 | |
2439 | Register ArgBaseReg; |
2440 | if (auto *MI = X86FI->getStackPtrSaveMI()) { |
2441 | unsigned Opc = X86::LEA32r; |
2442 | Register StackReg = X86::ESP; |
2443 | ArgBaseReg = MI->getOperand(i: 0).getReg(); |
2444 | if (STI.is64Bit()) { |
2445 | Opc = X86::LEA64r; |
2446 | StackReg = X86::RSP; |
2447 | } |
2448 | // leal -4(%basereg), %esp |
2449 | // .cfi_def_cfa %esp, 4 |
2450 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackReg) |
2451 | .addUse(RegNo: ArgBaseReg) |
2452 | .addImm(Val: 1) |
2453 | .addUse(RegNo: X86::NoRegister) |
2454 | .addImm(Val: -(int64_t)SlotSize) |
2455 | .addUse(RegNo: X86::NoRegister) |
2456 | .setMIFlag(MachineInstr::FrameDestroy); |
2457 | if (NeedsDwarfCFI) { |
2458 | unsigned DwarfStackPtr = TRI->getDwarfRegNum(RegNum: StackReg, isEH: true); |
2459 | BuildCFI(MBB, MBBI, DL, |
2460 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: SlotSize), |
2461 | Flag: MachineInstr::FrameDestroy); |
2462 | --MBBI; |
2463 | } |
2464 | --MBBI; |
2465 | } |
2466 | |
2467 | if (IsFunclet) { |
2468 | assert(HasFP && "EH funclets without FP not yet implemented" ); |
2469 | NumBytes = getWinEHFuncletFrameSize(MF); |
2470 | } else if (HasFP) { |
2471 | // Calculate required stack adjustment. |
2472 | uint64_t FrameSize = StackSize - SlotSize; |
2473 | NumBytes = FrameSize - CSSize - TailCallArgReserveSize; |
2474 | |
2475 | // Callee-saved registers were pushed on stack before the stack was |
2476 | // realigned. |
2477 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) |
2478 | NumBytes = alignTo(Value: FrameSize, Align: MaxAlign); |
2479 | } else { |
2480 | NumBytes = StackSize - CSSize - TailCallArgReserveSize; |
2481 | } |
2482 | uint64_t SEHStackAllocAmt = NumBytes; |
2483 | |
2484 | // AfterPop is the position to insert .cfi_restore. |
2485 | MachineBasicBlock::iterator AfterPop = MBBI; |
2486 | if (HasFP) { |
2487 | if (X86FI->hasSwiftAsyncContext()) { |
2488 | // Discard the context. |
2489 | int64_t Offset = mergeSPAdd(MBB, MBBI, AddOffset: 16, doMergeWithPrevious: true); |
2490 | emitSPUpdate(MBB, MBBI, DL, NumBytes: Offset, /*InEpilogue*/ true); |
2491 | } |
2492 | // Pop EBP. |
2493 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, |
2494 | MCID: TII.get(Opcode: getPOPOpcode(ST: MF.getSubtarget<X86Subtarget>())), |
2495 | DestReg: MachineFramePtr) |
2496 | .setMIFlag(MachineInstr::FrameDestroy); |
2497 | |
2498 | // We need to reset FP to its untagged state on return. Bit 60 is currently |
2499 | // used to show the presence of an extended frame. |
2500 | if (X86FI->hasSwiftAsyncContext()) { |
2501 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::BTR64ri8), DestReg: MachineFramePtr) |
2502 | .addUse(RegNo: MachineFramePtr) |
2503 | .addImm(Val: 60) |
2504 | .setMIFlag(MachineInstr::FrameDestroy); |
2505 | } |
2506 | |
2507 | if (NeedsDwarfCFI) { |
2508 | if (!ArgBaseReg.isValid()) { |
2509 | unsigned DwarfStackPtr = |
2510 | TRI->getDwarfRegNum(RegNum: Is64Bit ? X86::RSP : X86::ESP, isEH: true); |
2511 | BuildCFI(MBB, MBBI, DL, |
2512 | CFIInst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfStackPtr, Offset: SlotSize), |
2513 | Flag: MachineInstr::FrameDestroy); |
2514 | } |
2515 | if (!MBB.succ_empty() && !MBB.isReturnBlock()) { |
2516 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(RegNum: MachineFramePtr, isEH: true); |
2517 | BuildCFI(MBB, MBBI: AfterPop, DL, |
2518 | CFIInst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfFramePtr), |
2519 | Flag: MachineInstr::FrameDestroy); |
2520 | --MBBI; |
2521 | --AfterPop; |
2522 | } |
2523 | --MBBI; |
2524 | } |
2525 | } |
2526 | |
2527 | MachineBasicBlock::iterator FirstCSPop = MBBI; |
2528 | // Skip the callee-saved pop instructions. |
2529 | while (MBBI != MBB.begin()) { |
2530 | MachineBasicBlock::iterator PI = std::prev(x: MBBI); |
2531 | unsigned Opc = PI->getOpcode(); |
2532 | |
2533 | if (Opc != X86::DBG_VALUE && !PI->isTerminator()) { |
2534 | if (!PI->getFlag(Flag: MachineInstr::FrameDestroy) || |
2535 | (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 && |
2536 | Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 && |
2537 | Opc != X86::POP2P && Opc != X86::LEA64r)) |
2538 | break; |
2539 | FirstCSPop = PI; |
2540 | } |
2541 | |
2542 | --MBBI; |
2543 | } |
2544 | if (ArgBaseReg.isValid()) { |
2545 | // Restore argument base pointer. |
2546 | auto *MI = X86FI->getStackPtrSaveMI(); |
2547 | int FI = MI->getOperand(i: 1).getIndex(); |
2548 | unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm; |
2549 | // movl offset(%ebp), %basereg |
2550 | addFrameReference(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: MOVrm), DestReg: ArgBaseReg), FI) |
2551 | .setMIFlag(MachineInstr::FrameDestroy); |
2552 | } |
2553 | MBBI = FirstCSPop; |
2554 | |
2555 | if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) |
2556 | emitCatchRetReturnValue(MBB, MBBI: FirstCSPop, CatchRet: &*Terminator); |
2557 | |
2558 | if (MBBI != MBB.end()) |
2559 | DL = MBBI->getDebugLoc(); |
2560 | // If there is an ADD32ri or SUB32ri of ESP immediately before this |
2561 | // instruction, merge the two instructions. |
2562 | if (NumBytes || MFI.hasVarSizedObjects()) |
2563 | NumBytes = mergeSPAdd(MBB, MBBI, AddOffset: NumBytes, doMergeWithPrevious: true); |
2564 | |
// If dynamic allocas are used, reset ESP to point to the last callee-saved
// slot before popping them off. The same applies when the stack was
// realigned. Don't do this for a funclet epilogue, since funclets do not
// perform realignment or dynamic stack allocation.
2569 | if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) && |
2570 | !IsFunclet) { |
2571 | if (TRI->hasStackRealignment(MF)) |
2572 | MBBI = FirstCSPop; |
2573 | unsigned SEHFrameOffset = calculateSetFPREG(SPAdjust: SEHStackAllocAmt); |
2574 | uint64_t LEAAmount = |
2575 | IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize; |
2576 | |
2577 | if (X86FI->hasSwiftAsyncContext()) |
2578 | LEAAmount -= 16; |
2579 | |
2580 | // There are only two legal forms of epilogue: |
2581 | // - add SEHAllocationSize, %rsp |
2582 | // - lea SEHAllocationSize(%FramePtr), %rsp |
2583 | // |
2584 | // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence. |
2585 | // However, we may use this sequence if we have a frame pointer because the |
2586 | // effects of the prologue can safely be undone. |
2587 | if (LEAAmount != 0) { |
2588 | unsigned Opc = getLEArOpcode(IsLP64: Uses64BitFramePtr); |
2589 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr), Reg: FramePtr, |
2590 | isKill: false, Offset: LEAAmount); |
2591 | --MBBI; |
2592 | } else { |
2593 | unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); |
2594 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: StackPtr).addReg(RegNo: FramePtr); |
2595 | --MBBI; |
2596 | } |
2597 | } else if (NumBytes) { |
2598 | // Adjust stack pointer back: ESP += numbytes. |
2599 | emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true); |
2600 | if (!HasFP && NeedsDwarfCFI) { |
2601 | // Define the current CFA rule to use the provided offset. |
2602 | BuildCFI(MBB, MBBI, DL, |
2603 | CFIInst: MCCFIInstruction::cfiDefCfaOffset( |
2604 | L: nullptr, Offset: CSSize + TailCallArgReserveSize + SlotSize), |
2605 | Flag: MachineInstr::FrameDestroy); |
2606 | } |
2607 | --MBBI; |
2608 | } |
2609 | |
2610 | if (NeedsWin64CFI && MF.hasWinCFI()) |
2611 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_BeginEpilogue)); |
2612 | |
2613 | if (!HasFP && NeedsDwarfCFI) { |
2614 | MBBI = FirstCSPop; |
2615 | int64_t Offset = -(int64_t)CSSize - SlotSize; |
2616 | // Mark callee-saved pop instruction. |
2617 | // Define the current CFA rule to use the provided offset. |
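// For example (illustrative values), with CSSize == 16 and SlotSize == 8,
// Offset starts at -24; the first pop raises the emitted CFA offset to 16
// and the second raises it to 8, i.e. just the return address slot.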
2618 | while (MBBI != MBB.end()) { |
2619 | MachineBasicBlock::iterator PI = MBBI; |
2620 | unsigned Opc = PI->getOpcode(); |
2621 | ++MBBI; |
2622 | if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r || |
2623 | Opc == X86::POP2 || Opc == X86::POP2P) { |
2624 | Offset += SlotSize; |
// Compared to pop, pop2 restores one additional register and therefore
// adjusts the stack by one extra slot.
2627 | if (Opc == X86::POP2 || Opc == X86::POP2P) |
2628 | Offset += SlotSize; |
2629 | BuildCFI(MBB, MBBI, DL, |
2630 | CFIInst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -Offset), |
2631 | Flag: MachineInstr::FrameDestroy); |
2632 | } |
2633 | } |
2634 | } |
2635 | |
2636 | // Emit DWARF info specifying the restores of the callee-saved registers. |
// If the epilogue contains the return, or this is a block without successors,
// there is no need to generate .cfi_restore for the callee-saved registers.
2639 | if (NeedsDwarfCFI && !MBB.succ_empty()) |
2640 | emitCalleeSavedFrameMoves(MBB, MBBI: AfterPop, DL, IsPrologue: false); |
2641 | |
2642 | if (Terminator == MBB.end() || !isTailCallOpcode(Opc: Terminator->getOpcode())) { |
2643 | // Add the return addr area delta back since we are not tail calling. |
2644 | int64_t Delta = X86FI->getTCReturnAddrDelta(); |
2645 | assert(Delta <= 0 && "TCDelta should never be positive" ); |
2646 | if (Delta) { |
2647 | // Check for possible merge with preceding ADD instruction. |
2648 | int64_t Offset = mergeSPAdd(MBB, MBBI&: Terminator, AddOffset: -Delta, doMergeWithPrevious: true); |
2649 | emitSPUpdate(MBB, MBBI&: Terminator, DL, NumBytes: Offset, /*InEpilogue=*/true); |
2650 | } |
2651 | } |
2652 | |
2653 | // Emit tilerelease for AMX kernel. |
2654 | if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA) |
2655 | BuildMI(BB&: MBB, I: Terminator, MIMD: DL, MCID: TII.get(Opcode: X86::TILERELEASE)); |
2656 | |
2657 | if (NeedsWin64CFI && MF.hasWinCFI()) |
2658 | BuildMI(BB&: MBB, I: Terminator, MIMD: DL, MCID: TII.get(Opcode: X86::SEH_EndEpilogue)); |
2659 | } |
2660 | |
2661 | StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, |
2662 | int FI, |
2663 | Register &FrameReg) const { |
2664 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2665 | |
2666 | bool IsFixed = MFI.isFixedObjectIndex(ObjectIdx: FI); |
2667 | // We can't calculate offset from frame pointer if the stack is realigned, |
2668 | // so enforce usage of stack/base pointer. The base pointer is used when we |
2669 | // have dynamic allocas in addition to dynamic realignment. |
2670 | if (TRI->hasBasePointer(MF)) |
2671 | FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister(); |
2672 | else if (TRI->hasStackRealignment(MF)) |
2673 | FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister(); |
2674 | else |
2675 | FrameReg = TRI->getFrameRegister(MF); |
2676 | |
2677 | // Offset will hold the offset from the stack pointer at function entry to the |
2678 | // object. |
2679 | // We need to factor in additional offsets applied during the prologue to the |
2680 | // frame, base, and stack pointer depending on which is used. |
2681 | int Offset = MFI.getObjectOffset(ObjectIdx: FI) - getOffsetOfLocalArea(); |
2682 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2683 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); |
2684 | uint64_t StackSize = MFI.getStackSize(); |
2685 | bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
2686 | int64_t FPDelta = 0; |
2687 | |
2688 | // In an x86 interrupt, remove the offset we added to account for the return |
2689 | // address from any stack object allocated in the caller's frame. Interrupts |
2690 | // do not have a standard return address. Fixed objects in the current frame, |
2691 | // such as SSE register spills, should not get this treatment. |
2692 | if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR && |
2693 | Offset >= 0) { |
2694 | Offset += getOffsetOfLocalArea(); |
2695 | } |
2696 | |
2697 | if (IsWin64Prologue) { |
2698 | assert(!MFI.hasCalls() || (StackSize % 16) == 8); |
2699 | |
2700 | // Calculate required stack adjustment. |
2701 | uint64_t FrameSize = StackSize - SlotSize; |
2702 | // If required, include space for extra hidden slot for stashing base |
2703 | // pointer. |
2704 | if (X86FI->getRestoreBasePointer()) |
2705 | FrameSize += SlotSize; |
2706 | uint64_t NumBytes = FrameSize - CSSize; |
2707 | |
2708 | uint64_t SEHFrameOffset = calculateSetFPREG(SPAdjust: NumBytes); |
2709 | if (FI && FI == X86FI->getFAIndex()) |
2710 | return StackOffset::getFixed(Fixed: -SEHFrameOffset); |
2711 | |
2712 | // FPDelta is the offset from the "traditional" FP location of the old base |
2713 | // pointer followed by return address and the location required by the |
2714 | // restricted Win64 prologue. |
2715 | // Add FPDelta to all offsets below that go through the frame pointer. |
2716 | FPDelta = FrameSize - SEHFrameOffset; |
2717 | assert((!MFI.hasCalls() || (FPDelta % 16) == 0) && |
2718 | "FPDelta isn't aligned per the Win64 ABI!" ); |
2719 | } |
2720 | |
2721 | if (FrameReg == TRI->getFramePtr()) { |
2722 | // Skip saved EBP/RBP |
2723 | Offset += SlotSize; |
2724 | |
2725 | // Account for restricted Windows prologue. |
2726 | Offset += FPDelta; |
2727 | |
2728 | // Skip the RETADDR move area |
2729 | int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); |
2730 | if (TailCallReturnAddrDelta < 0) |
2731 | Offset -= TailCallReturnAddrDelta; |
2732 | |
2733 | return StackOffset::getFixed(Fixed: Offset); |
2734 | } |
2735 | |
2736 | // FrameReg is either the stack pointer or a base pointer. But the base is |
2737 | // located at the end of the statically known StackSize so the distinction |
2738 | // doesn't really matter. |
2739 | if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF)) |
2740 | assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); |
2741 | return StackOffset::getFixed(Fixed: Offset + StackSize); |
2742 | } |
2743 | |
2744 | int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, |
2745 | Register &FrameReg) const { |
2746 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2747 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2748 | const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2749 | const auto it = WinEHXMMSlotInfo.find(Val: FI); |
2750 | |
2751 | if (it == WinEHXMMSlotInfo.end()) |
2752 | return getFrameIndexReference(MF, FI, FrameReg).getFixed(); |
2753 | |
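// As a worked example (hypothetical numbers): with a 40-byte max call frame,
// a 16-byte stack alignment and an XMM slot offset of 16, the reference is
// RSP + alignDown(40, 16) + 16 = RSP + 48.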
2754 | FrameReg = TRI->getStackRegister(); |
2755 | return alignDown(Value: MFI.getMaxCallFrameSize(), Align: getStackAlign().value()) + |
2756 | it->second; |
2757 | } |
2758 | |
2759 | StackOffset |
2760 | X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, |
2761 | Register &FrameReg, |
2762 | int Adjustment) const { |
2763 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2764 | FrameReg = TRI->getStackRegister(); |
2765 | return StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FI) - |
2766 | getOffsetOfLocalArea() + Adjustment); |
2767 | } |
2768 | |
2769 | StackOffset |
2770 | X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, |
2771 | int FI, Register &FrameReg, |
2772 | bool IgnoreSPUpdates) const { |
2773 | |
2774 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
2775 | // Does not include any dynamic realign. |
2776 | const uint64_t StackSize = MFI.getStackSize(); |
2777 | // LLVM arranges the stack as follows: |
2778 | // ... |
2779 | // ARG2 |
2780 | // ARG1 |
2781 | // RETADDR |
2782 | // PUSH RBP <-- RBP points here |
2783 | // PUSH CSRs |
2784 | // ~~~~~~~ <-- possible stack realignment (non-win64) |
2785 | // ... |
2786 | // STACK OBJECTS |
2787 | // ... <-- RSP after prologue points here |
2788 | // ~~~~~~~ <-- possible stack realignment (win64) |
2789 | // |
2790 | // if (hasVarSizedObjects()): |
2791 | // ... <-- "base pointer" (ESI/RBX) points here |
2792 | // DYNAMIC ALLOCAS |
2793 | // ... <-- RSP points here |
2794 | // |
2795 | // Case 1: In the simple case of no stack realignment and no dynamic |
2796 | // allocas, both "fixed" stack objects (arguments and CSRs) are addressable |
2797 | // with fixed offsets from RSP. |
2798 | // |
2799 | // Case 2: In the case of stack realignment with no dynamic allocas, fixed |
2800 | // stack objects are addressed with RBP and regular stack objects with RSP. |
2801 | // |
2802 | // Case 3: In the case of dynamic allocas and stack realignment, RSP is used |
2803 | // to address stack arguments for outgoing calls and nothing else. The "base |
2804 | // pointer" points to local variables, and RBP points to fixed objects. |
2805 | // |
2806 | // In cases 2 and 3, we can only answer for non-fixed stack objects, and the |
2807 | // answer we give is relative to the SP after the prologue, and not the |
2808 | // SP in the middle of the function. |
2809 | |
2810 | if (MFI.isFixedObjectIndex(ObjectIdx: FI) && TRI->hasStackRealignment(MF) && |
2811 | !STI.isTargetWin64()) |
2812 | return getFrameIndexReference(MF, FI, FrameReg); |
2813 | |
// If !hasReservedCallFrame the function might have SP adjustment in the
2815 | // body. So, even though the offset is statically known, it depends on where |
2816 | // we are in the function. |
2817 | if (!IgnoreSPUpdates && !hasReservedCallFrame(MF)) |
2818 | return getFrameIndexReference(MF, FI, FrameReg); |
2819 | |
2820 | // We don't handle tail calls, and shouldn't be seeing them either. |
2821 | assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 && |
2822 | "we don't handle this case!" ); |
2823 | |
2824 | // This is how the math works out: |
2825 | // |
2826 | // %rsp grows (i.e. gets lower) left to right. Each box below is |
2827 | // one word (eight bytes). Obj0 is the stack slot we're trying to |
2828 | // get to. |
2829 | // |
2830 | // ---------------------------------- |
2831 | // | BP | Obj0 | Obj1 | ... | ObjN | |
2832 | // ---------------------------------- |
2833 | // ^ ^ ^ ^ |
2834 | // A B C E |
2835 | // |
2836 | // A is the incoming stack pointer. |
2837 | // (B - A) is the local area offset (-8 for x86-64) [1] |
2838 | // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2] |
2839 | // |
2840 | // |(E - B)| is the StackSize (absolute value, positive). For a |
// stack that grows down, this works out to be (B - E). [3]
2842 | // |
2843 | // E is also the value of %rsp after stack has been set up, and we |
2844 | // want (C - E) -- the value we can add to %rsp to get to Obj0. Now |
2845 | // (C - E) == (C - A) - (B - A) + (B - E) |
2846 | // { Using [1], [2] and [3] above } |
2847 | // == getObjectOffset - LocalAreaOffset + StackSize |
2848 | |
2849 | return getFrameIndexReferenceSP(MF, FI, FrameReg, Adjustment: StackSize); |
2850 | } |
2851 | |
2852 | bool X86FrameLowering::assignCalleeSavedSpillSlots( |
2853 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
2854 | std::vector<CalleeSavedInfo> &CSI) const { |
2855 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2856 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
2857 | |
2858 | unsigned CalleeSavedFrameSize = 0; |
2859 | unsigned XMMCalleeSavedFrameSize = 0; |
2860 | auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); |
2861 | int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); |
2862 | |
2863 | int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); |
2864 | |
2865 | if (TailCallReturnAddrDelta < 0) { |
2866 | // create RETURNADDR area |
2867 | // arg |
2868 | // arg |
2869 | // RETADDR |
2870 | // { ... |
2871 | // RETADDR area |
2872 | // ... |
2873 | // } |
2874 | // [EBP] |
2875 | MFI.CreateFixedObject(Size: -TailCallReturnAddrDelta, |
2876 | SPOffset: TailCallReturnAddrDelta - SlotSize, IsImmutable: true); |
2877 | } |
2878 | |
2879 | // Spill the BasePtr if it's used. |
2880 | if (this->TRI->hasBasePointer(MF)) { |
2881 | // Allocate a spill slot for EBP if we have a base pointer and EH funclets. |
2882 | if (MF.hasEHFunclets()) { |
2883 | int FI = MFI.CreateSpillStackObject(Size: SlotSize, Alignment: Align(SlotSize)); |
2884 | X86FI->setHasSEHFramePtrSave(true); |
2885 | X86FI->setSEHFramePtrSaveIndex(FI); |
2886 | } |
2887 | } |
2888 | |
2889 | if (hasFP(MF)) { |
// emitPrologue always spills the frame register first.
2891 | SpillSlotOffset -= SlotSize; |
2892 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2893 | |
2894 | // The async context lives directly before the frame pointer, and we |
2895 | // allocate a second slot to preserve stack alignment. |
2896 | if (X86FI->hasSwiftAsyncContext()) { |
2897 | SpillSlotOffset -= SlotSize; |
2898 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2899 | SpillSlotOffset -= SlotSize; |
2900 | } |
2901 | |
// Since emitPrologue and emitEpilogue will handle spilling and restoring of
// the frame register, we can delete it from the CSI list and not have to
// worry about avoiding it later.
2905 | Register FPReg = TRI->getFrameRegister(MF); |
2906 | for (unsigned i = 0; i < CSI.size(); ++i) { |
2907 | if (TRI->regsOverlap(RegA: CSI[i].getReg(), RegB: FPReg)) { |
2908 | CSI.erase(position: CSI.begin() + i); |
2909 | break; |
2910 | } |
2911 | } |
2912 | } |
2913 | |
// Strategy:
// 1. Use push2 when
//    a) the number of CSRs is > 1, if no padding is needed;
//    b) the number of CSRs is > 2, if padding is needed;
//    c) the stack alignment is >= 16 bytes.
// 2. When the number of CSR pushes is odd:
//    a. start using push2 from the 1st push if the stack is 16B aligned;
//    b. start using push2 from the 2nd push if the stack is not 16B aligned.
// 3. When the number of CSR pushes is even, start using push2 from the 1st
//    push and make the stack 16B aligned before the pushes.
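// As a sketch of the intent (not necessarily the exact emission order), a
// hypothetical set of five GPR CSRs on a 16B-aligned stack could be saved as:
//   push2  %r15, %r14
//   push2  %r13, %r12
//   push   %rbx
// so that every push2 stores to a 16B-aligned pair of slots.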
2924 | unsigned NumRegsForPush2 = 0; |
2925 | if (STI.hasPush2Pop2() && getStackAlignment() >= 16) { |
2926 | unsigned NumCSGPR = llvm::count_if(Range&: CSI, P: [](const CalleeSavedInfo &I) { |
2927 | return X86::GR64RegClass.contains(Reg: I.getReg()); |
2928 | }); |
2929 | bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0); |
2930 | bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1; |
2931 | X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2); |
2932 | NumRegsForPush2 = UsePush2Pop2 ? alignDown(Value: NumCSGPR, Align: 2) : 0; |
2933 | if (X86FI->padForPush2Pop2()) { |
2934 | SpillSlotOffset -= SlotSize; |
2935 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2936 | } |
2937 | } |
2938 | |
2939 | // Assign slots for GPRs. It increases frame size. |
2940 | for (CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
2941 | MCRegister Reg = I.getReg(); |
2942 | |
2943 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
2944 | continue; |
2945 | |
// A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned,
// or when an odd number of registers has already been added to the candidates.
2948 | if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 && |
2949 | (SpillSlotOffset % 16 == 0 || |
2950 | X86FI->getNumCandidatesForPush2Pop2() % 2)) |
2951 | X86FI->addCandidateForPush2Pop2(Reg); |
2952 | |
2953 | SpillSlotOffset -= SlotSize; |
2954 | CalleeSavedFrameSize += SlotSize; |
2955 | |
2956 | int SlotIndex = MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
2957 | I.setFrameIdx(SlotIndex); |
2958 | } |
2959 | |
// Adjust the offset of the spill slot now that we know the exact callee-saved
// frame size.
2962 | if (X86FI->getRestoreBasePointer()) { |
2963 | SpillSlotOffset -= SlotSize; |
2964 | CalleeSavedFrameSize += SlotSize; |
2965 | |
2966 | MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: SpillSlotOffset); |
// TODO: would saving the slot index be better?
2968 | X86FI->setRestoreBasePointer(CalleeSavedFrameSize); |
2969 | } |
2970 | assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 && |
2971 | "Expect even candidates for push2/pop2" ); |
2972 | if (X86FI->getNumCandidatesForPush2Pop2()) |
2973 | ++NumFunctionUsingPush2Pop2; |
2974 | X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); |
2975 | MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize); |
2976 | |
2977 | // Assign slots for XMMs. |
2978 | for (CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
2979 | MCRegister Reg = I.getReg(); |
2980 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
2981 | continue; |
2982 | |
2983 | // If this is k-register make sure we lookup via the largest legal type. |
2984 | MVT VT = MVT::Other; |
2985 | if (X86::VK16RegClass.contains(Reg)) |
2986 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
2987 | |
2988 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
2989 | unsigned Size = TRI->getSpillSize(RC: *RC); |
2990 | Align Alignment = TRI->getSpillAlign(RC: *RC); |
2991 | // ensure alignment |
2992 | assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86" ); |
2993 | SpillSlotOffset = -alignTo(Size: -SpillSlotOffset, A: Alignment); |
2994 | |
2995 | // spill into slot |
2996 | SpillSlotOffset -= Size; |
2997 | int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SPOffset: SpillSlotOffset); |
2998 | I.setFrameIdx(SlotIndex); |
2999 | MFI.ensureMaxAlignment(Alignment); |
3000 | |
3001 | // Save the start offset and size of XMM in stack frame for funclets. |
3002 | if (X86::VR128RegClass.contains(Reg)) { |
3003 | WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize; |
3004 | XMMCalleeSavedFrameSize += Size; |
3005 | } |
3006 | } |
3007 | |
3008 | return true; |
3009 | } |
3010 | |
3011 | bool X86FrameLowering::spillCalleeSavedRegisters( |
3012 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
3013 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
3014 | DebugLoc DL = MBB.findDebugLoc(MBBI: MI); |
3015 | |
3016 | // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI |
3017 | // for us, and there are no XMM CSRs on Win32. |
3018 | if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) |
3019 | return true; |
3020 | |
3021 | // Push GPRs. It increases frame size. |
3022 | const MachineFunction &MF = *MBB.getParent(); |
3023 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3024 | if (X86FI->padForPush2Pop2()) { |
3025 | assert(SlotSize == 8 && "Unexpected slot size for padding!" ); |
3026 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH64r)) |
3027 | .addReg(RegNo: X86::RAX, flags: RegState::Undef) |
3028 | .setMIFlag(MachineInstr::FrameSetup); |
3029 | } |
3030 | |
3031 | // Update LiveIn of the basic block and decide whether we can add a kill flag |
3032 | // to the use. |
3033 | auto UpdateLiveInCheckCanKill = [&](Register Reg) { |
3034 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
// Do not set a kill flag on values that are also marked as live-in. This
// happens with the @llvm.returnaddress intrinsic and with arguments
// passed in callee saved registers.
3038 | // Omitting the kill flags is conservatively correct even if the live-in |
3039 | // is not used after all. |
3040 | if (MRI.isLiveIn(Reg)) |
3041 | return false; |
3042 | MBB.addLiveIn(PhysReg: Reg); |
3043 | // Check if any subregister is live-in |
3044 | for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) |
3045 | if (MRI.isLiveIn(Reg: *AReg)) |
3046 | return false; |
3047 | return true; |
3048 | }; |
3049 | auto UpdateLiveInGetKillRegState = [&](Register Reg) { |
3050 | return getKillRegState(B: UpdateLiveInCheckCanKill(Reg)); |
3051 | }; |
3052 | |
3053 | for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) { |
3054 | MCRegister Reg = RI->getReg(); |
3055 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
3056 | continue; |
3057 | |
3058 | if (X86FI->isCandidateForPush2Pop2(Reg)) { |
3059 | MCRegister Reg2 = (++RI)->getReg(); |
3060 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPUSH2Opcode(ST: STI))) |
3061 | .addReg(RegNo: Reg, flags: UpdateLiveInGetKillRegState(Reg)) |
3062 | .addReg(RegNo: Reg2, flags: UpdateLiveInGetKillRegState(Reg2)) |
3063 | .setMIFlag(MachineInstr::FrameSetup); |
3064 | } else { |
3065 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPUSHOpcode(ST: STI))) |
3066 | .addReg(RegNo: Reg, flags: UpdateLiveInGetKillRegState(Reg)) |
3067 | .setMIFlag(MachineInstr::FrameSetup); |
3068 | } |
3069 | } |
3070 | |
3071 | if (X86FI->getRestoreBasePointer()) { |
3072 | unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; |
3073 | Register BaseReg = this->TRI->getBaseRegister(); |
3074 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc)) |
3075 | .addReg(RegNo: BaseReg, flags: getKillRegState(B: true)) |
3076 | .setMIFlag(MachineInstr::FrameSetup); |
3077 | } |
3078 | |
// Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
// so spill them to the stack frame instead.
3081 | for (const CalleeSavedInfo &I : llvm::reverse(C&: CSI)) { |
3082 | MCRegister Reg = I.getReg(); |
3083 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
3084 | continue; |
3085 | |
3086 | // If this is k-register make sure we lookup via the largest legal type. |
3087 | MVT VT = MVT::Other; |
3088 | if (X86::VK16RegClass.contains(Reg)) |
3089 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
3090 | |
3091 | // Add the callee-saved register as live-in. It's killed at the spill. |
3092 | MBB.addLiveIn(PhysReg: Reg); |
3093 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
3094 | |
3095 | TII.storeRegToStackSlot(MBB, MI, SrcReg: Reg, isKill: true, FrameIndex: I.getFrameIdx(), RC, TRI, |
3096 | VReg: Register(), Flags: MachineInstr::FrameSetup); |
3097 | } |
3098 | |
3099 | return true; |
3100 | } |
3101 | |
3102 | void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB, |
3103 | MachineBasicBlock::iterator MBBI, |
3104 | MachineInstr *CatchRet) const { |
3105 | // SEH shouldn't use catchret. |
3106 | assert(!isAsynchronousEHPersonality(classifyEHPersonality( |
3107 | MBB.getParent()->getFunction().getPersonalityFn())) && |
3108 | "SEH should not use CATCHRET" ); |
3109 | const DebugLoc &DL = CatchRet->getDebugLoc(); |
3110 | MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(i: 0).getMBB(); |
3111 | |
3112 | // Fill EAX/RAX with the address of the target block. |
3113 | if (STI.is64Bit()) { |
3114 | // LEA64r CatchRetTarget(%rip), %rax |
3115 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA64r), DestReg: X86::RAX) |
3116 | .addReg(RegNo: X86::RIP) |
3117 | .addImm(Val: 0) |
3118 | .addReg(RegNo: 0) |
3119 | .addMBB(MBB: CatchRetTarget) |
3120 | .addReg(RegNo: 0); |
3121 | } else { |
3122 | // MOV32ri $CatchRetTarget, %eax |
3123 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: X86::EAX) |
3124 | .addMBB(MBB: CatchRetTarget); |
3125 | } |
3126 | |
3127 | // Record that we've taken the address of CatchRetTarget and no longer just |
3128 | // reference it in a terminator. |
3129 | CatchRetTarget->setMachineBlockAddressTaken(); |
3130 | } |
3131 | |
3132 | bool X86FrameLowering::restoreCalleeSavedRegisters( |
3133 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
3134 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
3135 | if (CSI.empty()) |
3136 | return false; |
3137 | |
3138 | if (MI != MBB.end() && isFuncletReturnInstr(MI&: *MI) && STI.isOSWindows()) { |
3139 | // Don't restore CSRs in 32-bit EH funclets. Matches |
3140 | // spillCalleeSavedRegisters. |
3141 | if (STI.is32Bit()) |
3142 | return true; |
3143 | // Don't restore CSRs before an SEH catchret. SEH except blocks do not form |
3144 | // funclets. emitEpilogue transforms these to normal jumps. |
3145 | if (MI->getOpcode() == X86::CATCHRET) { |
3146 | const Function &F = MBB.getParent()->getFunction(); |
3147 | bool IsSEH = isAsynchronousEHPersonality( |
3148 | Pers: classifyEHPersonality(Pers: F.getPersonalityFn())); |
3149 | if (IsSEH) |
3150 | return true; |
3151 | } |
3152 | } |
3153 | |
3154 | DebugLoc DL = MBB.findDebugLoc(MBBI: MI); |
3155 | |
3156 | // Reload XMMs from stack frame. |
3157 | for (const CalleeSavedInfo &I : CSI) { |
3158 | MCRegister Reg = I.getReg(); |
3159 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) |
3160 | continue; |
3161 | |
3162 | // If this is k-register make sure we lookup via the largest legal type. |
3163 | MVT VT = MVT::Other; |
3164 | if (X86::VK16RegClass.contains(Reg)) |
3165 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; |
3166 | |
3167 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); |
3168 | TII.loadRegFromStackSlot(MBB, MI, DestReg: Reg, FrameIndex: I.getFrameIdx(), RC, TRI, |
3169 | VReg: Register()); |
3170 | } |
3171 | |
3172 | // Clear the stack slot for spill base pointer register. |
3173 | MachineFunction &MF = *MBB.getParent(); |
3174 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3175 | if (X86FI->getRestoreBasePointer()) { |
3176 | unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; |
3177 | Register BaseReg = this->TRI->getBaseRegister(); |
3178 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: BaseReg) |
3179 | .setMIFlag(MachineInstr::FrameDestroy); |
3180 | } |
3181 | |
3182 | // POP GPRs. |
3183 | for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) { |
3184 | MCRegister Reg = I->getReg(); |
3185 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) |
3186 | continue; |
3187 | |
3188 | if (X86FI->isCandidateForPush2Pop2(Reg)) |
3189 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPOP2Opcode(ST: STI)), DestReg: Reg) |
3190 | .addReg(RegNo: (++I)->getReg(), flags: RegState::Define) |
3191 | .setMIFlag(MachineInstr::FrameDestroy); |
3192 | else |
3193 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: getPOPOpcode(ST: STI)), DestReg: Reg) |
3194 | .setMIFlag(MachineInstr::FrameDestroy); |
3195 | } |
3196 | if (X86FI->padForPush2Pop2()) |
3197 | emitSPUpdate(MBB, MBBI&: MI, DL, NumBytes: SlotSize, /*InEpilogue=*/true); |
3198 | |
3199 | return true; |
3200 | } |
3201 | |
3202 | void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, |
3203 | BitVector &SavedRegs, |
3204 | RegScavenger *RS) const { |
3205 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
3206 | |
3207 | // Spill the BasePtr if it's used. |
3208 | if (TRI->hasBasePointer(MF)) { |
3209 | Register BasePtr = TRI->getBaseRegister(); |
3210 | if (STI.isTarget64BitILP32()) |
3211 | BasePtr = getX86SubSuperRegister(Reg: BasePtr, Size: 64); |
3212 | SavedRegs.set(BasePtr); |
3213 | } |
3214 | } |
3215 | |
3216 | static bool HasNestArgument(const MachineFunction *MF) { |
3217 | const Function &F = MF->getFunction(); |
3218 | for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; |
3219 | I++) { |
3220 | if (I->hasNestAttr() && !I->use_empty()) |
3221 | return true; |
3222 | } |
3223 | return false; |
3224 | } |
3225 | |
3226 | /// GetScratchRegister - Get a temp register for performing work in the |
3227 | /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform |
3228 | /// and the properties of the function either one or two registers will be |
3229 | /// needed. Set primary to true for the first register, false for the second. |
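/// For example, on x86-64 LP64 with a non-HiPE calling convention this returns
/// R11 for the primary register and R12 for the secondary one.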
3230 | static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, |
3231 | const MachineFunction &MF, bool Primary) { |
3232 | CallingConv::ID CallingConvention = MF.getFunction().getCallingConv(); |
3233 | |
3234 | // Erlang stuff. |
3235 | if (CallingConvention == CallingConv::HiPE) { |
3236 | if (Is64Bit) |
3237 | return Primary ? X86::R14 : X86::R13; |
3238 | else |
3239 | return Primary ? X86::EBX : X86::EDI; |
3240 | } |
3241 | |
3242 | if (Is64Bit) { |
3243 | if (IsLP64) |
3244 | return Primary ? X86::R11 : X86::R12; |
3245 | else |
3246 | return Primary ? X86::R11D : X86::R12D; |
3247 | } |
3248 | |
3249 | bool IsNested = HasNestArgument(MF: &MF); |
3250 | |
3251 | if (CallingConvention == CallingConv::X86_FastCall || |
3252 | CallingConvention == CallingConv::Fast || |
3253 | CallingConvention == CallingConv::Tail) { |
3254 | if (IsNested) |
3255 | report_fatal_error(reason: "Segmented stacks does not support fastcall with " |
3256 | "nested function." ); |
3257 | return Primary ? X86::EAX : X86::ECX; |
3258 | } |
3259 | if (IsNested) |
3260 | return Primary ? X86::EDX : X86::EAX; |
3261 | return Primary ? X86::ECX : X86::EAX; |
3262 | } |
3263 | |
3264 | // The stack limit in the TCB is set to this many bytes above the actual stack |
3265 | // limit. |
3266 | static const uint64_t kSplitStackAvailable = 256; |
3267 | |
3268 | void X86FrameLowering::adjustForSegmentedStacks( |
3269 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { |
3270 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3271 | uint64_t StackSize; |
3272 | unsigned TlsReg, TlsOffset; |
3273 | DebugLoc DL; |
3274 | |
3275 | // To support shrink-wrapping we would need to insert the new blocks |
3276 | // at the right place and update the branches to PrologueMBB. |
3277 | assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet" ); |
3278 | |
3279 | unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3280 | assert(!MF.getRegInfo().isLiveIn(ScratchReg) && |
3281 | "Scratch register is live-in" ); |
3282 | |
3283 | if (MF.getFunction().isVarArg()) |
3284 | report_fatal_error(reason: "Segmented stacks do not support vararg functions." ); |
3285 | if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() && |
3286 | !STI.isTargetWin64() && !STI.isTargetFreeBSD() && |
3287 | !STI.isTargetDragonFly()) |
3288 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3289 | |
// Eventually StackSize will be calculated by a link-time pass, which will
// also decide whether checking code needs to be injected into this particular
// prologue.
3293 | StackSize = MFI.getStackSize(); |
3294 | |
3295 | if (!MFI.needsSplitStackProlog()) |
3296 | return; |
3297 | |
3298 | MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); |
3299 | MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); |
3300 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3301 | bool IsNested = false; |
3302 | |
3303 | // We need to know if the function has a nest argument only in 64 bit mode. |
3304 | if (Is64Bit) |
3305 | IsNested = HasNestArgument(MF: &MF); |
3306 | |
// The MOV R10, RAX needs to be in a different block, since the RET we emit in
// allocMBB needs to be the last (terminating) instruction.
3309 | |
3310 | for (const auto &LI : PrologueMBB.liveins()) { |
3311 | allocMBB->addLiveIn(RegMaskPair: LI); |
3312 | checkMBB->addLiveIn(RegMaskPair: LI); |
3313 | } |
3314 | |
3315 | if (IsNested) |
3316 | allocMBB->addLiveIn(PhysReg: IsLP64 ? X86::R10 : X86::R10D); |
3317 | |
3318 | MF.push_front(MBB: allocMBB); |
3319 | MF.push_front(MBB: checkMBB); |
3320 | |
3321 | // When the frame size is less than 256 we just compare the stack |
3322 | // boundary directly to the value of the stack pointer, per gcc. |
3323 | bool CompareStackPointer = StackSize < kSplitStackAvailable; |
3324 | |
// Read the limit of the current stacklet from the stack_guard location.
3326 | if (Is64Bit) { |
3327 | if (STI.isTargetLinux()) { |
3328 | TlsReg = X86::FS; |
3329 | TlsOffset = IsLP64 ? 0x70 : 0x40; |
3330 | } else if (STI.isTargetDarwin()) { |
3331 | TlsReg = X86::GS; |
3332 | TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90. |
3333 | } else if (STI.isTargetWin64()) { |
3334 | TlsReg = X86::GS; |
3335 | TlsOffset = 0x28; // pvArbitrary, reserved for application use |
3336 | } else if (STI.isTargetFreeBSD()) { |
3337 | TlsReg = X86::FS; |
3338 | TlsOffset = 0x18; |
3339 | } else if (STI.isTargetDragonFly()) { |
3340 | TlsReg = X86::FS; |
3341 | TlsOffset = 0x20; // use tls_tcb.tcb_segstack |
3342 | } else { |
3343 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3344 | } |
3345 | |
3346 | if (CompareStackPointer) |
3347 | ScratchReg = IsLP64 ? X86::RSP : X86::ESP; |
3348 | else |
3349 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: IsLP64 ? X86::LEA64r : X86::LEA64_32r), |
3350 | DestReg: ScratchReg) |
3351 | .addReg(RegNo: X86::RSP) |
3352 | .addImm(Val: 1) |
3353 | .addReg(RegNo: 0) |
3354 | .addImm(Val: -StackSize) |
3355 | .addReg(RegNo: 0); |
3356 | |
3357 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: IsLP64 ? X86::CMP64rm : X86::CMP32rm)) |
3358 | .addReg(RegNo: ScratchReg) |
3359 | .addReg(RegNo: 0) |
3360 | .addImm(Val: 1) |
3361 | .addReg(RegNo: 0) |
3362 | .addImm(Val: TlsOffset) |
3363 | .addReg(RegNo: TlsReg); |
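// On Linux/LP64 with, say, %r11 as the scratch register, the check built
// above amounts to:
//   leaq  -StackSize(%rsp), %r11
//   cmpq  %fs:0x70, %r11
// (the lea is skipped and %rsp compared directly when StackSize is below
// kSplitStackAvailable).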
3364 | } else { |
3365 | if (STI.isTargetLinux()) { |
3366 | TlsReg = X86::GS; |
3367 | TlsOffset = 0x30; |
3368 | } else if (STI.isTargetDarwin()) { |
3369 | TlsReg = X86::GS; |
3370 | TlsOffset = 0x48 + 90 * 4; |
3371 | } else if (STI.isTargetWin32()) { |
3372 | TlsReg = X86::FS; |
3373 | TlsOffset = 0x14; // pvArbitrary, reserved for application use |
3374 | } else if (STI.isTargetDragonFly()) { |
3375 | TlsReg = X86::FS; |
3376 | TlsOffset = 0x10; // use tls_tcb.tcb_segstack |
3377 | } else if (STI.isTargetFreeBSD()) { |
3378 | report_fatal_error(reason: "Segmented stacks not supported on FreeBSD i386." ); |
3379 | } else { |
3380 | report_fatal_error(reason: "Segmented stacks not supported on this platform." ); |
3381 | } |
3382 | |
3383 | if (CompareStackPointer) |
3384 | ScratchReg = X86::ESP; |
3385 | else |
3386 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::LEA32r), DestReg: ScratchReg) |
3387 | .addReg(RegNo: X86::ESP) |
3388 | .addImm(Val: 1) |
3389 | .addReg(RegNo: 0) |
3390 | .addImm(Val: -StackSize) |
3391 | .addReg(RegNo: 0); |
3392 | |
3393 | if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() || |
3394 | STI.isTargetDragonFly()) { |
3395 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP32rm)) |
3396 | .addReg(RegNo: ScratchReg) |
3397 | .addReg(RegNo: 0) |
3398 | .addImm(Val: 0) |
3399 | .addReg(RegNo: 0) |
3400 | .addImm(Val: TlsOffset) |
3401 | .addReg(RegNo: TlsReg); |
3402 | } else if (STI.isTargetDarwin()) { |
3403 | |
3404 | // TlsOffset doesn't fit into a mod r/m byte so we need an extra register. |
3405 | unsigned ScratchReg2; |
3406 | bool SaveScratch2; |
3407 | if (CompareStackPointer) { |
3408 | // The primary scratch register is available for holding the TLS offset. |
3409 | ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3410 | SaveScratch2 = false; |
3411 | } else { |
3412 | // Need to use a second register to hold the TLS offset |
3413 | ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: false); |
3414 | |
3415 | // Unfortunately, with fastcc the second scratch register may hold an |
3416 | // argument. |
3417 | SaveScratch2 = MF.getRegInfo().isLiveIn(Reg: ScratchReg2); |
3418 | } |
3419 | |
3420 | // If Scratch2 is live-in then it needs to be saved. |
3421 | assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && |
3422 | "Scratch register is live-in and not saved" ); |
3423 | |
3424 | if (SaveScratch2) |
3425 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32r)) |
3426 | .addReg(RegNo: ScratchReg2, flags: RegState::Kill); |
3427 | |
3428 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32ri), DestReg: ScratchReg2) |
3429 | .addImm(Val: TlsOffset); |
3430 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CMP32rm)) |
3431 | .addReg(RegNo: ScratchReg) |
3432 | .addReg(RegNo: ScratchReg2) |
3433 | .addImm(Val: 1) |
3434 | .addReg(RegNo: 0) |
3435 | .addImm(Val: 0) |
3436 | .addReg(RegNo: TlsReg); |
3437 | |
3438 | if (SaveScratch2) |
3439 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::POP32r), DestReg: ScratchReg2); |
3440 | } |
3441 | } |
3442 | |
3443 | // This jump is taken if SP >= (Stacklet Limit + Stack Space required). |
3444 | // It jumps to normal execution of the function body. |
3445 | BuildMI(BB: checkMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
3446 | .addMBB(MBB: &PrologueMBB) |
3447 | .addImm(Val: X86::COND_A); |
3448 | |
3449 | // On 32 bit we first push the arguments size and then the frame size. On 64 |
3450 | // bit, we pass the stack frame size in r10 and the argument size in r11. |
3451 | if (Is64Bit) { |
3452 | // Functions with nested arguments use R10, so it needs to be saved across |
3453 | // the call to _morestack |
3454 | |
3455 | const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX; |
3456 | const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D; |
3457 | const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D; |
3458 | const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr; |
3459 | |
3460 | if (IsNested) |
3461 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: MOVrr), DestReg: RegAX).addReg(RegNo: Reg10); |
3462 | |
3463 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: IsLP64, Imm: StackSize)), DestReg: Reg10) |
3464 | .addImm(Val: StackSize); |
3465 | BuildMI(BB: allocMBB, MIMD: DL, |
3466 | MCID: TII.get(Opcode: getMOVriOpcode(Use64BitReg: IsLP64, Imm: X86FI->getArgumentStackSize())), |
3467 | DestReg: Reg11) |
3468 | .addImm(Val: X86FI->getArgumentStackSize()); |
3469 | } else { |
3470 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32i)) |
3471 | .addImm(Val: X86FI->getArgumentStackSize()); |
3472 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::PUSH32i)).addImm(Val: StackSize); |
3473 | } |
3474 | |
3475 | // __morestack is in libgcc |
3476 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { |
3477 | // Under the large code model, we cannot assume that __morestack lives |
3478 | // within 2^31 bytes of the call site, so we cannot use pc-relative |
3479 | // addressing. We cannot perform the call via a temporary register, |
3480 | // as the rax register may be used to store the static chain, and all |
3481 | // other suitable registers may be either callee-save or used for |
3482 | // parameter passing. We cannot use the stack at this point either |
3483 | // because __morestack manipulates the stack directly. |
3484 | // |
3485 | // To avoid these issues, perform an indirect call via a read-only memory |
3486 | // location containing the address. |
3487 | // |
3488 | // This solution is not perfect, as it assumes that the .rodata section |
3489 | // is laid out within 2^31 bytes of each function body, but this seems |
3490 | // to be sufficient for JIT. |
// FIXME: Add retpoline support and remove the error here.
3492 | if (STI.useIndirectThunkCalls()) |
3493 | report_fatal_error(reason: "Emitting morestack calls on 64-bit with the large " |
3494 | "code model and thunks not yet implemented." ); |
3495 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALL64m)) |
3496 | .addReg(RegNo: X86::RIP) |
3497 | .addImm(Val: 0) |
3498 | .addReg(RegNo: 0) |
3499 | .addExternalSymbol(FnName: "__morestack_addr" ) |
3500 | .addReg(RegNo: 0); |
3501 | } else { |
3502 | if (Is64Bit) |
3503 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALL64pcrel32)) |
3504 | .addExternalSymbol(FnName: "__morestack" ); |
3505 | else |
3506 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::CALLpcrel32)) |
3507 | .addExternalSymbol(FnName: "__morestack" ); |
3508 | } |
3509 | |
3510 | if (IsNested) |
3511 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MORESTACK_RET_RESTORE_R10)); |
3512 | else |
3513 | BuildMI(BB: allocMBB, MIMD: DL, MCID: TII.get(Opcode: X86::MORESTACK_RET)); |
3514 | |
3515 | allocMBB->addSuccessor(Succ: &PrologueMBB); |
3516 | |
3517 | checkMBB->addSuccessor(Succ: allocMBB, Prob: BranchProbability::getZero()); |
3518 | checkMBB->addSuccessor(Succ: &PrologueMBB, Prob: BranchProbability::getOne()); |
3519 | |
3520 | #ifdef EXPENSIVE_CHECKS |
3521 | MF.verify(); |
3522 | #endif |
3523 | } |
3524 | |
3525 | /// Lookup an ERTS parameter in the !hipe.literals named metadata node. |
3526 | /// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets |
3527 | /// to fields it needs, through a named metadata node "hipe.literals" containing |
3528 | /// name-value pairs. |
3529 | static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, |
3530 | const StringRef LiteralName) { |
3531 | for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) { |
3532 | MDNode *Node = HiPELiteralsMD->getOperand(i); |
3533 | if (Node->getNumOperands() != 2) |
3534 | continue; |
3535 | MDString *NodeName = dyn_cast<MDString>(Val: Node->getOperand(I: 0)); |
3536 | ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Val: Node->getOperand(I: 1)); |
3537 | if (!NodeName || !NodeVal) |
3538 | continue; |
3539 | ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(Val: NodeVal->getValue()); |
3540 | if (ValConst && NodeName->getString() == LiteralName) { |
3541 | return ValConst->getZExtValue(); |
3542 | } |
3543 | } |
3544 | |
3545 | report_fatal_error(reason: "HiPE literal " + LiteralName + |
3546 | " required but not provided" ); |
3547 | } |
3548 | |
3549 | // Return true if there are no non-ehpad successors to MBB and there are no |
3550 | // non-meta instructions between MBBI and MBB.end(). |
3551 | static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, |
3552 | MachineBasicBlock::const_iterator MBBI) { |
3553 | return llvm::all_of( |
3554 | Range: MBB.successors(), |
3555 | P: [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) && |
3556 | std::all_of(first: MBBI, last: MBB.end(), pred: [](const MachineInstr &MI) { |
3557 | return MI.isMetaInstruction(); |
3558 | }); |
3559 | } |
3560 | |
3561 | /// Erlang programs may need a special prologue to handle the stack size they |
3562 | /// might need at runtime. That is because Erlang/OTP does not implement a C |
/// stack but uses a custom implementation of a hybrid stack/heap architecture.
3564 | /// (for more information see Eric Stenman's Ph.D. thesis: |
3565 | /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) |
3566 | /// |
3567 | /// CheckStack: |
3568 | /// temp0 = sp - MaxStack |
3569 | /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart |
3570 | /// OldStart: |
3571 | /// ... |
3572 | /// IncStack: |
3573 | /// call inc_stack # doubles the stack space |
3574 | /// temp0 = sp - MaxStack |
3575 | /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart |
3576 | void X86FrameLowering::adjustForHiPEPrologue( |
3577 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { |
3578 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3579 | DebugLoc DL; |
3580 | |
3581 | // To support shrink-wrapping we would need to insert the new blocks |
3582 | // at the right place and update the branches to PrologueMBB. |
3583 | assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet" ); |
3584 | |
3585 | // HiPE-specific values |
3586 | NamedMDNode *HiPELiteralsMD = |
3587 | MF.getFunction().getParent()->getNamedMetadata(Name: "hipe.literals" ); |
3588 | if (!HiPELiteralsMD) |
3589 | report_fatal_error( |
3590 | reason: "Can't generate HiPE prologue without runtime parameters" ); |
3591 | const unsigned HipeLeafWords = getHiPELiteral( |
3592 | HiPELiteralsMD, LiteralName: Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS" ); |
3593 | const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; |
3594 | const unsigned Guaranteed = HipeLeafWords * SlotSize; |
3595 | unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs |
3596 | ? MF.getFunction().arg_size() - CCRegisteredArgs |
3597 | : 0; |
3598 | unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize; |
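// For example (hypothetical numbers), an LP64 function with a 40-byte frame
// and 8 arguments (2 of them on the stack) starts with
// MaxStack = 40 + 2 * 8 + 8 = 64, before accounting for its callees below.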
3599 | |
3600 | assert(STI.isTargetLinux() && |
3601 | "HiPE prologue is only supported on Linux operating systems." ); |
3602 | |
3603 | // Compute the largest caller's frame that is needed to fit the callees' |
3604 | // frames. This 'MaxStack' is computed from: |
3605 | // |
3606 | // a) the fixed frame size, which is the space needed for all spilled temps, |
3607 | // b) outgoing on-stack parameter areas, and |
3608 | // c) the minimum stack space this function needs to make available for the |
3609 | // functions it calls (a tunable ABI property). |
3610 | if (MFI.hasCalls()) { |
3611 | unsigned MoreStackForCalls = 0; |
3612 | |
3613 | for (auto &MBB : MF) { |
3614 | for (auto &MI : MBB) { |
3615 | if (!MI.isCall()) |
3616 | continue; |
3617 | |
3618 | // Get callee operand. |
3619 | const MachineOperand &MO = MI.getOperand(i: 0); |
3620 | |
3621 | // Only take account of global function calls (no closures etc.). |
3622 | if (!MO.isGlobal()) |
3623 | continue; |
3624 | |
3625 | const Function *F = dyn_cast<Function>(Val: MO.getGlobal()); |
3626 | if (!F) |
3627 | continue; |
3628 | |
3629 | // Do not update 'MaxStack' for primitive and built-in functions |
3630 | // (encoded with names either starting with "erlang."/"bif_" or not |
3631 | // having a ".", such as a simple <Module>.<Function>.<Arity>, or an |
3632 | // "_", such as the BIF "suspend_0") as they are executed on another |
3633 | // stack. |
3634 | if (F->getName().contains(Other: "erlang." ) || F->getName().contains(Other: "bif_" ) || |
3635 | F->getName().find_first_of(Chars: "._" ) == StringRef::npos) |
3636 | continue; |
3637 | |
3638 | unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs |
3639 | ? F->arg_size() - CCRegisteredArgs |
3640 | : 0; |
3641 | if (HipeLeafWords - 1 > CalleeStkArity) |
3642 | MoreStackForCalls = |
3643 | std::max(a: MoreStackForCalls, |
3644 | b: (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); |
3645 | } |
3646 | } |
3647 | MaxStack += MoreStackForCalls; |
3648 | } |
3649 | |
// If the stack frame needed is larger than the guaranteed size, then runtime
// checks and calls to the "inc_stack_0" BIF should be inserted in the assembly
// prologue.
3652 | if (MaxStack > Guaranteed) { |
3653 | MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); |
3654 | MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); |
3655 | |
3656 | for (const auto &LI : PrologueMBB.liveins()) { |
3657 | stackCheckMBB->addLiveIn(RegMaskPair: LI); |
3658 | incStackMBB->addLiveIn(RegMaskPair: LI); |
3659 | } |
3660 | |
3661 | MF.push_front(MBB: incStackMBB); |
3662 | MF.push_front(MBB: stackCheckMBB); |
3663 | |
3664 | unsigned ScratchReg, SPReg, PReg, SPLimitOffset; |
3665 | unsigned LEAop, CMPop, CALLop; |
3666 | SPLimitOffset = getHiPELiteral(HiPELiteralsMD, LiteralName: "P_NSP_LIMIT" ); |
3667 | if (Is64Bit) { |
3668 | SPReg = X86::RSP; |
3669 | PReg = X86::RBP; |
3670 | LEAop = X86::LEA64r; |
3671 | CMPop = X86::CMP64rm; |
3672 | CALLop = X86::CALL64pcrel32; |
3673 | } else { |
3674 | SPReg = X86::ESP; |
3675 | PReg = X86::EBP; |
3676 | LEAop = X86::LEA32r; |
3677 | CMPop = X86::CMP32rm; |
3678 | CALLop = X86::CALLpcrel32; |
3679 | } |
3680 | |
3681 | ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, Primary: true); |
3682 | assert(!MF.getRegInfo().isLiveIn(ScratchReg) && |
3683 | "HiPE prologue scratch register is live-in" ); |
3684 | |
3685 | // Create new MBB for StackCheck: |
3686 | addRegOffset(MIB: BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: LEAop), DestReg: ScratchReg), Reg: SPReg, |
3687 | isKill: false, Offset: -MaxStack); |
3688 | // SPLimitOffset is in a fixed heap location (pointed by BP). |
3689 | addRegOffset(MIB: BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: CMPop)).addReg(RegNo: ScratchReg), |
3690 | Reg: PReg, isKill: false, Offset: SPLimitOffset); |
3691 | BuildMI(BB: stackCheckMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
3692 | .addMBB(MBB: &PrologueMBB) |
3693 | .addImm(Val: X86::COND_AE); |
3694 | |
3695 | // Create new MBB for IncStack: |
3696 | BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: CALLop)).addExternalSymbol(FnName: "inc_stack_0" ); |
3697 | addRegOffset(MIB: BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: LEAop), DestReg: ScratchReg), Reg: SPReg, |
3698 | isKill: false, Offset: -MaxStack); |
3699 | addRegOffset(MIB: BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: CMPop)).addReg(RegNo: ScratchReg), |
3700 | Reg: PReg, isKill: false, Offset: SPLimitOffset); |
3701 | BuildMI(BB: incStackMBB, MIMD: DL, MCID: TII.get(Opcode: X86::JCC_1)) |
3702 | .addMBB(MBB: incStackMBB) |
3703 | .addImm(Val: X86::COND_LE); |
3704 | |
3705 | stackCheckMBB->addSuccessor(Succ: &PrologueMBB, Prob: {99, 100}); |
3706 | stackCheckMBB->addSuccessor(Succ: incStackMBB, Prob: {1, 100}); |
3707 | incStackMBB->addSuccessor(Succ: &PrologueMBB, Prob: {99, 100}); |
3708 | incStackMBB->addSuccessor(Succ: incStackMBB, Prob: {1, 100}); |
3709 | } |
3710 | #ifdef EXPENSIVE_CHECKS |
3711 | MF.verify(); |
3712 | #endif |
3713 | } |
3714 | |
3715 | bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, |
3716 | MachineBasicBlock::iterator MBBI, |
3717 | const DebugLoc &DL, |
3718 | int Offset) const { |
3719 | if (Offset <= 0) |
3720 | return false; |
3721 | |
3722 | if (Offset % SlotSize) |
3723 | return false; |
3724 | |
3725 | int NumPops = Offset / SlotSize; |
3726 | // This is only worth it if we have at most 2 pops. |
3727 | if (NumPops != 1 && NumPops != 2) |
3728 | return false; |
3729 | |
3730 | // Handle only the trivial case where the adjustment directly follows |
3731 | // a call. This is the most common one, anyway. |
3732 | if (MBBI == MBB.begin()) |
3733 | return false; |
3734 | MachineBasicBlock::iterator Prev = std::prev(x: MBBI); |
3735 | if (!Prev->isCall() || !Prev->getOperand(i: 1).isRegMask()) |
3736 | return false; |
3737 | |
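// For instance (hypothetical), an 8-byte adjustment right after a call can
// become a single
//   popq %rcx
// provided %rcx is clobbered, but not defined, by the call; the search below
// looks for such registers.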
3738 | unsigned Regs[2]; |
3739 | unsigned FoundRegs = 0; |
3740 | |
3741 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); |
3742 | const MachineOperand &RegMask = Prev->getOperand(i: 1); |
3743 | |
3744 | auto &RegClass = |
3745 | Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; |
3746 | // Try to find up to NumPops free registers. |
3747 | for (auto Candidate : RegClass) { |
3748 | // Poor man's liveness: |
3749 | // Since we're immediately after a call, any register that is clobbered |
3750 | // by the call and not defined by it can be considered dead. |
3751 | if (!RegMask.clobbersPhysReg(PhysReg: Candidate)) |
3752 | continue; |
3753 | |
3754 | // Don't clobber reserved registers |
3755 | if (MRI.isReserved(PhysReg: Candidate)) |
3756 | continue; |
3757 | |
3758 | bool IsDef = false; |
3759 | for (const MachineOperand &MO : Prev->implicit_operands()) { |
3760 | if (MO.isReg() && MO.isDef() && |
3761 | TRI->isSuperOrSubRegisterEq(RegA: MO.getReg(), RegB: Candidate)) { |
3762 | IsDef = true; |
3763 | break; |
3764 | } |
3765 | } |
3766 | |
3767 | if (IsDef) |
3768 | continue; |
3769 | |
3770 | Regs[FoundRegs++] = Candidate; |
3771 | if (FoundRegs == (unsigned)NumPops) |
3772 | break; |
3773 | } |
3774 | |
3775 | if (FoundRegs == 0) |
3776 | return false; |
3777 | |
3778 | // If we found only one free register, but need two, reuse the same one twice. |
3779 | while (FoundRegs < (unsigned)NumPops) |
3780 | Regs[FoundRegs++] = Regs[0]; |
3781 | |
3782 | for (int i = 0; i < NumPops; ++i) |
3783 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: STI.is64Bit() ? X86::POP64r : X86::POP32r), |
3784 | DestReg: Regs[i]); |
3785 | |
3786 | return true; |
3787 | } |
3788 | |
3789 | MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr( |
3790 | MachineFunction &MF, MachineBasicBlock &MBB, |
3791 | MachineBasicBlock::iterator I) const { |
3792 | bool reserveCallFrame = hasReservedCallFrame(MF); |
3793 | unsigned Opcode = I->getOpcode(); |
3794 | bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); |
3795 | DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased. |
3796 | uint64_t Amount = TII.getFrameSize(I: *I); |
3797 | uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(I: *I) : 0; |
3798 | I = MBB.erase(I); |
3799 | auto InsertPos = skipDebugInstructionsForward(It: I, End: MBB.end()); |
3800 | |
3801 | // Try to avoid emitting dead SP adjustments if the block end is unreachable, |
3802 | // typically because the function is marked noreturn (abort, throw, |
3803 | // assert_fail, etc). |
3804 | if (isDestroy && blockEndIsUnreachable(MBB, MBBI: I)) |
3805 | return I; |
3806 | |
3807 | if (!reserveCallFrame) { |
    // If the stack pointer can be changed after the prologue, turn the
    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
    // adjcallstackup instruction into an 'add ESP, <amt>'.
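    // For example (a sketch; exact pseudo-instruction operands vary):
    //   ADJCALLSTACKDOWN64 32, 0, 0   ->   sub rsp, 32
    //   ADJCALLSTACKUP64 32, 0        ->   add rsp, 32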
3811 | |
3812 | // We need to keep the stack aligned properly. To do this, we round the |
3813 | // amount of space needed for the outgoing arguments up to the next |
3814 | // alignment boundary. |
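    // For example, with a 16-byte stack alignment, a 20-byte outgoing
    // argument area is rounded up to 32 bytes here.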
3815 | Amount = alignTo(Size: Amount, A: getStackAlign()); |
3816 | |
3817 | const Function &F = MF.getFunction(); |
3818 | bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); |
3819 | bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves(); |
3820 | |
3821 | // If we have any exception handlers in this function, and we adjust |
3822 | // the SP before calls, we may need to indicate this to the unwinder |
3823 | // using GNU_ARGS_SIZE. Note that this may be necessary even when |
3824 | // Amount == 0, because the preceding function may have set a non-0 |
3825 | // GNU_ARGS_SIZE. |
3826 | // TODO: We don't need to reset this between subsequent functions, |
3827 | // if it didn't change. |
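    // In the assembly output this typically appears as a
    // '.cfi_gnu_args_size <Amount>' directive.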
3828 | bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty(); |
3829 | |
3830 | if (HasDwarfEHHandlers && !isDestroy && |
3831 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences()) |
3832 | BuildCFI(MBB, MBBI: InsertPos, DL, |
3833 | CFIInst: MCCFIInstruction::createGnuArgsSize(L: nullptr, Size: Amount)); |
3834 | |
3835 | if (Amount == 0) |
3836 | return I; |
3837 | |
    // Factor out the amount that gets handled inside the sequence
    // (pushes of arguments for frame setup, callee pops for frame destroy).
3840 | Amount -= InternalAmt; |
3841 | |
3842 | // TODO: This is needed only if we require precise CFA. |
3843 | // If this is a callee-pop calling convention, emit a CFA adjust for |
3844 | // the amount the callee popped. |
3845 | if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) |
3846 | BuildCFI(MBB, MBBI: InsertPos, DL, |
3847 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: -InternalAmt)); |
3848 | |
3849 | // Add Amount to SP to destroy a frame, or subtract to setup. |
3850 | int64_t StackAdjustment = isDestroy ? Amount : -Amount; |
3851 | int64_t CfaAdjustment = StackAdjustment; |
3852 | |
3853 | if (StackAdjustment) { |
      // Merge with any previous or following adjustment instruction. Note: the
      // instructions merged here do not have CFI, so their stack adjustments
      // do not feed into CfaAdjustment.
3857 | |
3858 | auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI, |
3859 | int64_t Offset) { |
3860 | CfaAdjustment += Offset; |
3861 | }; |
3862 | auto CalcNewOffset = [&StackAdjustment](int64_t Offset) { |
3863 | return StackAdjustment + Offset; |
3864 | }; |
3865 | StackAdjustment = |
3866 | mergeSPUpdates(MBB, MBBI&: InsertPos, FoundStackAdjust: CalcCfaAdjust, CalcNewOffset, doMergeWithPrevious: true); |
3867 | StackAdjustment = |
3868 | mergeSPUpdates(MBB, MBBI&: InsertPos, FoundStackAdjust: CalcCfaAdjust, CalcNewOffset, doMergeWithPrevious: false); |
3869 | |
3870 | if (StackAdjustment) { |
3871 | if (!(F.hasMinSize() && |
3872 | adjustStackWithPops(MBB, MBBI: InsertPos, DL, Offset: StackAdjustment))) |
3873 | BuildStackAdjustment(MBB, MBBI: InsertPos, DL, Offset: StackAdjustment, |
3874 | /*InEpilogue=*/false); |
3875 | } |
3876 | } |
3877 | |
3878 | if (DwarfCFI && !hasFP(MF) && CfaAdjustment) { |
3879 | // If we don't have FP, but need to generate unwind information, |
3880 | // we need to set the correct CFA offset after the stack adjustment. |
3881 | // How much we adjust the CFA offset depends on whether we're emitting |
3882 | // CFI only for EH purposes or for debugging. EH only requires the CFA |
3883 | // offset to be correct at each call site, while for debugging we want |
3884 | // it to be more precise. |
3885 | |
3886 | // TODO: When not using precise CFA, we also need to adjust for the |
3887 | // InternalAmt here. |
3888 | BuildCFI( |
3889 | MBB, MBBI: InsertPos, DL, |
3890 | CFIInst: MCCFIInstruction::createAdjustCfaOffset(L: nullptr, Adjustment: -CfaAdjustment)); |
3891 | } |
3892 | |
3893 | return I; |
3894 | } |
3895 | |
3896 | if (InternalAmt) { |
3897 | MachineBasicBlock::iterator CI = I; |
3898 | MachineBasicBlock::iterator B = MBB.begin(); |
3899 | while (CI != B && !std::prev(x: CI)->isCall()) |
3900 | --CI; |
3901 | BuildStackAdjustment(MBB, MBBI: CI, DL, Offset: -InternalAmt, /*InEpilogue=*/false); |
3902 | } |
3903 | |
3904 | return I; |
3905 | } |
3906 | |
3907 | bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { |
3908 | assert(MBB.getParent() && "Block is not attached to a function!" ); |
3909 | const MachineFunction &MF = *MBB.getParent(); |
3910 | if (!MBB.isLiveIn(Reg: X86::EFLAGS)) |
3911 | return true; |
3912 | |
3913 | // If stack probes have to loop inline or call, that will clobber EFLAGS. |
3914 | // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock. |
3915 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); |
3916 | const X86TargetLowering &TLI = *STI.getTargetLowering(); |
3917 | if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF)) |
3918 | return false; |
3919 | |
3920 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3921 | return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext(); |
3922 | } |
3923 | |
3924 | bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
3925 | assert(MBB.getParent() && "Block is not attached to a function!" ); |
3926 | |
  // Win64 has strict requirements on the epilogue, and we are not taking any
  // chances messing with them. That is, unless this block is already an exit
  // block, we can't use it as an epilogue.
3931 | if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock()) |
3932 | return false; |
3933 | |
3934 | // Swift async context epilogue has a BTR instruction that clobbers parts of |
3935 | // EFLAGS. |
3936 | const MachineFunction &MF = *MBB.getParent(); |
3937 | if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext()) |
3938 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); |
3939 | |
3940 | if (canUseLEAForSPInEpilogue(MF: *MBB.getParent())) |
3941 | return true; |
3942 | |
  // If we cannot use LEA to adjust SP, we may need to use ADD, which
  // clobbers EFLAGS. Check that we do not need to preserve it; otherwise,
  // conservatively assume it is not safe to insert the epilogue here.
3947 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); |
3948 | } |
3949 | |
3950 | bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { |
3951 | // If we may need to emit frameless compact unwind information, give |
3952 | // up as this is currently broken: PR25614. |
3953 | bool CompactUnwind = |
3954 | MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr; |
3955 | return (MF.getFunction().hasFnAttribute(Kind: Attribute::NoUnwind) || hasFP(MF) || |
3956 | !CompactUnwind) && |
3957 | // The lowering of segmented stack and HiPE only support entry |
3958 | // blocks as prologue blocks: PR26107. This limitation may be |
3959 | // lifted if we fix: |
3960 | // - adjustForSegmentedStacks |
3961 | // - adjustForHiPEPrologue |
3962 | MF.getFunction().getCallingConv() != CallingConv::HiPE && |
3963 | !MF.shouldSplitStack(); |
3964 | } |
3965 | |
3966 | MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( |
3967 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
3968 | const DebugLoc &DL, bool RestoreSP) const { |
3969 | assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env" ); |
3970 | assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32" ); |
3971 | assert(STI.is32Bit() && !Uses64BitFramePtr && |
3972 | "restoring EBP/ESI on non-32-bit target" ); |
3973 | |
3974 | MachineFunction &MF = *MBB.getParent(); |
3975 | Register FramePtr = TRI->getFrameRegister(MF); |
3976 | Register BasePtr = TRI->getBaseRegister(); |
3977 | WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); |
3978 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3979 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3980 | |
3981 | // FIXME: Don't set FrameSetup flag in catchret case. |
3982 | |
3983 | int FI = FuncInfo.EHRegNodeFrameIndex; |
3984 | int EHRegSize = MFI.getObjectSize(ObjectIdx: FI); |
3985 | |
3986 | if (RestoreSP) { |
3987 | // MOV32rm -EHRegSize(%ebp), %esp |
3988 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32rm), DestReg: X86::ESP), |
3989 | Reg: X86::EBP, isKill: true, Offset: -EHRegSize) |
3990 | .setMIFlag(MachineInstr::FrameSetup); |
3991 | } |
3992 | |
3993 | Register UsedReg; |
3994 | int EHRegOffset = getFrameIndexReference(MF, FI, FrameReg&: UsedReg).getFixed(); |
3995 | int EndOffset = -EHRegOffset - EHRegSize; |
3996 | FuncInfo.EHRegNodeEndOffset = EndOffset; |
3997 | |
3998 | if (UsedReg == FramePtr) { |
3999 | // ADD $offset, %ebp |
4000 | unsigned ADDri = getADDriOpcode(IsLP64: false); |
4001 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ADDri), DestReg: FramePtr) |
4002 | .addReg(RegNo: FramePtr) |
4003 | .addImm(Val: EndOffset) |
4004 | .setMIFlag(MachineInstr::FrameSetup) |
4005 | ->getOperand(i: 3) |
4006 | .setIsDead(); |
4007 | assert(EndOffset >= 0 && |
4008 | "end of registration object above normal EBP position!" ); |
4009 | } else if (UsedReg == BasePtr) { |
4010 | // LEA offset(%ebp), %esi |
4011 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::LEA32r), DestReg: BasePtr), |
4012 | Reg: FramePtr, isKill: false, Offset: EndOffset) |
4013 | .setMIFlag(MachineInstr::FrameSetup); |
4014 | // MOV32rm SavedEBPOffset(%esi), %ebp |
4015 | assert(X86FI->getHasSEHFramePtrSave()); |
4016 | int Offset = |
4017 | getFrameIndexReference(MF, FI: X86FI->getSEHFramePtrSaveIndex(), FrameReg&: UsedReg) |
4018 | .getFixed(); |
4019 | assert(UsedReg == BasePtr); |
4020 | addRegOffset(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV32rm), DestReg: FramePtr), |
4021 | Reg: UsedReg, isKill: true, Offset) |
4022 | .setMIFlag(MachineInstr::FrameSetup); |
4023 | } else { |
4024 | llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr" ); |
4025 | } |
4026 | return MBBI; |
4027 | } |
4028 | |
4029 | int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { |
4030 | return TRI->getSlotSize(); |
4031 | } |
4032 | |
4033 | Register |
4034 | X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const { |
4035 | return StackPtr; |
4036 | } |
4037 | |
4038 | TargetFrameLowering::DwarfFrameBase |
4039 | X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { |
4040 | const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); |
4041 | Register FrameRegister = RI->getFrameRegister(MF); |
4042 | if (getInitialCFARegister(MF) == FrameRegister && |
4043 | MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) { |
4044 | DwarfFrameBase FrameBase; |
4045 | FrameBase.Kind = DwarfFrameBase::CFA; |
4046 | FrameBase.Location.Offset = |
4047 | -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF); |
4048 | return FrameBase; |
4049 | } |
4050 | |
4051 | return DwarfFrameBase{.Kind: DwarfFrameBase::Register, .Location: {.Reg: FrameRegister}}; |
4052 | } |
4053 | |
4054 | namespace { |
4055 | // Struct used by orderFrameObjects to help sort the stack objects. |
4056 | struct X86FrameSortingObject { |
4057 | bool IsValid = false; // true if we care about this Object. |
4058 | unsigned ObjectIndex = 0; // Index of Object into MFI list. |
4059 | unsigned ObjectSize = 0; // Size of Object in bytes. |
4060 | Align ObjectAlignment = Align(1); // Alignment of Object in bytes. |
4061 | unsigned ObjectNumUses = 0; // Object static number of uses. |
4062 | }; |
4063 | |
// The comparison function we use for stable_sort to order our local
// stack symbols. The current algorithm uses an estimated "density": it
// takes into consideration the size and number of uses each object has
// in order to roughly minimize code size.
// So, for example, an object of size 16B that is referenced 5 times
// will get higher priority than 4 4B objects referenced 1 time each.
// It's not perfect, and we may be able to squeeze a few more bytes out
// of it (for example, 0(esp) requires fewer bytes, symbols allocated at
// the fringe end could get special consideration given that their size
// is less important, etc.), but the algorithmic complexity grows too
// much to be worth the extra gains we get. This gets us pretty close.
// The final order leaves the objects with the highest priority at the
// end of our list.
4077 | struct X86FrameSortingComparator { |
4078 | inline bool operator()(const X86FrameSortingObject &A, |
4079 | const X86FrameSortingObject &B) const { |
4080 | uint64_t DensityAScaled, DensityBScaled; |
4081 | |
4082 | // For consistency in our comparison, all invalid objects are placed |
4083 | // at the end. This also allows us to stop walking when we hit the |
4084 | // first invalid item after it's all sorted. |
4085 | if (!A.IsValid) |
4086 | return false; |
4087 | if (!B.IsValid) |
4088 | return true; |
4089 | |
4090 | // The density is calculated by doing : |
4091 | // (double)DensityA = A.ObjectNumUses / A.ObjectSize |
4092 | // (double)DensityB = B.ObjectNumUses / B.ObjectSize |
4093 | // Since this approach may cause inconsistencies in |
4094 | // the floating point <, >, == comparisons, depending on the floating |
4095 | // point model with which the compiler was built, we're going |
4096 | // to scale both sides by multiplying with |
4097 | // A.ObjectSize * B.ObjectSize. This ends up factoring away |
4098 | // the division and, with it, the need for any floating point |
4099 | // arithmetic. |
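    // Worked example: A has 5 uses and is 16 bytes; B has 1 use and is 4
    // bytes. Then DensityAScaled = 5 * 4 = 20 and DensityBScaled = 1 * 16
    // = 16, so A compares as the denser object, matching 5/16 > 1/4,
    // without any floating point arithmetic.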
4100 | DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) * |
4101 | static_cast<uint64_t>(B.ObjectSize); |
4102 | DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) * |
4103 | static_cast<uint64_t>(A.ObjectSize); |
4104 | |
4105 | // If the two densities are equal, prioritize highest alignment |
4106 | // objects. This allows for similar alignment objects |
4107 | // to be packed together (given the same density). |
4108 | // There's room for improvement here, also, since we can pack |
4109 | // similar alignment (different density) objects next to each |
4110 | // other to save padding. This will also require further |
4111 | // complexity/iterations, and the overall gain isn't worth it, |
4112 | // in general. Something to keep in mind, though. |
4113 | if (DensityAScaled == DensityBScaled) |
4114 | return A.ObjectAlignment < B.ObjectAlignment; |
4115 | |
4116 | return DensityAScaled < DensityBScaled; |
4117 | } |
4118 | }; |
4119 | } // namespace |
4120 | |
4121 | // Order the symbols in the local stack. |
4122 | // We want to place the local stack objects in some sort of sensible order. |
4123 | // The heuristic we use is to try and pack them according to static number |
4124 | // of uses and size of object in order to minimize code size. |
4125 | void X86FrameLowering::orderFrameObjects( |
4126 | const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { |
4127 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
4128 | |
4129 | // Don't waste time if there's nothing to do. |
4130 | if (ObjectsToAllocate.empty()) |
4131 | return; |
4132 | |
4133 | // Create an array of all MFI objects. We won't need all of these |
4134 | // objects, but we're going to create a full array of them to make |
4135 | // it easier to index into when we're counting "uses" down below. |
4136 | // We want to be able to easily/cheaply access an object by simply |
4137 | // indexing into it, instead of having to search for it every time. |
4138 | std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd()); |
4139 | |
4140 | // Walk the objects we care about and mark them as such in our working |
4141 | // struct. |
4142 | for (auto &Obj : ObjectsToAllocate) { |
4143 | SortingObjects[Obj].IsValid = true; |
4144 | SortingObjects[Obj].ObjectIndex = Obj; |
4145 | SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(ObjectIdx: Obj); |
4146 | // Set the size. |
4147 | int ObjectSize = MFI.getObjectSize(ObjectIdx: Obj); |
4148 | if (ObjectSize == 0) |
4149 | // Variable size. Just use 4. |
4150 | SortingObjects[Obj].ObjectSize = 4; |
4151 | else |
4152 | SortingObjects[Obj].ObjectSize = ObjectSize; |
4153 | } |
4154 | |
4155 | // Count the number of uses for each object. |
4156 | for (auto &MBB : MF) { |
4157 | for (auto &MI : MBB) { |
4158 | if (MI.isDebugInstr()) |
4159 | continue; |
4160 | for (const MachineOperand &MO : MI.operands()) { |
4161 | // Check to see if it's a local stack symbol. |
4162 | if (!MO.isFI()) |
4163 | continue; |
4164 | int Index = MO.getIndex(); |
4165 | // Check to see if it falls within our range, and is tagged |
4166 | // to require ordering. |
4167 | if (Index >= 0 && Index < MFI.getObjectIndexEnd() && |
4168 | SortingObjects[Index].IsValid) |
4169 | SortingObjects[Index].ObjectNumUses++; |
4170 | } |
4171 | } |
4172 | } |
4173 | |
  // Sort the objects using X86FrameSortingComparator (see its comment for
  // info).
4176 | llvm::stable_sort(Range&: SortingObjects, C: X86FrameSortingComparator()); |
4177 | |
4178 | // Now modify the original list to represent the final order that |
4179 | // we want. The order will depend on whether we're going to access them |
4180 | // from the stack pointer or the frame pointer. For SP, the list should |
4181 | // end up with the END containing objects that we want with smaller offsets. |
4182 | // For FP, it should be flipped. |
4183 | int i = 0; |
4184 | for (auto &Obj : SortingObjects) { |
4185 | // All invalid items are sorted at the end, so it's safe to stop. |
4186 | if (!Obj.IsValid) |
4187 | break; |
4188 | ObjectsToAllocate[i++] = Obj.ObjectIndex; |
4189 | } |
4190 | |
4191 | // Flip it if we're accessing off of the FP. |
4192 | if (!TRI->hasStackRealignment(MF) && hasFP(MF)) |
4193 | std::reverse(first: ObjectsToAllocate.begin(), last: ObjectsToAllocate.end()); |
4194 | } |
4195 | |
4196 | unsigned |
4197 | X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { |
4198 | // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. |
4199 | unsigned Offset = 16; |
4200 | // RBP is immediately pushed. |
4201 | Offset += SlotSize; |
4202 | // All callee-saved registers are then pushed. |
4203 | Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); |
4204 | // Every funclet allocates enough stack space for the largest outgoing call. |
4205 | Offset += getWinEHFuncletFrameSize(MF); |
4206 | return Offset; |
4207 | } |
4208 | |
4209 | void X86FrameLowering::processFunctionBeforeFrameFinalized( |
4210 | MachineFunction &MF, RegScavenger *RS) const { |
4211 | // Mark the function as not having WinCFI. We will set it back to true in |
4212 | // emitPrologue if it gets called and emits CFI. |
4213 | MF.setHasWinCFI(false); |
4214 | |
4215 | // If we are using Windows x64 CFI, ensure that the stack is always 8 byte |
4216 | // aligned. The format doesn't support misaligned stack adjustments. |
4217 | if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) |
4218 | MF.getFrameInfo().ensureMaxAlignment(Alignment: Align(SlotSize)); |
4219 | |
4220 | // If this function isn't doing Win64-style C++ EH, we don't need to do |
4221 | // anything. |
4222 | if (STI.is64Bit() && MF.hasEHFunclets() && |
4223 | classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn()) == |
4224 | EHPersonality::MSVC_CXX) { |
4225 | adjustFrameForMsvcCxxEh(MF); |
4226 | } |
4227 | } |
4228 | |
4229 | void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const { |
4230 | // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset |
4231 | // relative to RSP after the prologue. Find the offset of the last fixed |
4232 | // object, so that we can allocate a slot immediately following it. If there |
4233 | // were no fixed objects, use offset -SlotSize, which is immediately after the |
4234 | // return address. Fixed objects have negative frame indices. |
4235 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4236 | WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); |
4237 | int64_t MinFixedObjOffset = -SlotSize; |
4238 | for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) |
4239 | MinFixedObjOffset = std::min(a: MinFixedObjOffset, b: MFI.getObjectOffset(ObjectIdx: I)); |
4240 | |
4241 | for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { |
4242 | for (WinEHHandlerType &H : TBME.HandlerArray) { |
4243 | int FrameIndex = H.CatchObj.FrameIndex; |
4244 | if (FrameIndex != INT_MAX) { |
4245 | // Ensure alignment. |
4246 | unsigned Align = MFI.getObjectAlign(ObjectIdx: FrameIndex).value(); |
4247 | MinFixedObjOffset -= std::abs(i: MinFixedObjOffset) % Align; |
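        // For example, with MinFixedObjOffset == -20 and Align == 8:
        // -20 - (20 % 8) == -24, which is 8-byte aligned.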
4248 | MinFixedObjOffset -= MFI.getObjectSize(ObjectIdx: FrameIndex); |
4249 | MFI.setObjectOffset(ObjectIdx: FrameIndex, SPOffset: MinFixedObjOffset); |
4250 | } |
4251 | } |
4252 | } |
4253 | |
4254 | // Ensure alignment. |
4255 | MinFixedObjOffset -= std::abs(i: MinFixedObjOffset) % 8; |
4256 | int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize; |
4257 | int UnwindHelpFI = |
4258 | MFI.CreateFixedObject(Size: SlotSize, SPOffset: UnwindHelpOffset, /*IsImmutable=*/false); |
4259 | EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; |
4260 | |
4261 | // Store -2 into UnwindHelp on function entry. We have to scan forwards past |
4262 | // other frame setup instructions. |
4263 | MachineBasicBlock &MBB = MF.front(); |
4264 | auto MBBI = MBB.begin(); |
4265 | while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup)) |
4266 | ++MBBI; |
4267 | |
4268 | DebugLoc DL = MBB.findDebugLoc(MBBI); |
4269 | addFrameReference(MIB: BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: X86::MOV64mi32)), |
4270 | FI: UnwindHelpFI) |
4271 | .addImm(Val: -2); |
4272 | } |
4273 | |
4274 | void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( |
4275 | MachineFunction &MF, RegScavenger *RS) const { |
4276 | auto *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
4277 | |
4278 | if (STI.is32Bit() && MF.hasEHFunclets()) |
4279 | restoreWinEHStackPointersInParent(MF); |
4280 | // We have emitted prolog and epilog. Don't need stack pointer saving |
4281 | // instruction any more. |
4282 | if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) { |
4283 | MI->eraseFromParent(); |
4284 | X86FI->setStackPtrSaveMI(nullptr); |
4285 | } |
4286 | } |
4287 | |
4288 | void X86FrameLowering::restoreWinEHStackPointersInParent( |
4289 | MachineFunction &MF) const { |
4290 | // 32-bit functions have to restore stack pointers when control is transferred |
4291 | // back to the parent function. These blocks are identified as eh pads that |
4292 | // are not funclet entries. |
4293 | bool IsSEH = isAsynchronousEHPersonality( |
4294 | Pers: classifyEHPersonality(Pers: MF.getFunction().getPersonalityFn())); |
4295 | for (MachineBasicBlock &MBB : MF) { |
4296 | bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry(); |
4297 | if (NeedsRestore) |
4298 | restoreWin32EHStackPointers(MBB, MBBI: MBB.begin(), DL: DebugLoc(), |
4299 | /*RestoreSP=*/IsSEH); |
4300 | } |
4301 | } |
4302 | |
4303 | // Compute the alignment gap between current SP after spilling FP/BP and the |
4304 | // next properly aligned stack offset. |
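// For example, spilling a single 8-byte register with a 16-byte stack
// alignment leaves an 8-byte gap (AllocSize = 8, AlignedSize = 16).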
4305 | static int computeFPBPAlignmentGap(MachineFunction &MF, |
4306 | const TargetRegisterClass *RC, |
4307 | unsigned NumSpilledRegs) { |
4308 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
4309 | unsigned AllocSize = TRI->getSpillSize(RC: *RC) * NumSpilledRegs; |
4310 | Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign(); |
4311 | unsigned AlignedSize = alignTo(Size: AllocSize, A: StackAlign); |
4312 | return AlignedSize - AllocSize; |
4313 | } |
4314 | |
4315 | void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF, |
4316 | MachineBasicBlock::iterator BeforeMI, |
4317 | Register FP, Register BP, |
4318 | int SPAdjust) const { |
4319 | assert(FP.isValid() || BP.isValid()); |
4320 | |
4321 | MachineBasicBlock *MBB = BeforeMI->getParent(); |
4322 | DebugLoc DL = BeforeMI->getDebugLoc(); |
4323 | |
4324 | // Spill FP. |
4325 | if (FP.isValid()) { |
4326 | BuildMI(BB&: *MBB, I: BeforeMI, MIMD: DL, |
4327 | MCID: TII.get(Opcode: getPUSHOpcode(ST: MF.getSubtarget<X86Subtarget>()))) |
4328 | .addReg(RegNo: FP); |
4329 | } |
4330 | |
4331 | // Spill BP. |
4332 | if (BP.isValid()) { |
4333 | BuildMI(BB&: *MBB, I: BeforeMI, MIMD: DL, |
4334 | MCID: TII.get(Opcode: getPUSHOpcode(ST: MF.getSubtarget<X86Subtarget>()))) |
4335 | .addReg(RegNo: BP); |
4336 | } |
4337 | |
4338 | // Make sure SP is aligned. |
4339 | if (SPAdjust) |
4340 | emitSPUpdate(MBB&: *MBB, MBBI&: BeforeMI, DL, NumBytes: -SPAdjust, InEpilogue: false); |
4341 | |
4342 | // Emit unwinding information. |
4343 | if (FP.isValid() && needsDwarfCFI(MF)) { |
4344 | // Emit .cfi_remember_state to remember old frame. |
4345 | unsigned CFIIndex = |
4346 | MF.addFrameInst(Inst: MCCFIInstruction::createRememberState(L: nullptr)); |
4347 | BuildMI(BB&: *MBB, I: BeforeMI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
4348 | .addCFIIndex(CFIIndex); |
4349 | |
4350 | // Setup new CFA value with DW_CFA_def_cfa_expression: |
4351 | // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus |
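    // For example, on x86-64 (SP is DWARF register 7, SlotSize == 8) the
    // escape bytes are roughly:
    //   DW_CFA_def_cfa_expression, <len>,
    //   DW_OP_breg7, <SLEB128 Offset>, DW_OP_deref, DW_OP_consts, 16,
    //   DW_OP_plus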
4352 | SmallString<64> CfaExpr; |
4353 | uint8_t buffer[16]; |
4354 | int Offset = SPAdjust; |
4355 | if (BP.isValid()) |
4356 | Offset += TRI->getSpillSize(RC: *TRI->getMinimalPhysRegClass(Reg: BP)); |
4357 | // If BeforeMI is a frame setup instruction, we need to adjust the position |
4358 | // and offset of the new cfi instruction. |
4359 | if (TII.isFrameSetup(I: *BeforeMI)) { |
4360 | Offset += alignTo(Size: TII.getFrameSize(I: *BeforeMI), A: getStackAlign()); |
4361 | BeforeMI = std::next(x: BeforeMI); |
4362 | } |
4363 | Register StackPtr = TRI->getStackRegister(); |
4364 | if (STI.isTarget64BitILP32()) |
4365 | StackPtr = Register(getX86SubSuperRegister(Reg: StackPtr, Size: 64)); |
4366 | unsigned DwarfStackPtr = TRI->getDwarfRegNum(RegNum: StackPtr, isEH: true); |
4367 | CfaExpr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr)); |
4368 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: Offset, p: buffer)); |
4369 | CfaExpr.push_back(Elt: dwarf::DW_OP_deref); |
4370 | CfaExpr.push_back(Elt: dwarf::DW_OP_consts); |
4371 | CfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: SlotSize * 2, p: buffer)); |
4372 | CfaExpr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus); |
4373 | |
4374 | SmallString<64> DefCfaExpr; |
4375 | DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression); |
4376 | DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: CfaExpr.size(), p: buffer)); |
4377 | DefCfaExpr.append(RHS: CfaExpr.str()); |
4378 | BuildCFI(MBB&: *MBB, MBBI: BeforeMI, DL, |
4379 | CFIInst: MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str()), |
4380 | Flag: MachineInstr::FrameSetup); |
4381 | } |
4382 | } |
4383 | |
4384 | void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF, |
4385 | MachineBasicBlock::iterator AfterMI, |
4386 | Register FP, Register BP, |
4387 | int SPAdjust) const { |
4388 | assert(FP.isValid() || BP.isValid()); |
4389 | |
4390 | // Adjust SP so it points to spilled FP or BP. |
4391 | MachineBasicBlock *MBB = AfterMI->getParent(); |
4392 | MachineBasicBlock::iterator Pos = std::next(x: AfterMI); |
4393 | DebugLoc DL = AfterMI->getDebugLoc(); |
4394 | if (SPAdjust) |
4395 | emitSPUpdate(MBB&: *MBB, MBBI&: Pos, DL, NumBytes: SPAdjust, InEpilogue: false); |
4396 | |
4397 | // Restore BP. |
4398 | if (BP.isValid()) { |
4399 | BuildMI(BB&: *MBB, I: Pos, MIMD: DL, |
4400 | MCID: TII.get(Opcode: getPOPOpcode(ST: MF.getSubtarget<X86Subtarget>())), DestReg: BP); |
4401 | } |
4402 | |
4403 | // Restore FP. |
4404 | if (FP.isValid()) { |
4405 | BuildMI(BB&: *MBB, I: Pos, MIMD: DL, |
4406 | MCID: TII.get(Opcode: getPOPOpcode(ST: MF.getSubtarget<X86Subtarget>())), DestReg: FP); |
4407 | |
4408 | // Emit unwinding information. |
4409 | if (needsDwarfCFI(MF)) { |
4410 | // Restore original frame with .cfi_restore_state. |
4411 | unsigned CFIIndex = |
4412 | MF.addFrameInst(Inst: MCCFIInstruction::createRestoreState(L: nullptr)); |
4413 | BuildMI(BB&: *MBB, I: Pos, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
4414 | .addCFIIndex(CFIIndex); |
4415 | } |
4416 | } |
4417 | } |
4418 | |
4419 | void X86FrameLowering::saveAndRestoreFPBPUsingSP( |
4420 | MachineFunction &MF, MachineBasicBlock::iterator BeforeMI, |
4421 | MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const { |
4422 | assert(SpillFP || SpillBP); |
4423 | |
4424 | Register FP, BP; |
4425 | const TargetRegisterClass *RC; |
4426 | unsigned NumRegs = 0; |
4427 | |
4428 | if (SpillFP) { |
4429 | FP = TRI->getFrameRegister(MF); |
4430 | if (STI.isTarget64BitILP32()) |
4431 | FP = Register(getX86SubSuperRegister(Reg: FP, Size: 64)); |
4432 | RC = TRI->getMinimalPhysRegClass(Reg: FP); |
4433 | ++NumRegs; |
4434 | } |
4435 | if (SpillBP) { |
4436 | BP = TRI->getBaseRegister(); |
4437 | if (STI.isTarget64BitILP32()) |
4438 | BP = Register(getX86SubSuperRegister(Reg: BP, Size: 64)); |
4439 | RC = TRI->getMinimalPhysRegClass(Reg: BP); |
4440 | ++NumRegs; |
4441 | } |
4442 | int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumSpilledRegs: NumRegs); |
4443 | |
4444 | spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust); |
4445 | restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust); |
4446 | } |
4447 | |
4448 | bool X86FrameLowering::skipSpillFPBP( |
4449 | MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const { |
4450 | if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) { |
4451 | // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form |
4452 | // SaveRbx = COPY RBX |
4453 | // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx |
4454 | // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx. |
4455 | // We should skip this instruction sequence. |
4456 | int FI; |
4457 | Register Reg; |
4458 | while (!(MI->getOpcode() == TargetOpcode::COPY && |
4459 | MI->getOperand(i: 1).getReg() == X86::RBX) && |
4460 | !((Reg = TII.isStoreToStackSlot(MI: *MI, FrameIndex&: FI)) && Reg == X86::RBX)) |
4461 | ++MI; |
4462 | return true; |
4463 | } |
4464 | return false; |
4465 | } |
4466 | |
4467 | static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, |
4468 | const TargetRegisterInfo *TRI, bool &AccessFP, |
4469 | bool &AccessBP) { |
4470 | AccessFP = AccessBP = false; |
4471 | if (FP) { |
4472 | if (MI.findRegisterUseOperandIdx(Reg: FP, TRI, isKill: false) != -1 || |
4473 | MI.findRegisterDefOperandIdx(Reg: FP, TRI, isDead: false, Overlap: true) != -1) |
4474 | AccessFP = true; |
4475 | } |
4476 | if (BP) { |
4477 | if (MI.findRegisterUseOperandIdx(Reg: BP, TRI, isKill: false) != -1 || |
4478 | MI.findRegisterDefOperandIdx(Reg: BP, TRI, isDead: false, Overlap: true) != -1) |
4479 | AccessBP = true; |
4480 | } |
4481 | return AccessFP || AccessBP; |
4482 | } |
4483 | |
// An invoke instruction has already been lowered to a normal function call at
// this point. We try to figure out whether MI comes from an invoke.
// Is there a better way to do this?
4487 | static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) { |
4488 | if (!MI.isCall()) |
4489 | return false; |
4490 | if (InsideEHLabels) |
4491 | return true; |
4492 | |
4493 | const MachineBasicBlock *MBB = MI.getParent(); |
4494 | if (!MBB->hasEHPadSuccessor()) |
4495 | return false; |
4496 | |
4497 | // Check if there is another call instruction from MI to the end of MBB. |
4498 | MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end(); |
4499 | for (++MBBI; MBBI != ME; ++MBBI) |
4500 | if (MBBI->isCall()) |
4501 | return false; |
4502 | return true; |
4503 | } |
4504 | |
/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
/// interfering stack access in the range, usually generated by a register
/// spill.
4507 | void X86FrameLowering::checkInterferedAccess( |
4508 | MachineFunction &MF, MachineBasicBlock::reverse_iterator DefMI, |
4509 | MachineBasicBlock::reverse_iterator KillMI, bool SpillFP, |
4510 | bool SpillBP) const { |
4511 | if (DefMI == KillMI) |
4512 | return; |
4513 | if (TRI->hasBasePointer(MF)) { |
4514 | if (!SpillBP) |
4515 | return; |
4516 | } else { |
4517 | if (!SpillFP) |
4518 | return; |
4519 | } |
4520 | |
4521 | auto MI = KillMI; |
4522 | while (MI != DefMI) { |
4523 | if (any_of(Range: MI->operands(), |
4524 | P: [](const MachineOperand &MO) { return MO.isFI(); })) |
4525 | MF.getContext().reportError(L: SMLoc(), |
4526 | Msg: "Interference usage of base pointer/frame " |
4527 | "pointer." ); |
4528 | MI++; |
4529 | } |
4530 | } |
4531 | |
/// If a function uses a base pointer and the base pointer is clobbered by
/// inline asm, RA doesn't detect this case, and after the inline asm the base
/// pointer contains a garbage value.
/// For example, if a 32-bit x86 function uses the base pointer esi, and esi is
/// clobbered by the following inline asm
///     asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
/// we need to save esi before the asm and restore it afterwards.
///
/// The same problem can occur with the frame pointer if there is a function
/// call and the callee uses a different calling convention and clobbers fp.
///
/// Because normal frame objects (spill slots) are accessed through the fp/bp
/// register, we can't spill fp/bp to normal spill slots.
///
/// FIXME: There are 2 possible enhancements:
/// 1. In many cases there are other physical registers not clobbered by the
///    inline asm; we could use one of them as the base pointer, or use a
///    virtual register as the base pointer and let RA allocate a physical
///    register for it.
/// 2. If no other instruction accesses the stack through fp/bp between the
///    inline asm and the epilogue, and there is no CFI requirement for a
///    correct fp, we can skip the save and restore operations.
4553 | void X86FrameLowering::spillFPBP(MachineFunction &MF) const { |
4554 | Register FP, BP; |
4555 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); |
4556 | if (TFI.hasFP(MF)) |
4557 | FP = TRI->getFrameRegister(MF); |
4558 | if (TRI->hasBasePointer(MF)) |
4559 | BP = TRI->getBaseRegister(); |
4560 | |
  // Currently only inline asm and function calls can clobber fp/bp, so we can
  // do a quick test and return early.
4563 | if (!MF.hasInlineAsm()) { |
4564 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
4565 | if (!X86FI->getFPClobberedByCall()) |
4566 | FP = 0; |
4567 | if (!X86FI->getBPClobberedByCall()) |
4568 | BP = 0; |
4569 | } |
4570 | if (!FP && !BP) |
4571 | return; |
4572 | |
4573 | for (MachineBasicBlock &MBB : MF) { |
4574 | bool InsideEHLabels = false; |
4575 | auto MI = MBB.rbegin(), ME = MBB.rend(); |
4576 | auto TermMI = MBB.getFirstTerminator(); |
4577 | if (TermMI == MBB.begin()) |
4578 | continue; |
4579 | MI = *(std::prev(x: TermMI)); |
4580 | |
4581 | while (MI != ME) { |
4582 | // Skip frame setup/destroy instructions. |
4583 | // Skip Invoke (call inside try block) instructions. |
4584 | // Skip instructions handled by target. |
4585 | if (MI->getFlag(Flag: MachineInstr::MIFlag::FrameSetup) || |
4586 | MI->getFlag(Flag: MachineInstr::MIFlag::FrameDestroy) || |
4587 | isInvoke(MI: *MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) { |
4588 | ++MI; |
4589 | continue; |
4590 | } |
4591 | |
4592 | if (MI->getOpcode() == TargetOpcode::EH_LABEL) { |
4593 | InsideEHLabels = !InsideEHLabels; |
4594 | ++MI; |
4595 | continue; |
4596 | } |
4597 | |
4598 | bool AccessFP, AccessBP; |
4599 | // Check if fp or bp is used in MI. |
4600 | if (!isFPBPAccess(MI: *MI, FP, BP, TRI, AccessFP, AccessBP)) { |
4601 | ++MI; |
4602 | continue; |
4603 | } |
4604 | |
4605 | // Look for the range [DefMI, KillMI] in which fp or bp is defined and |
4606 | // used. |
4607 | bool FPLive = false, BPLive = false; |
4608 | bool SpillFP = false, SpillBP = false; |
4609 | auto DefMI = MI, KillMI = MI; |
4610 | do { |
4611 | SpillFP |= AccessFP; |
4612 | SpillBP |= AccessBP; |
4613 | |
4614 | // Maintain FPLive and BPLive. |
4615 | if (FPLive && MI->findRegisterDefOperandIdx(Reg: FP, TRI, isDead: false, Overlap: true) != -1) |
4616 | FPLive = false; |
4617 | if (FP && MI->findRegisterUseOperandIdx(Reg: FP, TRI, isKill: false) != -1) |
4618 | FPLive = true; |
4619 | if (BPLive && MI->findRegisterDefOperandIdx(Reg: BP, TRI, isDead: false, Overlap: true) != -1) |
4620 | BPLive = false; |
4621 | if (BP && MI->findRegisterUseOperandIdx(Reg: BP, TRI, isKill: false) != -1) |
4622 | BPLive = true; |
4623 | |
4624 | DefMI = MI++; |
4625 | } while ((MI != ME) && |
4626 | (FPLive || BPLive || |
4627 | isFPBPAccess(MI: *MI, FP, BP, TRI, AccessFP, AccessBP))); |
4628 | |
4629 | // Don't need to save/restore if FP is accessed through llvm.frameaddress. |
4630 | if (FPLive && !SpillBP) |
4631 | continue; |
4632 | |
4633 | // If the bp is clobbered by a call, we should save and restore outside of |
4634 | // the frame setup instructions. |
4635 | if (KillMI->isCall() && DefMI != ME) { |
4636 | auto FrameSetup = std::next(x: DefMI); |
        // Look for a frame setup instruction toward the start of the BB.
        // If we reach another call instruction first, there is no frame setup
        // instruction for the current call.
4640 | while (FrameSetup != ME && !TII.isFrameSetup(I: *FrameSetup) && |
4641 | !FrameSetup->isCall()) |
4642 | ++FrameSetup; |
4643 | // If a frame setup instruction is found, we need to find out the |
4644 | // corresponding frame destroy instruction. |
4645 | if (FrameSetup != ME && TII.isFrameSetup(I: *FrameSetup) && |
4646 | (TII.getFrameSize(I: *FrameSetup) || |
4647 | TII.getFrameAdjustment(I: *FrameSetup))) { |
4648 | while (!TII.isFrameInstr(I: *KillMI)) |
4649 | --KillMI; |
4650 | DefMI = FrameSetup; |
4651 | MI = DefMI; |
4652 | ++MI; |
4653 | } |
4654 | } |
4655 | |
4656 | checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP); |
4657 | |
4658 | // Call target function to spill and restore FP and BP registers. |
4659 | saveAndRestoreFPBPUsingSP(MF, BeforeMI: &(*DefMI), AfterMI: &(*KillMI), SpillFP, SpillBP); |
4660 | } |
4661 | } |
4662 | } |
4663 | |