//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64PrologueEpilogue.h"
#include "AArch64FrameLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/MC/MCContext.h"

#define DEBUG_TYPE "frame-info"

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

namespace llvm {

static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
                         RTLIB::Libcall LC) {
  return MO.isSymbol() &&
         StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
}

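// Reading VG requires a call to the SME ABI get-current-VG routine when the
// function has streaming-mode changes but SVE (and therefore CNTD) is
// unavailable.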
bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
  return AFI->hasStreamingModeChanges() &&
         !MF.getSubtarget<AArch64Subtarget>().hasSVE();
}

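// Returns true if the given instruction is part of the sequence that
// materializes the value of VG before the callee-save spills: a CNTD, or,
// when a libcall is required, the BL to the get-current-VG routine and the
// COPY of its result.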
bool AArch64PrologueEpilogueCommon::isVGInstruction(
    MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
  unsigned Opc = MBBI->getOpcode();
  if (Opc == AArch64::CNTD_XPiI)
    return true;

  if (!requiresGetVGCall())
    return false;

  if (Opc == AArch64::BL)
    return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);

  return Opc == TargetOpcode::COPY;
}

// Convenience function to determine whether I is part of the ZPR callee saves.
static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  case AArch64::STR_ZXI:
  case AArch64::LDR_ZXI:
  case AArch64::PTRUE_C_B:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  case AArch64::SEH_SaveZReg:
    return true;
  }
}

// Convenience function to determine whether I is part of the PPR callee saves.
static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::STR_PXI:
  case AArch64::LDR_PXI:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  case AArch64::SEH_SavePReg:
    return true;
  }
}

// Convenience function to determine whether I is part of the SVE callee saves.
static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
  return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
}

AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
    MachineFunction &MF, MachineBasicBlock &MBB,
    const AArch64FrameLowering &AFL)
    : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
      Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
      RegInfo(*Subtarget.getRegisterInfo()) {
  TII = Subtarget.getInstrInfo();
  AFI = MF.getInfo<AArch64FunctionInfo>();

  HasFP = AFL.hasFP(MF);
  NeedsWinCFI = AFL.needsWinCFI(MF);

  if (AFL.hasSVECalleeSavesAboveFrameRecord(MF)) {
    if (AFI->hasStackHazardSlotIndex())
      reportFatalUsageError("SME hazard padding is not supported on Windows");
    SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
  } else if (AFI->hasSplitSVEObjects()) {
    SVELayout = SVEStackLayout::Split;
  }
}

MachineBasicBlock::iterator
AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
    bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
  unsigned NewOpc;

  // If the function contains streaming mode changes, we expect instructions
  // to calculate the value of VG before spilling. Move past these instructions
  // if necessary.
  if (AFL.requiresSaveVG(MF)) {
    auto &TLI = *Subtarget.getTargetLowering();
    while (isVGInstruction(MBBI, TLI))
      ++MBBI;
  }

  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
  int64_t MinOffset, MaxOffset;
  bool Success = TII->getMemOpInfo(NewOpc, Scale, Width, MinOffset, MaxOffset);
  (void)Success;
  assert(Success && "unknown load/store opcode");

  // If the first store isn't right where we want SP then we can't fold the
  // update in so create a normal arithmetic instruction instead.
  //
  // On Windows, some register pairs involving LR can't be folded because
  // there isn't a corresponding unwind opcode.
  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue() ||
      (NeedsWinCFI &&
       (NewOpc == AArch64::LDPXpost || NewOpc == AArch64::STPXpre) &&
       RegInfo.getEncodingValue(MBBI->getOperand(0).getReg()) + 1 !=
           RegInfo.getEncodingValue(MBBI->getOperand(1).getReg()))) {
    // If we are destroying the frame, make sure we add the increment after the
    // last frame operation.
    if (FrameFlag == MachineInstr::FrameDestroy) {
      ++MBBI;
      // Also skip the SEH instruction, if needed.
      if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
        ++MBBI;
    }
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
                    StackOffset::getFixed(CFAOffset));

    return std::prev(MBBI);
  }

  // Get rid of the SEH code associated with the old instruction.
  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
    if (AArch64InstrInfo::isSEHInstruction(*SEH))
      SEH->eraseFromParent();
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands());

  // Generate a new SEH code that corresponds to the new instruction.
  if (NeedsWinCFI) {
    HasWinCFI = true;
    AFL.insertSEH(*MIB, *TII, FrameFlag);
  }

  if (EmitCFI)
    CFIInstBuilder(MBB, MBBI, FrameFlag)
        .buildDefCFAOffset(CFAOffset - CSStackSizeInc);

  return std::prev(MBB.erase(MBBI));
}

// Fix up the SEH opcode associated with the save/restore instruction.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
                           unsigned LocalStackSize) {
  MachineOperand *ImmOpnd = nullptr;
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Fix the offset in the SEH instruction");
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegI:
  case AArch64::SEH_SaveAnyRegIP:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    break;
  }
  if (ImmOpnd)
    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}

void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
    MachineInstr &MI, uint64_t LocalStackSize) const {
  if (AArch64InstrInfo::isSEHInstruction(MI))
    return;

  unsigned Opc = MI.getOpcode();
  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  if (NeedsWinCFI) {
    HasWinCFI = true;
    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
           "Expecting a SEH instruction");
    fixupSEHOpcode(MBBI, LocalStackSize);
  }
}

bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
    uint64_t StackBumpBytes) const {
  if (AFL.homogeneousPrologEpilog(MF))
    return false;

  if (AFI->getLocalStackSize() == 0)
    return false;

  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
  // (to force a stp with predecrement) to match the packed unwind format,
  // provided that there actually are any callee saved registers to merge the
  // decrement with.
  //
  // Note that for certain paired saves, like "x19, lr", we can't actually
  // emit a predecrement stp, but packed unwind still expects a separate stack
  // adjustment.
  //
  // This is potentially marginally slower, but allows using the packed
  // unwind format for functions that both have a local area and callee saved
  // registers. Using the packed unwind format notably reduces the size of
  // the unwind info.
  if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
      MF.getFunction().hasOptSize())
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores.
  if (StackBumpBytes >= 512 ||
      AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo.hasStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (AFL.canUseRedZone(MF))
    return false;

  // When there is an SVE area on the stack, always allocate the
  // callee-saves and spills/locals separately.
  if (AFI->hasSVEStackSize())
    return false;

  return true;
}

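// Compute the sizes of the SVE callee-save and local areas, split into the
// PPR (predicate) and ZPR (vector) components used by the prologue and
// epilogue emitters.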
SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
  StackOffset PPRCalleeSavesSize =
      StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
  StackOffset ZPRCalleeSavesSize =
      StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
  StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
  StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
  if (SVELayout == SVEStackLayout::Split)
    return {{PPRCalleeSavesSize, PPRLocalsSize},
            {ZPRCalleeSavesSize, ZPRLocalsSize}};
  // For simplicity, attribute all locals to ZPRs when split SVE is disabled.
  return {{PPRCalleeSavesSize, StackOffset{}},
          {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
}

SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations(
    SVEFrameSizes const &SVE) {
  StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
  StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
  StackOffset AfterPPRs = {};
  if (SVELayout == SVEStackLayout::Split) {
    BeforePPRs = SVE.PPR.CalleeSavesSize;
    // If there are no ZPR CSRs, place all local allocations after the ZPRs.
    if (SVE.ZPR.CalleeSavesSize)
      AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
    else
      AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
  }
  return {BeforePPRs, AfterPPRs, AfterZPRs};
}

struct SVEPartitions {
  struct {
    MachineBasicBlock::iterator Begin, End;
  } PPR, ZPR;
};

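// Find the iterator ranges covering the PPR and ZPR callee-save instructions
// starting at MBBI, walking forwards for a prologue and backwards for an
// epilogue.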
static SVEPartitions partitionSVECS(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    StackOffset PPRCalleeSavesSize,
                                    StackOffset ZPRCalleeSavesSize,
                                    bool IsEpilogue) {
  MachineBasicBlock::iterator PPRsI = MBBI;
  MachineBasicBlock::iterator End =
      IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
  auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
  // Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
  if (PPRCalleeSavesSize) {
    PPRsI = AdjustI(PPRsI);
    assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
    while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
      IsEpilogue ? (--PPRsI) : (++PPRsI);
  }
  MachineBasicBlock::iterator ZPRsI = PPRsI;
  if (ZPRCalleeSavesSize) {
    ZPRsI = AdjustI(ZPRsI);
    assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
    while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
      IsEpilogue ? (--ZPRsI) : (++ZPRsI);
  }
  if (IsEpilogue)
    return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
  return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
}

AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               const AArch64FrameLowering &AFL)
    : AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
  EmitCFI = AFI->needsDwarfUnwindInfo(MF);
  EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  IsFunclet = MBB.isEHFuncletEntry();
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);

#ifndef NDEBUG
  collectBlockLiveins();
#endif
}

#ifndef NDEBUG
/// Collect live registers from the end of \p MI's parent up to (including) \p
/// MI in \p LiveRegs.
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
                                LivePhysRegs &LiveRegs) {

  MachineBasicBlock &MBB = *MI.getParent();
  LiveRegs.addLiveOuts(MBB);
  for (const MachineInstr &MI :
       reverse(make_range(MI.getIterator(), MBB.instr_end())))
    LiveRegs.stepBackward(MI);
}

void AArch64PrologueEmitter::collectBlockLiveins() {
  // Collect live registers from the end of MBB up to the start of the
  // existing frame setup instructions.
  PrologueEndI = MBB.begin();
  while (PrologueEndI != MBB.end() &&
         PrologueEndI->getFlag(MachineInstr::FrameSetup))
    ++PrologueEndI;

  if (PrologueEndI != MBB.end()) {
    getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
    // Ignore registers used for stack management for now.
    LiveRegs.removeReg(AArch64::SP);
    LiveRegs.removeReg(AArch64::X19);
    LiveRegs.removeReg(AArch64::FP);
    LiveRegs.removeReg(AArch64::LR);

    // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
    // This is necessary to spill VG if required where SVE is unavailable, but
    // X0 is preserved around this call.
    if (requiresGetVGCall())
      LiveRegs.removeReg(AArch64::X0);
  }
}

void AArch64PrologueEmitter::verifyPrologueClobbers() const {
  if (PrologueEndI == MBB.end())
    return;
  // Check if any of the newly inserted instructions clobber any of the live
  // registers.
  for (MachineInstr &MI :
       make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
    for (auto &Op : MI.operands())
      if (Op.isReg() && Op.isDef())
        assert(!LiveRegs.contains(Op.getReg()) &&
               "live register clobbered by inserted prologue instructions");
  }
}
#endif

void AArch64PrologueEmitter::determineLocalsStackSize(
    uint64_t StackSize, uint64_t PrologueSaveSize) {
  AFI->setLocalStackSize(StackSize - PrologueSaveSize);
  CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
}

// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of an SVE register.
static int64_t upperBound(StackOffset Size) {
  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}

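// Allocate AllocSize bytes of stack space (realigning SP if
// RealignmentPadding is non-zero), emitting inline stack-probing sequences
// when the target requires them.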
void AArch64PrologueEmitter::allocateStackSpace(
    MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
    StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) {

  if (!AllocSize)
    return;

  DebugLoc DL;
  const int64_t MaxAlign = MFI.getMaxAlign().value();
  const uint64_t AndMask = ~(MaxAlign - 1);

  if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
    Register TargetReg = RealignmentPadding
                             ? AFL.findScratchNonCalleeSaveRegister(&MBB)
                             : AArch64::SP;
    // SUB Xd/SP, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, InitialOffset);

    if (RealignmentPadding) {
      // AND SP, X9, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(TargetReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI->setStackRealigned(true);

      // No need for SEH instructions here; if we're realigning the stack,
      // we've set a frame pointer and already finished the SEH prologue.
      assert(!NeedsWinCFI);
    }
    return;
  }

  //
  // Stack probing allocation.
  //

  // Fixed length allocation. If we don't need to re-align the stack and don't
  // have SVE objects, we can use a more efficient sequence for stack probing.
  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
    assert(ScratchReg != AArch64::NoRegister);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
        .addDef(ScratchReg)
        .addImm(AllocSize.getFixed())
        .addImm(InitialOffset.getFixed())
        .addImm(InitialOffset.getScalable());
    // The fixed allocation may leave unprobed bytes at the top of the
    // stack. If we have subsequent allocation (e.g. if we have variable-sized
    // objects), we need to issue an extra probe, so these allocations start in
    // a known state.
    if (FollowupAllocs) {
      // LDR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
          .addDef(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .addMemOperand(MF.getMachineMemOperand(
              MachinePointerInfo::getUnknownStack(MF),
              MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 8,
              Align(8)))
          .setMIFlags(MachineInstr::FrameSetup);
    }

    return;
  }

  // Variable length allocation.

  // If the (unknown) allocation size cannot exceed the probe size, decrement
  // the stack pointer right away.
  int64_t ProbeSize = AFI->getStackProbeSize();
  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
                              ? AFL.findScratchNonCalleeSaveRegister(&MBB)
                              : AArch64::SP;
    assert(ScratchReg != AArch64::NoRegister);
    // SUB Xd, SP, AllocSize
    emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitCFI, InitialOffset);
    if (RealignmentPadding) {
      // AND SP, Xn, 0b11111...0000
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
          .setMIFlags(MachineInstr::FrameSetup);
      AFI->setStackRealigned(true);
    }
    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
                              AArch64::StackProbeMaxUnprobedStack) {
      // LDR XZR, [SP]
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
          .addDef(AArch64::XZR)
          .addReg(AArch64::SP)
          .addImm(0)
          .addMemOperand(MF.getMachineMemOperand(
              MachinePointerInfo::getUnknownStack(MF),
              MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 8,
              Align(8)))
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  // Emit a variable-length allocation probing loop.
  // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
  // each of them guaranteed to adjust the stack by less than the probe size.
  Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
  assert(TargetReg != AArch64::NoRegister);
  // SUB Xd, SP, AllocSize
  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
                  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {
    // AND Xn, Xn, 0b11111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
        .addReg(TargetReg, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
        .setMIFlags(MachineInstr::FrameSetup);
  }

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
      .addReg(TargetReg);
  if (EmitCFI) {
    // Set the CFA register back to SP.
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
        .buildDefCFARegister(AArch64::SP);
  }
  if (RealignmentPadding)
    AFI->setStackRealigned(true);
}

void AArch64PrologueEmitter::emitPrologue() {
  const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
  const MachineBasicBlock::iterator EndI = MBB.end();

  // At this point, we're going to decide whether or not the function uses a
  // redzone. In most cases, the function doesn't have a redzone so let's
  // assume that's false and set it to true in the case that there's a redzone.
  AFI->setHasRedZone(false);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // In some cases, particularly with CallingConv::SwiftTail, it is possible to
  // have a tail-call where the caller only needs to adjust the stack pointer in
  // the epilogue. In this case, we still need to emit a SEH prologue sequence.
  // See `seh-minimal-prologue-epilogue.ll` test cases.
  if (AFI->getArgumentStackToRestore())
    HasWinCFI |= NeedsWinCFI;

  if (AFI->shouldSignReturnAddress(MF)) {
    // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
    // are inserted by emitPacRetPlusLeafHardening().
    if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
      BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    // AArch64PointerAuth pass will insert SEH_PACSignLR
    HasWinCFI |= NeedsWinCFI;
  }

  if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
    emitShadowCallStackPrologue(PrologueBeginI, DL);
    HasWinCFI |= NeedsWinCFI;
  }

  if (EmitCFI && AFI->isMTETagged())
    BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
        .setMIFlag(MachineInstr::FrameSetup);

  // We signal the presence of a Swift extended frame to external tools by
  // storing FP with 0b0001 in bits 63:60. In normal userland operation a
  // simple ORR is sufficient; it is assumed a Swift kernel would initialize
  // the TBI bits so that is still true.
  if (HasFP && AFI->hasSwiftAsyncContext())
    emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Set tagged base pointer to the requested stack slot. Ideally it should
  // match SP value after prologue.
  if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
    AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
  else
    AFI->setTaggedBasePointerOffset(MFI.getStackSize());

  // getStackSize() includes all the locals in its size calculation. We don't
  // include these locals when computing the stack size of a funclet, as they
  // are allocated in the parent's stack frame and accessed via the frame
  // pointer from the funclet. We only save the callee saved registers in the
  // funclet, which are really the callee saved registers of the parent
  // function, including the funclet.
  int64_t NumBytes =
      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
  if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
    return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);

  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  determineLocalsStackSize(NumBytes, PrologueSaveSize);

  auto [PPR, ZPR] = getSVEStackFrameSizes();
  SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});

  MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
  if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
    assert(!SVEAllocs.AfterPPRs &&
           "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
    // If we're doing SVE saves first, we need to immediately allocate space
    // for fixed objects, then space for the SVE callee saves.
    //
    // Windows unwind requires that the scalable size is a multiple of 16;
    // that's handled when the callee-saved size is computed.
    auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
    allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
                       /*FollowupAllocs=*/true);
    NumBytes -= FixedObject;

    // Now allocate space for the GPR callee saves.
    MachineBasicBlock::iterator MBBI = PrologueBeginI;
    while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
      ++MBBI;
    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
    NumBytes -= AFI->getCalleeSavedStackSize();
  } else if (CombineSPBump) {
    assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
    emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-NumBytes), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
                    EmitAsyncCFI);
    NumBytes = 0;
  } else if (HomPrologEpilog) {
    // Stack has been already adjusted.
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
    FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
        PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  auto &TLI = *Subtarget.getTargetLowering();

  MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
  while (AfterGPRSavesI != EndI &&
         AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
         !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
    if (CombineSPBump &&
        // Only fix-up frame-setup load/store instructions.
        (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
      fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
                                        AFI->getLocalStackSize());
    ++AfterGPRSavesI;
  }

  // For funclets the FP belongs to the containing function. Only set up FP if
  // we actually need to.
  if (!IsFunclet && HasFP)
    emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);

  // Now emit the moves for whatever callee saved regs we have (including FP,
  // LR if those are saved). Frame instructions for SVE registers are emitted
  // later, after the instructions which actually save the SVE regs.
  if (EmitAsyncCFI)
    emitCalleeSavedGPRLocations(AfterGPRSavesI);

  // Alignment is required for the parent frame, not the funclet
  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
  const int64_t RealignmentPadding =
      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
          ? MFI.getMaxAlign().value() - 16
          : 0;

  if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
    emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);

  StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
  SVEAllocs.AfterZPRs += NonSVELocalsSize;

  StackOffset CFAOffset =
      StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
  MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
  // Allocate space for the callee saves and PPR locals (if any).
  if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
    auto [PPRRange, ZPRRange] =
        partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
                       ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
    AfterSVESavesI = ZPRRange.End;
    if (EmitAsyncCFI)
      emitCalleeSavedSVELocations(AfterSVESavesI);

    allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
                           SVEAllocs.AfterZPRs);
    CFAOffset += SVEAllocs.BeforePPRs;
    assert(PPRRange.End == ZPRRange.Begin &&
           "Expected ZPR callee saves after PPR locals");
    allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
    CFAOffset += SVEAllocs.AfterPPRs;
  } else {
    assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord);
    // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
    // already been allocated. PPR locals (included in AfterPPRs) are not
    // supported (note: this is asserted above).
    CFAOffset += SVEAllocs.BeforePPRs;
  }

  // Allocate space for the rest of the frame including ZPR locals. Align the
  // stack as necessary.
  assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
         "Cannot use redzone with stack realignment");
  if (!AFL.canUseRedZone(MF)) {
    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
    // correct value here, as NumBytes also includes padding bytes, which
    // shouldn't be counted here.
    allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
                       EmitAsyncCFI && !HasFP, CFAOffset,
                       MFI.hasVarSizedObjects());
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  // For funclets the BP belongs to the containing function.
  if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
                     AArch64::SP, false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  // The very last FrameSetup instruction indicates the end of prologue. Emit a
  // SEH opcode indicating the prologue end.
  if (NeedsWinCFI && HasWinCFI) {
    BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // SEH funclets are passed the frame pointer in X1. If the parent
  // function uses the base register, then the base register is used
  // directly, and is not retrieved from X1.
  if (IsFunclet && F.hasPersonalityFn()) {
    EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
    if (isAsynchronousEHPersonality(Per)) {
      BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
              AArch64::FP)
          .addReg(AArch64::X1)
          .setMIFlag(MachineInstr::FrameSetup);
      MBB.addLiveIn(AArch64::X1);
    }
  }

  if (EmitCFI && !EmitAsyncCFI) {
    if (HasFP) {
      emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
    } else {
      StackOffset TotalSize =
          AFL.getSVEStackSize(MF) +
          StackOffset::getFixed((int64_t)MFI.getStackSize());
      CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
      CFIBuilder.insertCFIInst(
          createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
                       TotalSize, /*LastAdjustmentWasScalable=*/false));
    }
    emitCalleeSavedGPRLocations(AfterSVESavesI);
    emitCalleeSavedSVELocations(AfterSVESavesI);
  }
}

void AArch64PrologueEmitter::emitShadowCallStackPrologue(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
  // Shadow call stack prolog: str x30, [x18], #8
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
      .addReg(AArch64::X18, RegState::Define)
      .addReg(AArch64::LR)
      .addReg(AArch64::X18)
      .addImm(8)
      .setMIFlag(MachineInstr::FrameSetup);

  // This instruction also makes x18 live-in to the entry block.
  MBB.addLiveIn(AArch64::X18);

  if (NeedsWinCFI)
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);

  if (EmitCFI) {
    // Emit a CFI instruction that causes 8 to be subtracted from the value of
    // x18 when unwinding past this frame.
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
        .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
  }
}

void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
  switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
  case SwiftAsyncFramePointerMode::DeploymentBased:
    if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
      // The special symbol below is absolute and has a *value* that can be
      // combined with the frame pointer to signal an extended frame.
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
          .addExternalSymbol("swift_async_extendedFramePointerFlags",
                             AArch64II::MO_GOT);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlags(MachineInstr::FrameSetup);
        HasWinCFI = true;
      }
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
          .addUse(AArch64::FP)
          .addUse(AArch64::X16)
          .addImm(Subtarget.isTargetILP32() ? 32 : 0);
      if (NeedsWinCFI) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlags(MachineInstr::FrameSetup);
        HasWinCFI = true;
      }
      break;
    }
    [[fallthrough]];

  case SwiftAsyncFramePointerMode::Always:
    // ORR x29, x29, #0x1000_0000_0000_0000
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
        .addUse(AArch64::FP)
        .addImm(0x1100)
        .setMIFlag(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlags(MachineInstr::FrameSetup);
      HasWinCFI = true;
    }
    break;

  case SwiftAsyncFramePointerMode::Never:
    break;
  }
}

void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
    int64_t NumBytes, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL) const {
  assert(!HasFP && "unexpected function without stack frame but with FP");
  assert(!AFL.getSVEStackSize(MF) &&
         "unexpected function without stack frame but with SVE objects");
  // All of the stack allocation is for locals.
  AFI->setLocalStackSize(NumBytes);
  if (!NumBytes) {
    if (NeedsWinCFI && HasWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    return;
  }
  // REDZONE: If the stack size is less than 128 bytes, we don't need
  // to actually allocate.
  if (AFL.canUseRedZone(MF)) {
    AFI->setHasRedZone(true);
    ++NumRedZoneFunctions;
  } else {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-NumBytes), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
    if (EmitCFI) {
      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
          .buildDefCFAOffset(NumBytes, FrameLabel);
    }
  }

  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }
}

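// Establish the frame record: store the Swift async context (if any) just
// below where FP will point, then set FP to the frame record within the
// callee-save area.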
void AArch64PrologueEmitter::emitFramePointerSetup(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
    unsigned FixedObject) {
  int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
  if (CombineSPBump)
    FPOffset += AFI->getLocalStackSize();

  if (AFI->hasSwiftAsyncContext()) {
    // Before we update the live FP we have to ensure there's a valid (or
    // null) asynchronous context in its slot just before FP in the frame
    // record, so store it now.
    const auto &Attrs = MF.getFunction().getAttributes();
    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)
      MBB.addLiveIn(AArch64::X22);
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
        .addUse(Reg)
        .addUse(AArch64::SP)
        .addImm(FPOffset - 8)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
      // to multiple instructions, should be mutually-exclusive.
      assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlags(MachineInstr::FrameSetup);
      HasWinCFI = true;
    }
  }

  if (HomPrologEpilog) {
    auto Prolog = MBBI;
    --Prolog;
    assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
    Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
  } else {
    // Issue sub fp, sp, FPOffset or
    // mov fp,sp when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
                    StackOffset::getFixed(FPOffset), TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
    if (NeedsWinCFI && HasWinCFI) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      // After setting up the FP, the rest of the prolog doesn't need to be
      // included in the SEH unwind info.
      NeedsWinCFI = false;
    }
  }
  if (EmitAsyncCFI)
    emitDefineCFAWithFP(MBBI, FixedObject);
}

// Define the current CFA rule to use the provided FP.
void AArch64PrologueEmitter::emitDefineCFAWithFP(
    MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
  const int OffsetToFirstCalleeSaveFromFP =
      AFI->getCalleeSaveBaseToFrameRecordOffset() -
      AFI->getCalleeSavedStackSize();
  Register FramePtr = RegInfo.getFrameRegister(MF);
  CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
      .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
}

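// Emit a Windows stack probe: materialize the allocation size in 16-byte
// units in X15, call the __chkstk helper, then subtract the probed amount
// from SP (applying any realignment padding afterwards).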
void AArch64PrologueEmitter::emitWindowsStackProbe(
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
    int64_t RealignmentPadding) const {
  if (AFI->getSVECalleeSavedStackSize())
    report_fatal_error("SVE callee saves not yet supported with stack probing");

  // Find an available register to spill the value of X15 to, if X15 is being
  // used already for nest.
  unsigned X15Scratch = AArch64::NoRegister;
  if (llvm::any_of(MBB.liveins(),
                   [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
                     return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
                                                           LiveIn.PhysReg);
                   })) {
    X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
    assert(X15Scratch != AArch64::NoRegister &&
           (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
#ifndef NDEBUG
    LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
#endif
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
        .addReg(AArch64::XZR)
        .addReg(AArch64::X15, RegState::Undef)
        .addReg(AArch64::X15, RegState::Implicit)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
  if (NeedsWinCFI) {
    HasWinCFI = true;
    // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
    // exceed this amount. We need to move at most 2^24 - 1 into x15.
    // This is at most two instructions, MOVZ followed by MOVK.
    // TODO: Fix to use multiple stack alloc unwind codes for stacks
    // exceeding 256MB in size.
    if (NumBytes >= (1 << 28))
      report_fatal_error("Stack size cannot exceed 256MB for stack "
                         "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
        .addImm(LowNumWords)
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
        .setMIFlag(MachineInstr::FrameSetup);
    if ((NumWords & 0xFFFF0000) != 0) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
          .addReg(AArch64::X15)
          .addImm((NumWords & 0xFFFF0000) >> 16) // High half
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  } else {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
        .addImm(NumWords)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
  RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(RTLIB::STACK_PROBE);
  if (ChkStkLibcall == RTLIB::Unsupported)
    reportFatalUsageError("no available implementation of __chkstk");

  const char *ChkStk = TLI->getLibcallImplName(ChkStkLibcall).data();
  switch (MF.getTarget().getCodeModel()) {
  case CodeModel::Tiny:
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Kernel:
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
        .addExternalSymbol(ChkStk)
        .addReg(AArch64::X15, RegState::Implicit)
        .addReg(AArch64::X16,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::X17,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::NZCV,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    break;
  case CodeModel::Large:
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
        .addReg(AArch64::X16, RegState::Define)
        .addExternalSymbol(ChkStk)
        .addExternalSymbol(ChkStk)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
        .addReg(AArch64::X16, RegState::Kill)
        .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
        .addReg(AArch64::X16,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::X17,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .addReg(AArch64::NZCV,
                RegState::Implicit | RegState::Define | RegState::Dead)
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
    break;
  }

  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
      .addReg(AArch64::SP, RegState::Kill)
      .addReg(AArch64::X15, RegState::Kill)
      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
      .setMIFlags(MachineInstr::FrameSetup);
  if (NeedsWinCFI) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  NumBytes = 0;

  if (RealignmentPadding > 0) {
    if (RealignmentPadding >= 4096) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
          .addReg(AArch64::X16, RegState::Define)
          .addImm(RealignmentPadding)
          .setMIFlags(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
          .addReg(AArch64::SP)
          .addReg(AArch64::X16, RegState::Kill)
          .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
          .addReg(AArch64::SP)
          .addImm(RealignmentPadding)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
    AFI->setStackRealigned(true);

    // No need for SEH instructions here; if we're realigning the stack,
    // we've set a frame pointer and already finished the SEH prologue.
    assert(!NeedsWinCFI);
  }
  if (X15Scratch != AArch64::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
        .addReg(AArch64::XZR)
        .addReg(X15Scratch, RegState::Undef)
        .addReg(X15Scratch, RegState::Implicit)
        .setMIFlag(MachineInstr::FrameSetup);
  }
}

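// Emit CFI directives describing the stack-slot locations of the
// callee-saved GPRs and FPRs (callee-saved SVE registers are handled
// separately below).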
void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
    MachineBasicBlock::iterator MBBI) const {
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
    if (MFI.hasScalableStackID(FrameIdx))
      continue;

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
    CFIBuilder.buildOffset(Info.getReg(), Offset);
  }
}

void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
    MachineBasicBlock::iterator MBBI) const {
  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);

  std::optional<int64_t> IncomingVGOffsetFromDefCFA;
  if (AFL.requiresSaveVG(MF)) {
    auto IncomingVG = *find_if(
        reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
    IncomingVGOffsetFromDefCFA =
        MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
        AFL.getOffsetOfLocalArea();
  }

  StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
  for (const auto &Info : CSI) {
    int FI = Info.getFrameIdx();
    if (!MFI.hasScalableStackID(FI))
      continue;

    // Not all unwinders may know about SVE registers, so assume the lowest
    // common denominator.
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    MCRegister Reg = Info.getReg();
    if (!RegInfo.regNeedsCFI(Reg, Reg))
      continue;

    StackOffset Offset =
        StackOffset::getScalable(MFI.getObjectOffset(FI)) -
        StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));

    // The scalable vectors are below (lower address) the scalable predicates
    // with split SVE objects, so we must subtract the size of the predicates.
    if (SVELayout == SVEStackLayout::Split &&
        MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Offset -= PPRStackSize;

    CFIBuilder.insertCFIInst(
        createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
  }
}

static bool isFuncletReturnInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
    return true;
  }
}

AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               const AArch64FrameLowering &AFL)
    : AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
  EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
  HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
  SEHEpilogueStartI = MBB.end();
}

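// Set SP to FP plus the given offset (a negative scalable offset with an
// optional negative fixed part). The fixed part is applied first via a
// temporary register so the scalable portion of the stack is never briefly
// deallocated.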
void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
                                           StackOffset Offset) {
  // Other combinations could be supported, but are not currently needed.
  assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
         "expected negative offset (with optional fixed portion)");
  Register Base = AArch64::FP;
  if (int64_t FixedOffset = Offset.getFixed()) {
    // If we have a negative fixed offset, we first apply it in a temporary
    // register (to avoid briefly deallocating the scalable portion of the
    // offset).
    Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
    emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
                    StackOffset::getFixed(FixedOffset), TII,
                    MachineInstr::FrameDestroy);
  }
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
                  StackOffset::getScalable(Offset.getScalable()), TII,
                  MachineInstr::FrameDestroy);
}
1354
1355void AArch64EpilogueEmitter::emitEpilogue() {
1356 MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
1357 if (MBB.end() != EpilogueEndI) {
1358 DL = EpilogueEndI->getDebugLoc();
1359 IsFunclet = isFuncletReturnInstr(MI: *EpilogueEndI);
1360 }
1361
1362 int64_t NumBytes =
1363 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
1364
1365 // All calls are tail calls in GHC calling conv, and functions have no
1366 // prologue/epilogue.
1367 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1368 return;
1369
1370 // How much of the stack used by incoming arguments this function is expected
1371 // to restore in this particular epilogue.
1372 int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
1373 bool IsWin64 = Subtarget.isCallingConvWin64(CC: MF.getFunction().getCallingConv(),
1374 IsVarArg: MF.getFunction().isVarArg());
1375 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1376
1377 int64_t AfterCSRPopSize = ArgumentStackToRestore;
1378 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1379 // We cannot rely on the local stack size set in emitPrologue if the function
1380 // has funclets, as funclets have different local stack size requirements, and
1381 // the current value set in emitPrologue may be that of the containing
1382 // function.
1383 if (MF.hasEHFunclets())
1384 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1385
1386 if (HomPrologEpilog) {
1387 assert(!NeedsWinCFI);
1388 auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
1389 if (FirstHomogenousEpilogI != MBB.begin()) {
1390 auto HomogeneousEpilog = std::prev(x: FirstHomogenousEpilogI);
1391 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1392 FirstHomogenousEpilogI = HomogeneousEpilog;
1393 }
1394
1395 // Adjust local stack
1396 emitFrameOffset(MBB, MBBI: FirstHomogenousEpilogI, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1397 Offset: StackOffset::getFixed(Fixed: AFI->getLocalStackSize()), TII,
1398 MachineInstr::FrameDestroy, SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI);
1399
    // SP has already been adjusted while restoring the callee-save registers.
    // We have bailed out of the case that adjusts SP for arguments.
1402 assert(AfterCSRPopSize == 0);
1403 return;
1404 }
1405
1406 bool CombineSPBump = shouldCombineCSRLocalStackBump(StackBumpBytes: NumBytes);
1407
1408 unsigned ProloguePopSize = PrologueSaveSize;
1409 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
    // With CalleeSavesAboveFrameRecord, ProloguePopSize is the amount of
    // stack that needs to be popped to reach the start of the SVE save area.
    // The "FixedObject" stack comes after the SVE area and must be popped
    // later.
1414 ProloguePopSize -= FixedObject;
1415 AfterCSRPopSize += FixedObject;
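    // E.g., with PrologueSaveSize = 96 and FixedObject = 16 (illustrative
    // values), 80 bytes are popped here and the remaining 16 bytes are
    // deferred to the AfterCSRPopSize adjustment below.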
1416 }
1417
1418 // Assume we can't combine the last pop with the sp restore.
1419 if (!CombineSPBump && ProloguePopSize != 0) {
1420 MachineBasicBlock::iterator Pop = std::prev(x: MBB.getFirstTerminator());
1421 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1422 AArch64InstrInfo::isSEHInstruction(MI: *Pop) ||
1423 (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord &&
1424 isPartOfSVECalleeSaves(I: Pop)))
1425 Pop = std::prev(x: Pop);
1426 // Converting the last ldp to a post-index ldp is valid only if the last
1427 // ldp's offset is 0.
1428 const MachineOperand &OffsetOp = Pop->getOperand(i: Pop->getNumOperands() - 1);
1429 // If the offset is 0 and the AfterCSR pop is not actually trying to
1430 // allocate more stack for arguments (in space that an untimely interrupt
1431 // may clobber), convert it to a post-index ldp.
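    // For example, a trailing "ldp x29, x30, [sp]" restore becomes the
    // post-indexed "ldp x29, x30, [sp], #<ProloguePopSize>" (illustrative
    // registers; the actual pair depends on the callee-save layout).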
1432 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
1433 convertCalleeSaveRestoreToSPPrePostIncDec(
1434 MBBI: Pop, DL, CSStackSizeInc: ProloguePopSize, EmitCFI, FrameFlag: MachineInstr::FrameDestroy,
1435 CFAOffset: ProloguePopSize);
1436 } else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1437 MachineBasicBlock::iterator AfterLastPop = std::next(x: Pop);
1438 if (AArch64InstrInfo::isSEHInstruction(MI: *AfterLastPop))
1439 ++AfterLastPop;
1440 // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1441 // callee-save non-SVE registers to move the stack pointer to the start of
1442 // the SVE area.
1443 emitFrameOffset(MBB, MBBI: AfterLastPop, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1444 Offset: StackOffset::getFixed(Fixed: ProloguePopSize), TII,
1445 MachineInstr::FrameDestroy, SetNZCV: false, NeedsWinCFI,
1446 HasWinCFI: &HasWinCFI);
1447 } else {
1448 // Otherwise, make sure to emit an add after the last ldp.
1449 // We're doing this by transferring the size to be restored from the
1450 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1451 // pops.
1452 AfterCSRPopSize += ProloguePopSize;
1453 }
1454 }
1455
1456 // Move past the restores of the callee-saved registers.
1457 // If we plan on combining the sp bump of the local stack size and the callee
1458 // save stack size, we might need to adjust the CSR save and restore offsets.
1459 MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
1460 MachineBasicBlock::iterator Begin = MBB.begin();
1461 while (FirstGPRRestoreI != Begin) {
1462 --FirstGPRRestoreI;
1463 if (!FirstGPRRestoreI->getFlag(Flag: MachineInstr::FrameDestroy) ||
1464 (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord &&
1465 isPartOfSVECalleeSaves(I: FirstGPRRestoreI))) {
1466 ++FirstGPRRestoreI;
1467 break;
1468 } else if (CombineSPBump)
1469 fixupCalleeSaveRestoreStackOffset(MI&: *FirstGPRRestoreI,
1470 LocalStackSize: AFI->getLocalStackSize());
1471 }
1472
1473 if (NeedsWinCFI) {
1474 // Note that there are cases where we insert SEH opcodes in the
1475 // epilogue when we had no SEH opcodes in the prologue. For
1476 // example, when there is no stack frame but there are stack
    // arguments. Insert the SEH_EpilogStart and remove it later if we didn't
    // emit any SEH opcodes, to avoid generating WinCFI for functions that
    // don't need it.
1480 BuildMI(BB&: MBB, I: FirstGPRRestoreI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_EpilogStart))
1481 .setMIFlag(MachineInstr::FrameDestroy);
1482 SEHEpilogueStartI = FirstGPRRestoreI;
1483 --SEHEpilogueStartI;
1484 }
1485
1486 // Determine the ranges of SVE callee-saves. This is done before emitting any
1487 // code at the end of the epilogue (for Swift async), which can get in the way
1488 // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1489 auto [PPR, ZPR] = getSVEStackFrameSizes();
1490 auto [PPRRange, ZPRRange] = partitionSVECS(
1491 MBB,
1492 MBBI: SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
1493 ? MBB.getFirstTerminator()
1494 : FirstGPRRestoreI,
1495 PPRCalleeSavesSize: PPR.CalleeSavesSize, ZPRCalleeSavesSize: ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1496
1497 if (HasFP && AFI->hasSwiftAsyncContext())
1498 emitSwiftAsyncContextFramePointer(MBBI: EpilogueEndI, DL);
1499
1500 // If there is a single SP update, insert it before the ret and we're done.
1501 if (CombineSPBump) {
1502 assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
1503
1504 // When we are about to restore the CSRs, the CFA register is SP again.
1505 if (EmitCFI && HasFP)
1506 CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
1507 .buildDefCFA(Reg: AArch64::SP, Offset: NumBytes);
1508
1509 emitFrameOffset(MBB, MBBI: MBB.getFirstTerminator(), DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1510 Offset: StackOffset::getFixed(Fixed: NumBytes + AfterCSRPopSize), TII,
1511 MachineInstr::FrameDestroy, SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI,
1512 EmitCFAOffset: EmitCFI, InitialOffset: StackOffset::getFixed(Fixed: NumBytes));
1513 return;
1514 }
1515
1516 NumBytes -= PrologueSaveSize;
1517 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1518
1519 StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
1520 SVEStackAllocations SVEAllocs = getSVEStackAllocations(SVE: {.PPR: PPR, .ZPR: ZPR});
1521
1522 // Deallocate the SVE area.
1523 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1524 assert(!SVEAllocs.AfterPPRs &&
1525 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
1526 // If the callee-save area is before FP, restoring the FP implicitly
1527 // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
1528 // explicitly.
1529 if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1530 emitFrameOffset(MBB, MBBI: FirstGPRRestoreI, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1531 Offset: SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
1532 SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI);
1533 }
1534
1535 // Deallocate callee-save SVE registers.
1536 emitFrameOffset(MBB, MBBI: PPRRange.End, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1537 Offset: SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
1538 SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI);
1539 } else if (AFI->hasSVEStackSize()) {
    // If we have stack realignment or variable-sized objects, we must use the
    // FP to restore SVE callee saves (as there is an unknown amount of
    // data/padding between the SP and the SVE CS area).
1543 Register BaseForSVEDealloc =
1544 (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1545 : AArch64::SP;
1546 if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
1547 if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1548 // The offset from the frame-pointer to the start of the ZPR saves.
1549 StackOffset FPOffsetZPR =
1550 -SVECalleeSavesSize - PPR.LocalsSize -
1551 StackOffset::getFixed(Fixed: AFI->getCalleeSaveBaseToFrameRecordOffset());
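        // E.g., with 32 scalable bytes of ZPR+PPR callee saves, no PPR
        // locals, and a 16-byte callee-save-base-to-frame-record offset
        // (illustrative values), FPOffsetZPR is {fixed: -16, scalable: -32}.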
        // Deallocate the stack space by moving the SP to the start of the
        // ZPR/PPR callee-save area.
1554 moveSPBelowFP(MBBI: ZPRRange.Begin, Offset: FPOffsetZPR);
1555 }
1556 // With split SVE, the predicates are stored in a separate area above the
1557 // ZPR saves, so we must adjust the stack to the start of the PPRs.
1558 if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1559 // The offset from the frame-pointer to the start of the PPR saves.
1560 StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1561 // Move to the start of the PPR area.
1562 assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1563 emitFrameOffset(MBB, MBBI: ZPRRange.End, DL, DestReg: AArch64::SP, SrcReg: AArch64::FP,
1564 Offset: FPOffsetPPR, TII, MachineInstr::FrameDestroy);
1565 }
1566 } else if (BaseForSVEDealloc == AArch64::SP) {
1567 auto NonSVELocals = StackOffset::getFixed(Fixed: NumBytes);
1568 auto CFAOffset = NonSVELocals + StackOffset::getFixed(Fixed: PrologueSaveSize) +
1569 SVEAllocs.totalSize();
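      // E.g., with NumBytes = 48, PrologueSaveSize = 96, and 64 scalable
      // bytes of SVE allocations (illustrative values), CFAOffset starts as
      // {fixed: 144, scalable: 64} and shrinks after each deallocation below.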
1570
1571 if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
1572 // Deallocate non-SVE locals now. This is needed to reach the SVE callee
1573 // saves, but may also allow combining stack hazard bumps for split SVE.
1574 SVEAllocs.AfterZPRs += NonSVELocals;
1575 NumBytes -= NonSVELocals.getFixed();
1576 }
      // To deallocate the SVE stack, adjust by the allocations in reverse.
1578 emitFrameOffset(MBB, MBBI: ZPRRange.Begin, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1579 Offset: SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
1580 SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI, EmitCFAOffset: EmitCFI && !HasFP,
1581 InitialOffset: CFAOffset);
1582 CFAOffset -= SVEAllocs.AfterZPRs;
1583 assert(PPRRange.Begin == ZPRRange.End &&
1584 "Expected PPR restores after ZPR");
1585 emitFrameOffset(MBB, MBBI: PPRRange.Begin, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1586 Offset: SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy,
1587 SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI, EmitCFAOffset: EmitCFI && !HasFP,
1588 InitialOffset: CFAOffset);
1589 CFAOffset -= SVEAllocs.AfterPPRs;
1590 emitFrameOffset(MBB, MBBI: PPRRange.End, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1591 Offset: SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
1592 SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI, EmitCFAOffset: EmitCFI && !HasFP,
1593 InitialOffset: CFAOffset);
1594 }
1595
1596 if (EmitCFI)
1597 emitCalleeSavedSVERestores(
1598 MBBI: SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
1599 }
1600
1601 if (!HasFP) {
1602 bool RedZone = AFL.canUseRedZone(MF);
1603 // If this was a redzone leaf function, we don't need to restore the
1604 // stack pointer (but we may need to pop stack args for fastcc).
1605 if (RedZone && AfterCSRPopSize == 0)
1606 return;
1607
    // Pop the local variables off the stack. If there are no callee-saved
    // registers, it means we are actually positioned at the terminator and can
    // combine the stack increment for the locals and the stack increment for
    // the callee-popped arguments into (possibly) a single instruction and be
    // done.
1612 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1613 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1614 if (NoCalleeSaveRestore)
1615 StackRestoreBytes += AfterCSRPopSize;
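    // E.g., a function with no callee saves that cannot use the red zone,
    // with NumBytes = 32 and AfterCSRPopSize = 16 (illustrative values),
    // restores SP with a single "add sp, sp, #48" here.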
1616
1617 emitFrameOffset(
1618 MBB, MBBI: FirstGPRRestoreI, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1619 Offset: StackOffset::getFixed(Fixed: StackRestoreBytes), TII,
1620 MachineInstr::FrameDestroy, SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI, EmitCFAOffset: EmitCFI,
1621 InitialOffset: StackOffset::getFixed(Fixed: (RedZone ? 0 : NumBytes) + PrologueSaveSize));
1622
1623 // If we were able to combine the local stack pop with the argument pop,
1624 // then we're done.
1625 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1626 return;
1627
1628 NumBytes = 0;
1629 }
1630
1631 // Restore the original stack pointer.
1632 // FIXME: Rather than doing the math here, we should instead just use
1633 // non-post-indexed loads for the restores if we aren't actually going to
1634 // be able to save any instructions.
1635 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1636 emitFrameOffset(
1637 MBB, MBBI: FirstGPRRestoreI, DL, DestReg: AArch64::SP, SrcReg: AArch64::FP,
1638 Offset: StackOffset::getFixed(Fixed: -AFI->getCalleeSaveBaseToFrameRecordOffset()),
1639 TII, MachineInstr::FrameDestroy, SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI);
1640 } else if (NumBytes)
1641 emitFrameOffset(MBB, MBBI: FirstGPRRestoreI, DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1642 Offset: StackOffset::getFixed(Fixed: NumBytes), TII,
1643 MachineInstr::FrameDestroy, SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI);
1644
1645 // When we are about to restore the CSRs, the CFA register is SP again.
1646 if (EmitCFI && HasFP)
1647 CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
1648 .buildDefCFA(Reg: AArch64::SP, Offset: PrologueSaveSize);
1649
1650 // This must be placed after the callee-save restore code because that code
1651 // assumes the SP is at the same location as it was after the callee-save save
1652 // code in the prologue.
1653 if (AfterCSRPopSize) {
1654 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1655 "interrupt may have clobbered");
1656
1657 emitFrameOffset(
1658 MBB, MBBI: MBB.getFirstTerminator(), DL, DestReg: AArch64::SP, SrcReg: AArch64::SP,
1659 Offset: StackOffset::getFixed(Fixed: AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
1660 SetNZCV: false, NeedsWinCFI, HasWinCFI: &HasWinCFI, EmitCFAOffset: EmitCFI,
1661 InitialOffset: StackOffset::getFixed(Fixed: AfterCSRPopSize - ArgumentStackToRestore));
1662 }
1663}
1664
1665bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1666 uint64_t StackBumpBytes) const {
1667 if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
1668 StackBumpBytes))
1669 return false;
1670 if (MBB.empty())
1671 return true;
1672
1673 // Disable combined SP bump if the last instruction is an MTE tag store. It
1674 // is almost always better to merge SP adjustment into those instructions.
1675 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1676 MachineBasicBlock::iterator Begin = MBB.begin();
1677 while (LastI != Begin) {
1678 --LastI;
1679 if (LastI->isTransient())
1680 continue;
1681 if (!LastI->getFlag(Flag: MachineInstr::FrameDestroy))
1682 break;
1683 }
1684 switch (LastI->getOpcode()) {
1685 case AArch64::STGloop:
1686 case AArch64::STZGloop:
1687 case AArch64::STGi:
1688 case AArch64::STZGi:
1689 case AArch64::ST2Gi:
1690 case AArch64::STZ2Gi:
1691 return false;
1692 default:
1693 return true;
1694 }
1695 llvm_unreachable("unreachable");
1696}
1697
1698void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1699 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1700 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1701 case SwiftAsyncFramePointerMode::DeploymentBased:
1702 // Avoid the reload as it is GOT relative, and instead fall back to the
1703 // hardcoded value below. This allows a mismatch between the OS and
1704 // application without immediately terminating on the difference.
1705 [[fallthrough]];
1706 case SwiftAsyncFramePointerMode::Always:
1707 // We need to reset FP to its untagged state on return. Bit 60 is
1708 // currently used to show the presence of an extended frame.
1709
1710 // BIC x29, x29, #0x1000_0000_0000_0000
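    // (0x10fe is the 64-bit logical-immediate encoding of ~(1ULL << 60).)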
1711 BuildMI(BB&: MBB, I: MBB.getFirstTerminator(), MIMD: DL, MCID: TII->get(Opcode: AArch64::ANDXri),
1712 DestReg: AArch64::FP)
1713 .addUse(RegNo: AArch64::FP)
1714 .addImm(Val: 0x10fe)
1715 .setMIFlag(MachineInstr::FrameDestroy);
1716 if (NeedsWinCFI) {
1717 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_Nop))
1718 .setMIFlags(MachineInstr::FrameDestroy);
1719 HasWinCFI = true;
1720 }
1721 break;
1722
1723 case SwiftAsyncFramePointerMode::Never:
1724 break;
1725 }
1726}
1727
1728void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1729 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1730 // Shadow call stack epilog: ldr x30, [x18, #-8]!
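  // This pre-decrements x18 and reloads LR, undoing the prologue's
  // post-indexed store of LR to the shadow call stack.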
1731 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::LDRXpre))
1732 .addReg(RegNo: AArch64::X18, Flags: RegState::Define)
1733 .addReg(RegNo: AArch64::LR, Flags: RegState::Define)
1734 .addReg(RegNo: AArch64::X18)
1735 .addImm(Val: -8)
1736 .setMIFlag(MachineInstr::FrameDestroy);
1737
1738 if (NeedsWinCFI)
1739 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_Nop))
1740 .setMIFlag(MachineInstr::FrameDestroy);
1741
1742 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1743 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1744 .buildRestore(Reg: AArch64::X18);
1745}
1746
1747void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1748 MachineBasicBlock::iterator MBBI, bool SVE) const {
1749 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1750 if (CSI.empty())
1751 return;
1752
1753 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1754
1755 for (const auto &Info : CSI) {
1756 if (SVE != MFI.hasScalableStackID(ObjectIdx: Info.getFrameIdx()))
1757 continue;
1758
1759 MCRegister Reg = Info.getReg();
1760 if (SVE && !RegInfo.regNeedsCFI(Reg, RegToUseForCFI&: Reg))
1761 continue;
1762
1763 CFIBuilder.buildRestore(Reg: Info.getReg());
1764 }
1765}
1766
1767void AArch64EpilogueEmitter::finalizeEpilogue() const {
1768 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1769 emitShadowCallStackEpilogue(MBBI: MBB.getFirstTerminator(), DL);
1770 HasWinCFI |= NeedsWinCFI;
1771 }
1772 if (EmitCFI)
1773 emitCalleeSavedGPRRestores(MBBI: MBB.getFirstTerminator());
1774 if (AFI->shouldSignReturnAddress(MF)) {
1775 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1776 // are inserted by emitPacRetPlusLeafHardening().
1777 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1778 BuildMI(BB&: MBB, I: MBB.getFirstTerminator(), MIMD: DL,
1779 MCID: TII->get(Opcode: AArch64::PAUTH_EPILOGUE))
1780 .setMIFlag(MachineInstr::FrameDestroy);
1781 }
1782 // AArch64PointerAuth pass will insert SEH_PACSignLR
1783 HasWinCFI |= NeedsWinCFI;
1784 }
1785 if (HasWinCFI) {
1786 BuildMI(BB&: MBB, I: MBB.getFirstTerminator(), MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_EpilogEnd))
1787 .setMIFlag(MachineInstr::FrameDestroy);
1788 if (!MF.hasWinCFI())
1789 MF.setHasWinCFI(true);
1790 }
1791 if (NeedsWinCFI) {
1792 assert(SEHEpilogueStartI != MBB.end());
1793 if (!HasWinCFI)
1794 MBB.erase(I: SEHEpilogueStartI);
1795 }
1796}
1797
1798} // namespace llvm
1799