1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
// To access the data in a frame, a constant offset from one of the
// pointers (fp, bp, sp) to the data must be computable at compile time. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
114#include "ARMMachineFunctionInfo.h"
115#include "ARMSubtarget.h"
116#include "MCTargetDesc/ARMAddressingModes.h"
117#include "MCTargetDesc/ARMBaseInfo.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
123#include "llvm/CodeGen/CFIInstBuilder.h"
124#include "llvm/CodeGen/MachineBasicBlock.h"
125#include "llvm/CodeGen/MachineConstantPool.h"
126#include "llvm/CodeGen/MachineFrameInfo.h"
127#include "llvm/CodeGen/MachineFunction.h"
128#include "llvm/CodeGen/MachineInstr.h"
129#include "llvm/CodeGen/MachineInstrBuilder.h"
130#include "llvm/CodeGen/MachineJumpTableInfo.h"
131#include "llvm/CodeGen/MachineModuleInfo.h"
132#include "llvm/CodeGen/MachineOperand.h"
133#include "llvm/CodeGen/MachineRegisterInfo.h"
134#include "llvm/CodeGen/RegisterScavenging.h"
135#include "llvm/CodeGen/TargetInstrInfo.h"
136#include "llvm/CodeGen/TargetRegisterInfo.h"
137#include "llvm/CodeGen/TargetSubtargetInfo.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
145#include "llvm/Support/CommandLine.h"
146#include "llvm/Support/Compiler.h"
147#include "llvm/Support/Debug.h"
148#include "llvm/Support/ErrorHandling.h"
149#include "llvm/Support/raw_ostream.h"
150#include "llvm/Target/TargetMachine.h"
151#include "llvm/Target/TargetOptions.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
160#define DEBUG_TYPE "arm-frame-lowering"
161
162using namespace llvm;
163
// When enabled, d8-d15 spills may be moved into a separate, re-aligned save
// area (DPRCS2) so that aligned NEON stores can be used; see the DPRCS2
// comments in getSpillArea below.
static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(Val: true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));

// Forward declaration; the definition appears later in this file.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);
171
// The distinct callee-save spill areas of the ARM stack frame. Which
// registers land in which area depends on the PushPopSplitVariation; see
// getSpillArea for the exact layouts.
enum class SpillArea {
  GPRCS1,   // First general-purpose register save area.
  GPRCS2,   // Second GPR save area (used by some split variations).
  FPStatus, // FPSCR / FPEXC status registers.
  DPRCS1,   // First D-register save area.
  DPRCS2,   // Re-aligned D-register area (4-byte-SP ABIs; below the others).
  GPRCS3,   // Third GPR save area (SplitR11WindowsSEH only).
  FPCXT,    // FPCXTNS; always at the very top of the frame.
};
181
/// Get the spill area that Reg should be saved into in the prologue.
SpillArea getSpillArea(Register Reg,
                       ARMSubtarget::PushPopSplitVariation Variation,
                       unsigned NumAlignedDPRCS2Regs,
                       const ARMBaseRegisterInfo *RegInfo) {
  // NoSplit:
  // push {r0-r12, lr}    GPRCS1
  // vpush {d8-d15}       DPRCS1
  //
  // SplitR7:
  // push {r0-r7, lr}     GPRCS1
  // push {r8-r12}        GPRCS2
  // vpush {d8-d15}       DPRCS1
  //
  // SplitR11WindowsSEH:
  // push {r0-r10, r12}   GPRCS1
  // vpush {d8-d15}       DPRCS1
  // push {r11, lr}       GPRCS3
  //
  // SplitR11AAPCSSignRA:
  // push {r0-r10, r12}   GPRCS1
  // push {r11, lr}       GPRCS2
  // vpush {d8-d15}       DPRCS1

  // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
  // the top of the stack frame.
  // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
  // of SP. If used, it will be below the other save areas, after the stack has
  // been re-aligned.

  switch (Reg) {
  default:
    dbgs() << "Don't know where to spill " << printReg(Reg, TRI: RegInfo) << "\n";
    llvm_unreachable("Don't know where to spill this register");
    break;

  case ARM::FPCXTNS:
    return SpillArea::FPCXT;

  case ARM::FPSCR:
  case ARM::FPEXC:
    return SpillArea::FPStatus;

  case ARM::R0:
  case ARM::R1:
  case ARM::R2:
  case ARM::R3:
  case ARM::R4:
  case ARM::R5:
  case ARM::R6:
  case ARM::R7:
    return SpillArea::GPRCS1;

  // r8-r10 go into the second push only under SplitR7.
  case ARM::R8:
  case ARM::R9:
  case ARM::R10:
    if (Variation == ARMSubtarget::SplitR7)
      return SpillArea::GPRCS2;
    else
      return SpillArea::GPRCS1;

  // r11 is the frame pointer for the R11 split variations, so it is pushed
  // together with lr (the frame record) in its own area there.
  case ARM::R11:
    if (Variation == ARMSubtarget::SplitR7 ||
        Variation == ARMSubtarget::SplitR11AAPCSSignRA)
      return SpillArea::GPRCS2;
    if (Variation == ARMSubtarget::SplitR11WindowsSEH)
      return SpillArea::GPRCS3;

    return SpillArea::GPRCS1;

  case ARM::R12:
    if (Variation == ARMSubtarget::SplitR7)
      return SpillArea::GPRCS2;
    else
      return SpillArea::GPRCS1;

  // lr goes with the frame record for the R11 split variations (see above).
  case ARM::LR:
    if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
      return SpillArea::GPRCS2;
    if (Variation == ARMSubtarget::SplitR11WindowsSEH)
      return SpillArea::GPRCS3;

    return SpillArea::GPRCS1;

  case ARM::D0:
  case ARM::D1:
  case ARM::D2:
  case ARM::D3:
  case ARM::D4:
  case ARM::D5:
  case ARM::D6:
  case ARM::D7:
    return SpillArea::DPRCS1;

  // The first NumAlignedDPRCS2Regs of d8-d15 are moved into the re-aligned
  // DPRCS2 area; the rest stay in DPRCS1.
  case ARM::D8:
  case ARM::D9:
  case ARM::D10:
  case ARM::D11:
  case ARM::D12:
  case ARM::D13:
  case ARM::D14:
  case ARM::D15:
    if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
      return SpillArea::DPRCS2;
    else
      return SpillArea::DPRCS1;

  case ARM::D16:
  case ARM::D17:
  case ARM::D18:
  case ARM::D19:
  case ARM::D20:
  case ARM::D21:
  case ARM::D22:
  case ARM::D23:
  case ARM::D24:
  case ARM::D25:
  case ARM::D26:
  case ARM::D27:
  case ARM::D28:
  case ARM::D29:
  case ARM::D30:
  case ARM::D31:
    return SpillArea::DPRCS1;
  }
}
308
// The ARM stack grows down, uses the subtarget's stack alignment, has local
// area offsets starting at SP-on-entry (offset 0), and a 4-byte transient
// stack alignment.
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}
312
/// Return true if the frame pointer should be forcibly kept for this
/// function, beyond what hasFPImpl would otherwise require.
bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
  // iOS always has a FP for backtracking, force other targets to keep their FP
  // when doing FastISel. The emitted code is currently superior, and in cases
  // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
  return MF.getSubtarget<ARMSubtarget>().useFastISel();
}
319
/// Returns true if the target can safely skip saving callee-saved registers
/// for noreturn nounwind functions.
bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
  // This is only legal for functions that never return and never unwind, and
  // which do not require unwind tables; callers must have checked that.
  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
         MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
         !MF.getFunction().hasFnAttribute(Attribute::UWTable));

  // Frame pointer and link register are not treated as normal CSR, thus we
  // can always skip CSR saves for nonreturning functions.
  return true;
}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
335bool ARMFrameLowering::hasFPImpl(const MachineFunction &MF) const {
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target want to forcibly keep frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
344 if (MF.getTarget().Options.DisableFramePointerElim(MF))
345 return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
/// isFPReserved - Return true if the frame pointer register should be
/// considered a reserved register on the scope of the specified function.
/// This is broader than hasFP: the register can be reserved (and thus not
/// allocatable) even when no frame pointer is actually set up.
bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
  return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
363bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has small immediate offset to
368 // address the stack frame. So a large call frame can cause poor codegen
369 // and may even makes it impossible to scavenge a register.
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
375
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}
384
385// Returns how much of the incoming argument stack area we should clean up in an
386// epilogue. For the C calling convention this will be 0, for guaranteed tail
387// call conventions it can be positive (a normal return or a tail call to a
388// function that uses less stack space for arguments) or negative (for a tail
389// call to a function that needs more stack space than us for arguments).
390static int getArgumentStackToRestore(MachineFunction &MF,
391 MachineBasicBlock &MBB) {
392 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
400 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(i: 1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments, this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
//
// The SEH pseudo is inserted immediately after MBBI and mirrors what the
// instruction does to the stack (register saves, SP allocation, setting up
// a frame pointer). In some cases the original instruction is first replaced
// with a narrow (16-bit) Thumb encoding so that the emitted SEH opcode's
// "wide" flag matches the final instruction width. Returns an iterator to
// the inserted SEH instruction.
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             unsigned Flags) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  MachineInstrBuilder MIB;
  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
  const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // SEH opcodes must stay paired with their instruction; never merge them.
  Flags |= MachineInstr::NoMerge;

  switch (Opc) {
  default:
    report_fatal_error(reason: "No SEH Opcode for instruction " + TII.getName(Opcode: Opc));
    break;
  case ARM::t2ADDri:   // add.w r11, sp, #xx
  case ARM::t2ADDri12: // add.w r11, sp, #xx
  case ARM::t2MOVTi16: // movt  r4, #xx
  case ARM::tBL:       // bl __chkstk
    // These are harmless if used for just setting up a frame pointer,
    // but that frame pointer can't be relied upon for unwinding, unless
    // set up with SEH_SaveSP.
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi16: { // mov(w) r4, #xx
    bool Wide = MBBI->getOperand(i: 1).getImm() >= 256;
    if (!Wide) {
      // Small immediates fit the narrow tMOVi8 encoding; rewrite the
      // instruction so the SEH "wide" flag below is accurate.
      MachineInstrBuilder NewInstr =
          BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
      NewInstr.add(MO: MBBI->getOperand(i: 0));
      NewInstr.add(MO: t1CondCodeOp(/*isDead=*/true));
      for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands()))
        NewInstr.add(MO);
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
      MBB->erase(I: MBBI);
      MBBI = NewMBBI;
    }
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop)).addImm(Val: Wide).setMIFlags(Flags);
    break;
  }

  case ARM::tBLXr: // blx r12 (__chkstk)
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
              .addImm(/*Wide=*/Val: 0)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi32imm: // movw+movt
    // This pseudo instruction expands into two mov instructions. If the
    // second operand is a symbol reference, this will stay as two wide
    // instructions, movw+movt. If they're immediates, the first one can
    // end up as a narrow mov though.
    // As two SEH instructions are appended here, they won't get interleaved
    // between the two final movw/movt instructions, but it doesn't make any
    // practical difference.
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    MBB->insertAfter(I: MBBI, MI: MIB);
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    break;

  case ARM::t2STR_PRE:
    // Only "str reg, [sp, #-4]!" (a single-register push) is representable.
    if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
        MBBI->getOperand(i: 2).getReg() == ARM::SP &&
        MBBI->getOperand(i: 3).getImm() == -4) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
      MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
                .addImm(Val: 1ULL << Reg)
                .addImm(/*Wide=*/Val: 1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error(reason: "No matching SEH Opcode for t2STR_PRE");
    }
    break;

  case ARM::t2LDR_POST:
    // Only "ldr reg, [sp], #4" (a single-register pop) is representable.
    if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
        MBBI->getOperand(i: 2).getReg() == ARM::SP &&
        MBBI->getOperand(i: 3).getImm() == 4) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
      MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
                .addImm(Val: 1ULL << Reg)
                .addImm(/*Wide=*/Val: 1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error(reason: "No matching SEH Opcode for t2LDR_POST");
    }
    break;

  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMDB_UPD: {
    // Build the SEH register mask from the register-list operands (starting
    // at operand 4), deciding along the way whether the narrow tPUSH/tPOP
    // encodings could represent this list.
    unsigned Mask = 0;
    bool Wide = false;
    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
      const MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
      // pc in a pop is encoded as lr in the SEH mask.
      if (Reg == 15)
        Reg = 14;
      if (Reg >= 8 && Reg <= 13)
        Wide = true;
      else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
        Wide = true;
      Mask |= 1 << Reg;
    }
    if (!Wide) {
      // Rewrite to the narrow encoding so instruction width matches the
      // SEH opcode's "wide" flag.
      unsigned NewOpc;
      switch (Opc) {
      case ARM::t2LDMIA_RET:
        NewOpc = ARM::tPOP_RET;
        break;
      case ARM::t2LDMIA_UPD:
        NewOpc = ARM::tPOP;
        break;
      case ARM::t2STMDB_UPD:
        NewOpc = ARM::tPUSH;
        break;
      default:
        llvm_unreachable("");
      }
      MachineInstrBuilder NewInstr =
          BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: NewOpc)).setMIFlags(MBBI->getFlags());
      for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
        NewInstr.add(MO: MBBI->getOperand(i));
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
      MBB->erase(I: MBBI);
      MBBI = NewMBBI;
    }
    unsigned SEHOpc =
        (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: SEHOpc))
              .addImm(Val: Mask)
              .addImm(Val: Wide ? 1 : 0)
              .setMIFlags(Flags);
    break;
  }
  case ARM::VSTMDDB_UPD:
  case ARM::VLDMDIA_UPD: {
    // D-register save/restore: SEH encodes the contiguous range [First,Last].
    int First = -1, Last = 0;
    for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: 4)) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
      if (First == -1)
        First = Reg;
      Last = Reg;
    }
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveFRegs))
              .addImm(Val: First)
              .addImm(Val: Last)
              .setMIFlags(Flags);
    break;
  }
  case ARM::tSUBspi:
  case ARM::tADDspi:
    // Narrow sp adjustment; the immediate is in units of 4 bytes.
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
              .addImm(Val: MBBI->getOperand(i: 2).getImm() * 4)
              .addImm(/*Wide=*/Val: 0)
              .setMIFlags(Flags);
    break;
  case ARM::t2SUBspImm:
  case ARM::t2SUBspImm12:
  case ARM::t2ADDspImm:
  case ARM::t2ADDspImm12:
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
              .addImm(Val: MBBI->getOperand(i: 2).getImm())
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    break;

  case ARM::tMOVr:
    // mov rX, sp in the prologue / mov sp, rX in the epilogue: both record
    // where SP is mirrored so the unwinder can recover it.
    if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
        (Flags & MachineInstr::FrameSetup)) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
      MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
                .addImm(Val: Reg)
                .setMIFlags(Flags);
    } else if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
               (Flags & MachineInstr::FrameDestroy)) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
      MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
                .addImm(Val: Reg)
                .setMIFlags(Flags);
    } else {
      report_fatal_error(reason: "No SEH Opcode for MOV");
    }
    break;

  case ARM::tBX_RET:
  case ARM::TCRETURNri:
  case ARM::TCRETURNrinotr12:
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/Val: 0)
              .setMIFlags(Flags);
    break;

  case ARM::TCRETURNdi:
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    break;
  }
  return MBB->insertAfter(I: MBBI, MI: MIB);
}
641
642static MachineBasicBlock::iterator
643initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
644 if (MBBI == MBB.begin())
645 return MachineBasicBlock::iterator();
646 return std::prev(x: MBBI);
647}
648
// Attach SEH unwind opcodes to every instruction in (Start, End). Start is
// the sentinel produced by initMBBRange: an invalid iterator means "from the
// beginning of the block". Instructions that already carry a SEH opcode are
// skipped, together with their existing SEH instructions.
static void insertSEHRange(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator Start,
                           const MachineBasicBlock::iterator &End,
                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  if (Start.isValid())
    Start = std::next(x: Start);
  else
    Start = MBB.begin();

  for (auto MI = Start; MI != End;) {
    // Capture the successor first: insertSEH may replace *MI with a narrow
    // encoding (invalidating MI) and always inserts after it.
    auto Next = std::next(x: MI);
    // Check if this instruction already has got a SEH opcode added. In that
    // case, don't do this generic mapping.
    if (Next != End && isSEHInstruction(MI: *Next)) {
      MI = std::next(x: Next);
      while (MI != End && isSEHInstruction(MI: *MI))
        ++MI;
      continue;
    }
    insertSEH(MBBI: MI, TII, Flags: MIFlags);
    MI = Next;
  }
}
672
// Emit "DestReg = SrcReg + NumBytes", dispatching to the ARM or Thumb2
// materialization helper depending on the instruction set in use.
static void emitRegPlusImmediate(
    bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
    unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}
685
// Adjust the stack pointer by NumBytes (negative to allocate stack space,
// positive to deallocate). Thin wrapper over emitRegPlusImmediate with
// SP as both source and destination.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, DestReg: ARM::SP, SrcReg: ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}
695
696static int sizeOfSPAdjustment(const MachineInstr &MI) {
697 int RegSize;
698 switch (MI.getOpcode()) {
699 case ARM::VSTMDDB_UPD:
700 RegSize = 8;
701 break;
702 case ARM::STMDB_UPD:
703 case ARM::t2STMDB_UPD:
704 RegSize = 4;
705 break;
706 case ARM::t2STR_PRE:
707 case ARM::STR_PRE_IMM:
708 return 4;
709 default:
710 llvm_unreachable("Unknown push or pop like instruction");
711 }
712
713 int count = 0;
714 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
715 // pred) so the list starts at 4.
716 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
717 count += RegSize;
718 return count;
719}
720
721static bool WindowsRequiresStackProbe(const MachineFunction &MF,
722 size_t StackSizeInBytes) {
723 const MachineFrameInfo &MFI = MF.getFrameInfo();
724 const Function &F = MF.getFunction();
725 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
726
727 StackProbeSize =
728 F.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: StackProbeSize);
729 return (StackSizeInBytes >= StackProbeSize) &&
730 !F.hasFnAttribute(Kind: "no-stack-arg-probe");
731}
732
namespace {

// Records every SP-adjusting instruction emitted in the prologue so that the
// matching .cfi_def_cfa_offset directives can be emitted afterwards with the
// correct cumulative offsets.
struct StackAdjustingInsts {
  struct InstInfo {
    MachineBasicBlock::iterator I; // The SP-adjusting instruction.
    unsigned SPAdjust;             // Bytes this instruction moves SP down by.
    bool BeforeFPSet;              // Executes before the FP is established.

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
             << "sp-adjust=" << SPAdjust;
      I->dump();
    }
#endif
  };

  SmallVector<InstInfo, 4> Insts;

  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {.I: I, .SPAdjust: SPAdjust, .BeforeFPSet: BeforeFPSet};
    Insts.push_back(Elt: Info);
  }

  // Grow the recorded adjustment of a previously-added instruction (e.g.
  // when extra bytes get folded into an existing push).
  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info =
        llvm::find_if(Range&: Insts, P: [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  // Emit a .cfi_def_cfa_offset after each recorded instruction, accumulating
  // the offsets. Once a frame pointer is set, the CFA is FP-based and no
  // further SP-relative offsets are needed.
  void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
    CFIInstBuilder CFIBuilder(MBB, MBB.end(), MachineInstr::FrameSetup);
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset += Info.SPAdjust;
      CFIBuilder.setInsertPoint(std::next(x: Info.I));
      CFIBuilder.buildDefCFAOffset(Offset: CFAOffset);
    }
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() {
    dbgs() << "StackAdjustingInsts:\n";
    for (auto &Info : Insts)
      Info.dump();
  }
#endif
};

} // end anonymous namespace
788
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
///
/// Alignment must be a power of two; Reg is modified in place (killed and
/// redefined).
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const Align Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment.value() - 1U;
  const unsigned NrBitsToZero = Log2(A: Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // if the BFC instruction is available, use that to zero the lower
    // bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BFC), DestReg: Reg)
          .addReg(RegNo: Reg, Flags: RegState::Kill)
          .addImm(Val: ~AlignMask)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else if (AlignMask <= 255) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BICri), DestReg: Reg)
          .addReg(RegNo: Reg, Flags: RegState::Kill)
          .addImm(Val: AlignMask)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
          .addReg(RegNo: Reg, Flags: RegState::Kill)
          .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsr, Imm: NrBitsToZero))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
          .addReg(RegNo: Reg, Flags: RegState::Kill)
          .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: NrBitsToZero))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::t2BFC), DestReg: Reg)
        .addReg(RegNo: Reg, Flags: RegState::Kill)
        .addImm(Val: ~AlignMask)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }
}
855
/// We need the offset of the frame pointer relative to other MachineFrameInfo
/// offsets which are encoded relative to SP at function begin.
/// See also emitPrologue() for how the FP is set up.
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
/// The returned value is negative: an offset below SP-at-entry.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                          const MachineFunction &MF) {
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  // For Thumb1, push.w isn't available, so the first push will always push
  // r7 and lr onto the stack first.
  if (AFI.isThumb1OnlyFunction())
    return -AFI.getArgRegsSaveSize() - (2 * 4);
  // This is a conservative estimation: Assume the frame pointer being r7 and
  // pc("r15") up to r8 getting spilled before (= 8 registers).
  int MaxRegBytes = 8 * 4;
  if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
    // Here, r11 can be stored below all of r4-r15.
    MaxRegBytes = 11 * 4;
  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
    // Here, r11 can be stored below all of r4-r15 plus d8-d15.
    MaxRegBytes = 11 * 4 + 8 * 8;
  }
  // CMSE non-secure entry functions on v8.1-M also save FPCXTNS at the top.
  int FPCXTSaveSize =
      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}
884
// Emit the function prologue: push the callee-saved register areas, set up
// the frame pointer (when required), allocate the local stack area, and emit
// the matching unwind description (Windows SEH opcodes or DWARF CFI).
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetMachine &TM = MF.getTarget();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;
  bool NeedsWinCFI = needsWinCFI(MF);
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
           DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Records every SP adjustment so the matching .cfi_def_cfa_offset
  // directives can be emitted once the final prologue layout is known.
  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  // Fast path: no callee saves and no stack probe needed; just drop SP (if
  // anything was allocated at all) and emit the unwind info.
  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes))) {
    if (NumBytes != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes, BeforeFPSet: true);
    }
    if (!NeedsWinCFI)
      DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
    if (NeedsWinCFI && MBBI != MBB.begin()) {
      insertSEHRange(MBB, Start: {}, End: MBBI, TII, MIFlags: MachineInstr::FrameSetup);
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      MF.setHasWinCFI(true);
    }
    return;
  }

  // Determine spill area sizes, and some important frame indices.
  SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
  // True while the instructions being recorded execute before the push that
  // stores the frame pointer register.
  bool BeforeFPPush = true;
  for (const CalleeSavedInfo &I : CSI) {
    MCRegister Reg = I.getReg();
    int FI = I.getFrameIdx();

    SpillArea Area = getSpillArea(Reg, Variation: PushPopSplit,
                                  NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(), RegInfo);

    if (Reg == FramePtr.asMCReg()) {
      FramePtrSpillFI = FI;
      FramePtrSpillArea = Area;
    }
    if (Reg == ARM::D8)
      D8SpillFI = FI;

    switch (Area) {
    case SpillArea::FPCXT:
      FPCXTSaveSize += 4;
      break;
    case SpillArea::GPRCS1:
      GPRCS1Size += 4;
      break;
    case SpillArea::GPRCS2:
      GPRCS2Size += 4;
      break;
    case SpillArea::FPStatus:
      FPStatusSize += 4;
      break;
    case SpillArea::DPRCS1:
      DPRCS1Size += 8;
      break;
    case SpillArea::GPRCS3:
      GPRCS3Size += 4;
      break;
    case SpillArea::DPRCS2:
      DPRCS2Size += 8;
      break;
    }
  }

  // The spill instructions themselves were already emitted by
  // spillCalleeSavedRegisters; below we only walk MBBI past them, recording
  // each one's SP adjustment for the CFI bookkeeping.
  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
                              DPRCS1Push, GPRCS3Push;

  // Move past the PAC computation.
  if (AFI->shouldSignReturnAddress())
    LastPush = MBBI++;

  // Move past FPCXT area.
  if (FPCXTSaveSize > 0) {
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPCXTSaveSize, BeforeFPSet: BeforeFPPush);
  }

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -ArgRegsSaveSize,
                 MIFlags: MachineInstr::FrameSetup);
    LastPush = std::prev(x: MBBI);
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: ArgRegsSaveSize, BeforeFPSet: BeforeFPPush);
  }

  // Move past area 1.
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS1Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS1)
      BeforeFPPush = false;
  }

  // Determine starting offsets of spill areas. These offsets are all positive
  // offsets from the bottom of the lowest-addressed callee-save area
  // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
  // of the spill area in question.
  unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
  unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;

  Align DPRAlign = DPRCS1Size ? std::min(a: Align(8), b: Alignment) : Align(4);
  // Padding (0 or 4 bytes) needed so the DPR save area is 8-byte aligned.
  unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                         GPRCS2Size + FPStatusSize) %
                        DPRAlign.value();

  unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;

  if (HasFP) {
    // Offset from the CFA to the saved frame pointer, will be negative.
    [[maybe_unused]] int FPOffset = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI);
    LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
                      << ", FPOffset: " << FPOffset << "\n");
    // Cross-check the conservative estimate made before spill-slot
    // assignment (see getMaxFPOffset).
    assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
           "Max FP estimation is wrong");
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);

  // Move past area 2.
  if (GPRCS2Size > 0) {
    assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS2Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS2)
      BeforeFPPush = false;
  }

  // Move past FP status save area.
  if (FPStatusSize > 0) {
    // Skip the VMRS reads that materialize the FP status registers; the
    // store that actually pushes them follows.
    while (MBBI != MBB.end()) {
      unsigned Opc = MBBI->getOpcode();
      if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
        MBBI++;
      else
        break;
    }
    LastPush = MBBI++;
    // NOTE(review): unlike the other areas this does not pass BeforeFPPush —
    // confirm the FP can never be spilled after the FP status save.
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPStatusSize);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
  // .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes: DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -DPRGapSize,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: DPRGapSize, BeforeFPSet: BeforeFPPush);
    }
  }

  // Move past DPRCS1Size.
  if (DPRCS1Size > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(I: MBBI, SPAdjust: sizeOfSPAdjustment(MI: *MBBI),
                                     BeforeFPSet: BeforeFPPush);
      DPRCS1Push = LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (DPRCS2Size > 0) {
    MBBI = skipAlignedDPRCS2Spills(MI: MBBI, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(ObjectIdx: D8SpillFI);
  } else
    NumBytes = DPRCS1Offset;

  // Move past GPRCS3, if using SplitR11WindowsSEH.
  if (GPRCS3Size > 0) {
    assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
    GPRCS3Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS3Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS3)
      BeforeFPPush = false;
  }

  // NOTE(review): with SplitR11WindowsSEH and an FP, no SEH_StackAlloc is
  // emitted for the probe below — presumably the FP-based unwind info covers
  // the allocation; confirm against the SEH emission rules.
  bool NeedsWinCFIStackAlloc = NeedsWinCFI;
  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
    NeedsWinCFIStackAlloc = false;

  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes)) {
    // __chkstk takes the allocation size in words in r4 and performs the
    // probed SP decrement (SP -= r4 after the call, done explicitly below).
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
          .addImm(Val: NumWords)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      // Split into two instructions here, instead of using t2MOVi32imm,
      // to allow inserting accurate SEH instructions (including accurate
      // instruction size for each of them).
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
          .addImm(Val: NumWords & 0xffff)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVTi16), DestReg: ARM::R4)
          .addReg(RegNo: ARM::R4)
          .addImm(Val: NumWords >> 16)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    const ARMTargetLowering *TLI = STI.getTargetLowering();
    RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(Call: RTLIB::STACK_PROBE);
    if (ChkStkLibcall == RTLIB::Unsupported)
      reportFatalUsageError(reason: "no available implementation of __chkstk");
    const char *ChkStk = TLI->getLibcallImplName(Call: ChkStkLibcall).data();

    switch (TM.getCodeModel()) {
    case CodeModel::Tiny:
      llvm_unreachable("Tiny code model not available on ARM.");
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBL))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addExternalSymbol(FnName: ChkStk)
          .addReg(RegNo: ARM::R4, Flags: RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      // Large code model: the symbol may be out of BL range, so load its
      // address into r12 and branch indirectly.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi32imm), DestReg: ARM::R12)
          .addExternalSymbol(FnName: ChkStk)
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBLXr))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addReg(RegNo: ARM::R12, Flags: RegState::Kill)
          .addReg(RegNo: ARM::R4, Flags: RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    MachineInstrBuilder Instr, SEH;
    Instr = BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2SUBrr), DestReg: ARM::SP)
                .addReg(RegNo: ARM::SP, Flags: RegState::Kill)
                .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
                .setMIFlags(MachineInstr::FrameSetup)
                .add(MOs: predOps(Pred: ARMCC::AL))
                .add(MO: condCodeOp());
    if (NeedsWinCFIStackAlloc) {
      SEH = BuildMI(MF, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
                .addImm(Val: NumBytes)
                .addImm(/*Wide=*/Val: 1)
                .setMIFlags(MachineInstr::FrameSetup);
      MBB.insertAfter(I: Instr, MI: SEH);
    }
    // The probe sequence has performed the whole local allocation.
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      // mov sp, r7
      // sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  MachineBasicBlock::iterator AfterPush;
  if (HasFP) {
    MachineBasicBlock::iterator FPPushInst;
    // Offset from SP immediately after the push which saved the FP to the FP
    // save slot.
    int64_t FPOffsetAfterPush;
    switch (FramePtrSpillArea) {
    case SpillArea::GPRCS1:
      FPPushInst = GPRCS1Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    case SpillArea::GPRCS2:
      FPPushInst = GPRCS2Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    case SpillArea::GPRCS3:
      FPPushInst = GPRCS3Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                          FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    default:
      llvm_unreachable("frame pointer in unknown spill area");
      break;
    }
    AfterPush = std::next(x: FPPushInst);
    if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
      assert(FPOffsetAfterPush == 0);

    // Emit the MOV or ADD to set up the frame pointer register.
    emitRegPlusImmediate(isARM: !AFI->isThumbFunction(), MBB, MBBI&: AfterPush, dl, TII,
                         DestReg: FramePtr, SrcReg: ARM::SP, NumBytes: FPOffsetAfterPush,
                         MIFlags: MachineInstr::FrameSetup);

    if (!NeedsWinCFI) {
      // Emit DWARF info to find the CFA using the frame pointer from this
      // point onward.
      CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
      if (FPOffsetAfterPush != 0)
        CFIBuilder.buildDefCFA(Reg: FramePtr, Offset: -MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI));
      else
        CFIBuilder.buildDefCFARegister(Reg: FramePtr);
    }
  }

  // Emit a SEH opcode indicating the prologue end. The rest of the prologue
  // instructions below don't need to be replayed to unwind the stack.
  if (NeedsWinCFI && MBBI != MBB.begin()) {
    MachineBasicBlock::iterator End = MBBI;
    if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
      End = AfterPush;
    insertSEHRange(MBB, Start: {}, End, TII, MIFlags: MachineInstr::FrameSetup);
    BuildMI(BB&: MBB, I: End, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
    MF.setHasWinCFI(true);
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF cf instructions to describe the situation. Start by
  // recording where each register ended up:
  if (!NeedsWinCFI) {
    for (const auto &Entry : reverse(C: CSI)) {
      MCRegister Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      MachineBasicBlock::iterator CFIPos;
      switch (getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(),
                           RegInfo)) {
      case SpillArea::GPRCS1:
        CFIPos = std::next(x: GPRCS1Push);
        break;
      case SpillArea::GPRCS2:
        CFIPos = std::next(x: GPRCS2Push);
        break;
      case SpillArea::DPRCS1:
        CFIPos = std::next(x: DPRCS1Push);
        break;
      case SpillArea::GPRCS3:
        CFIPos = std::next(x: GPRCS3Push);
        break;
      case SpillArea::FPStatus:
      case SpillArea::FPCXT:
      case SpillArea::DPRCS2:
        // FPCXT, FPStatus and DPRCS2 are not represented in the DWARF info;
        // CFIPos stays invalid so no .cfi_offset is emitted for them.
        break;
      }

      if (CFIPos.isValid()) {
        CFIInstBuilder(MBB, CFIPos, MachineInstr::FrameSetup)
            .buildOffset(Reg: Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
                         Offset: MFI.getObjectOffset(ObjectIdx: FI));
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  if (!NeedsWinCFI) {
    LLVM_DEBUG(DefCFAOffsetCandidates.dump());
    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
  }

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  // Record the final area sizes for use by the epilogue and frame-index
  // resolution.
  AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setFPStatusSavesSize(FPStatusSize);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
  AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
    Align MaxAlign = MFI.getMaxAlign();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::SP, Alignment: MaxAlign,
                               MustBeSingleInstruction: false);
    } else {
      // We cannot use sp as source/dest register here, thus we're using r4 to
      // perform the calculations. We're emitting the following sequence:
      // mov r4, sp
      // -- use emitAligningInstructions to produce best sequence to zero
      // -- out lower bits in r4
      // mov sp, r4
      // FIXME: It will be better just to find spare register here.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::R4)
          .addReg(RegNo: ARM::SP, Flags: RegState::Kill)
          .add(MOs: predOps(Pred: ARMCC::AL));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::R4, Alignment: MaxAlign,
                               MustBeSingleInstruction: false);
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
          .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: RegInfo->getBaseRegister())
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    else
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: RegInfo->getBaseRegister())
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}
1398
// Emit the function epilogue: deallocate the local stack area (possibly by
// restoring SP from the frame pointer), step past the callee-save restore
// instructions, pop argument/vararg space, and emit SEH unwind markers.
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");

  // Amount of stack space we reserved next to incoming args for either
  // varargs registers or stack arguments in tail calls made by this function.
  unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves on the first (from top) terminator instructions.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineBasicBlock::iterator RangeStart;
  if (!AFI->hasStackFrame()) {
    // No callee-save restores to walk past; just undo any SP decrement.
    if (MF.hasWinCFI()) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    if (NumBytes + IncomingArgStackToRestore != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes: NumBytes + IncomingArgStackToRestore,
                   MIFlags: MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(Flag: MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(Flag: MachineInstr::FrameDestroy))
        ++MBBI;
    }

    if (MF.hasWinCFI()) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -=
        (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
         AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() +
         AFI->getFPStatusSavesSize() + AFI->getDPRCalleeSavedGapSize() +
         AFI->getDPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea3Size());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: FramePtr, NumBytes: -NumBytes,
                                  Pred: ARMCC::AL, PredReg: 0, TII,
                                  MIFlags: MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          // mov sp, r7
          // sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::R4, BaseReg: FramePtr, NumBytes: -NumBytes,
                                 Pred: ARMCC::AL, PredReg: 0, TII, MIFlags: MachineInstr::FrameDestroy);
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
              .addReg(RegNo: ARM::R4)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ARM::SP)
              .addReg(RegNo: FramePtr)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .add(MO: condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
              .addReg(RegNo: FramePtr)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MIFlags: MachineInstr::FrameDestroy);

    // Increment past our save areas. The pop/vldm instructions were already
    // emitted by restoreCalleeSavedRegisters; here we only advance MBBI past
    // them (in reverse area order relative to the prologue pushes).
    if (AFI->getGPRCalleeSavedArea3Size()) {
      assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
      (void)PushPopSplit;
      MBBI++;
    }

    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: AFI->getDPRCalleeSavedGapSize(),
                   MIFlags: MachineInstr::FrameDestroy);
    }

    if (AFI->getGPRCalleeSavedArea2Size()) {
      assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
      (void)PushPopSplit;
      MBBI++;
    }
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

    if (ReservedArgStack || IncomingArgStackToRestore) {
      assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
             "attempting to restore negative stack amount");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes: ReservedArgStack + IncomingArgStackToRestore,
                   MIFlags: MachineInstr::FrameDestroy);
    }

    // Validate PAC, It should have been already popped into R12. For CMSE entry
    // function, the validation instruction is emitted during expansion of the
    // tBXNS_RET, since the validation must use the value of SP at function
    // entry, before saving, resp. after restoring, FPCXTNS.
    if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
      BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2AUT));
  }

  if (MF.hasWinCFI()) {
    insertSEHRange(MBB, Start: RangeStart, End: MBB.end(), TII, MIFlags: MachineInstr::FrameDestroy);
    BuildMI(BB&: MBB, I: MBB.end(), MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogEnd))
        .setMIFlag(MachineInstr::FrameDestroy);
  }
}
1564
1565/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1566/// debug info. It's the same as what we use for resolving the code-gen
1567/// references for now. FIXME: This can go wrong when references are
1568/// SP-relative and simple call frames aren't used.
1569StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1570 int FI,
1571 Register &FrameReg) const {
1572 return StackOffset::getFixed(Fixed: ResolveFrameIndexReference(MF, FI, FrameReg, SPAdj: 0));
1573}
1574
// Resolve frame index FI to a register-relative offset. FrameReg is set to
// the register the returned offset is relative to: SP by default, the frame
// pointer for fixed objects / FP-reachable slots, or the base pointer when SP
// can move. SPAdj is the extra SP adjustment in effect at the use site and is
// folded out again whenever a non-SP register is chosen.
int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 int SPAdj) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Offset of the slot from SP at function entry (post-prologue SP + stack
  // size), before any SPAdj is applied.
  int Offset = MFI.getObjectOffset(ObjectIdx: FI) + MFI.getStackSize();
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI.isFixedObjectIndex(ObjectIdx: FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
      // Base pointer is stable, so the SP adjustment does not apply.
      Offset -= SPAdj;
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
      // Otherwise fall through to the base-pointer case at the bottom.
    } else if (AFI->isThumbFunction()) {
      // Prefer SP to base pointer, if the offset is suitably aligned and in
      // range as the effective range of the immediate offset is bigger when
      // basing off SP.
      // Use add <rd>, sp, #<imm8>
      // ldr <rd>, [sp, #<imm8>]
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
  // That can happen if we forced a base pointer for a large call frame.
  if (RegInfo->hasBasePointer(MF)) {
    FrameReg = RegInfo->getBaseRegister();
    Offset -= SPAdj;
  }
  return Offset;
}
1656
/// Emit the push (store) instructions for the subset of callee-saved
/// registers in \p CSI accepted by the predicate \p Func. Registers are
/// grouped into store-multiple instructions (\p StmOpc); a lone register is
/// pushed with the single-register opcode \p StrOpc when one is provided
/// (StrOpc == 0 forces the store-multiple form). With \p NoGap, only runs of
/// encoding-consecutive registers may share one store-multiple, as required
/// by VPUSH register lists.
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  // Pair of (register, should-set-kill-flag).
  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  // Walk CSI from the back; each outer iteration gathers one register run
  // and emits one push instruction for it.
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      MCRegister Reg = CSI[i-1].getReg();
      if (!Func(Reg))
        continue;

      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(PhysReg: Reg))
        MBB.addLiveIn(PhysReg: Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in. This
      // happens with the @llvm-returnaddress intrinsic and with arguments
      // passed in callee saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(Elt: std::make_pair(x&: Reg, /*isKill=*/y: !isLiveIn));
    }

    if (Regs.empty())
      continue;

    // Push register lists must appear in ascending encoding order.
    llvm::sort(C&: Regs, Comp: [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(Reg: LHS.first) < TRI.getEncodingValue(Reg: RHS.first);
    });

    if (Regs.size() > 1 || StrOpc== 0) {
      // Several registers (or no single-store opcode available): emit one
      // SP-writeback store-multiple carrying the whole run.
      MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StmOpc), DestReg: ARM::SP)
                                    .addReg(RegNo: ARM::SP)
                                    .setMIFlags(MachineInstr::FrameSetup)
                                    .add(MOs: predOps(Pred: ARMCC::AL));
      for (const auto &[Reg, Kill] : Regs)
        MIB.addReg(RegNo: Reg, Flags: getKillRegState(B: Kill));
    } else if (Regs.size() == 1) {
      // Single register: pre-indexed store with a -4 offset (push one word).
      BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StrOpc), DestReg: ARM::SP)
          .addReg(RegNo: Regs[0].first, Flags: getKillRegState(B: Regs[0].second))
          .addReg(RegNo: ARM::SP)
          .setMIFlags(MachineInstr::FrameSetup)
          .addImm(Val: -4)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer to
    // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
    if (MI != MBB.begin())
      --MI;
  }
}
1729
/// Emit the pop (load) instructions for the subset of callee-saved registers
/// in \p CSI accepted by \p Func, mirroring emitPushInst. When the block ends
/// in a plain return and the conditions allow it, LR is popped directly into
/// PC and the return instruction is folded into the load-multiple.
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  // Classify the instruction we are inserting before (normally the
  // terminator); several return forms rule out folding the return into the
  // pop below.
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall =
        (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
         RetOpcode == ARM::TCRETURNrinotr12);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap = RetOpcode == ARM::TRAP || RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  // As in emitPushInst: walk CSI from the back, one pop instruction per
  // register run.
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      MCRegister Reg = Info.getReg();
      if (!Func(Reg))
        continue;

      // Pop LR straight into PC for a plain return. This is not possible for
      // tail calls, vararg returns, interrupt/trap returns, CMSE entries,
      // functions that must restore argument stack, pre-v5T targets, PAC, or
      // push/pop split variants that handle LR separately.
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
          (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
           PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Elt: Reg);
    }

    if (Regs.empty())
      continue;

    // Pop register lists must appear in ascending encoding order.
    llvm::sort(C&: Regs, Comp: [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(Reg: LHS) < TRI.getEncodingValue(Reg: RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      // Several registers (or no single-load opcode): SP-writeback
      // load-multiple defining every popped register.
      MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
                                    .addReg(RegNo: ARM::SP)
                                    .add(MOs: predOps(Pred: ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned Reg : Regs)
        MIB.addReg(RegNo: Reg, Flags: getDefRegState(B: true));
      if (DeleteRet) {
        // The LDM now returns; carry over the return's implicit operands
        // and drop the original return instruction.
        if (MI != MBB.end()) {
          MIB.copyImplicitOps(OtherMI: *MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdrOpc), DestReg: Regs[0])
              .addReg(RegNo: ARM::SP, Flags: RegState::Define)
              .addReg(RegNo: ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(RegNo: 0);
        MIB.addImm(Val: ARM_AM::getAM2Opc(Opc: ARM_AM::add, Imm12: 4, SO: ARM_AM::no_shift));
      } else
        MIB.addImm(Val: 4);
      MIB.add(MOs: predOps(Pred: ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}
1840
1841void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
1842 MachineBasicBlock::iterator MI,
1843 ArrayRef<CalleeSavedInfo> CSI,
1844 unsigned PushOpc) const {
1845 MachineFunction &MF = *MBB.getParent();
1846 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1847
1848 SmallVector<MCRegister> Regs;
1849 auto RegPresent = [&CSI](MCRegister Reg) {
1850 return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
1851 return C.getReg() == Reg;
1852 });
1853 };
1854
1855 // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
1856 // instruction.
1857 if (RegPresent(ARM::FPSCR)) {
1858 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS), DestReg: ARM::R4)
1859 .add(MOs: predOps(Pred: ARMCC::AL))
1860 .setMIFlags(MachineInstr::FrameSetup);
1861
1862 Regs.push_back(Elt: ARM::R4);
1863 }
1864
1865 // If we need to save FPEXC, then we must move FPEXC into R5 with the
1866 // VMRS_FPEXC instruction.
1867 if (RegPresent(ARM::FPEXC)) {
1868 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS_FPEXC), DestReg: ARM::R5)
1869 .add(MOs: predOps(Pred: ARMCC::AL))
1870 .setMIFlags(MachineInstr::FrameSetup);
1871
1872 Regs.push_back(Elt: ARM::R5);
1873 }
1874
1875 // If neither FPSCR and FPEXC are present, then do nothing.
1876 if (Regs.size() == 0)
1877 return;
1878
1879 // Push both R4 and R5 onto the stack, if present.
1880 MachineInstrBuilder MIB =
1881 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: PushOpc), DestReg: ARM::SP)
1882 .addReg(RegNo: ARM::SP)
1883 .add(MOs: predOps(Pred: ARMCC::AL))
1884 .setMIFlags(MachineInstr::FrameSetup);
1885
1886 for (Register Reg : Regs) {
1887 MIB.addReg(RegNo: Reg);
1888 }
1889}
1890
1891void ARMFrameLowering::emitFPStatusRestores(
1892 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1893 MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
1894 MachineFunction &MF = *MBB.getParent();
1895 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1896
1897 auto RegPresent = [&CSI](MCRegister Reg) {
1898 return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
1899 return C.getReg() == Reg;
1900 });
1901 };
1902
1903 // Do nothing if we don't need to restore any FP status registers.
1904 if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
1905 return;
1906
1907 // Pop registers off of the stack.
1908 MachineInstrBuilder MIB =
1909 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
1910 .addReg(RegNo: ARM::SP)
1911 .add(MOs: predOps(Pred: ARMCC::AL))
1912 .setMIFlags(MachineInstr::FrameDestroy);
1913
1914 // If FPSCR was saved, it will be popped into R4.
1915 if (RegPresent(ARM::FPSCR)) {
1916 MIB.addReg(RegNo: ARM::R4, Flags: RegState::Define);
1917 }
1918
1919 // If FPEXC was saved, it will be popped into R5.
1920 if (RegPresent(ARM::FPEXC)) {
1921 MIB.addReg(RegNo: ARM::R5, Flags: RegState::Define);
1922 }
1923
1924 // Move the FPSCR value back into the register with the VMSR instruction.
1925 if (RegPresent(ARM::FPSCR)) {
1926 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR))
1927 .addReg(RegNo: ARM::R4)
1928 .add(MOs: predOps(Pred: ARMCC::AL))
1929 .setMIFlags(MachineInstr::FrameDestroy);
1930 }
1931
1932 // Move the FPEXC value back into the register with the VMSR_FPEXC
1933 // instruction.
1934 if (RegPresent(ARM::FPEXC)) {
1935 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR_FPEXC))
1936 .addReg(RegNo: ARM::R5)
1937 .add(MOs: predOps(Pred: ARMCC::AL))
1938 .setMIFlags(MachineInstr::FrameDestroy);
1939 }
1940}
1941
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// The exact instruction sequence emitted here is load-bearing:
/// skipAlignedDPRCS2Spills counts these instructions to step past them, so
/// keep the two functions in sync.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    unsigned DNum = I.getReg() - ARM::D8;
    // Note: DNum wraps around for registers below d8, so this test also
    // filters out non-d-registers.
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(ObjectIdx: FI, Alignment: DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer.  MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack.  Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(ObjectIdx: FI, Alignment: MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
      .addReg(RegNo: ARM::SP)
      .addImm(Val: 8 * NumAlignedDPRCS2Regs)
      .add(MOs: predOps(Pred: ARMCC::AL))
      .add(MO: condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MBBI: MI, DL, Reg: ARM::R4, Alignment: MaxAlign, MustBeSingleInstruction: true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::SP)
                                .addReg(RegNo: ARM::R4)
                                .add(MOs: predOps(Pred: ARMCC::AL));
  if (!isThumb)
    MIB.add(MO: condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Qwb_fixed), DestReg: ARM::R4)
        .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
        .addImm(Val: 16)
        .addReg(RegNo: NextReg)
        .addReg(RegNo: SupReg, Flags: RegState::ImplicitKill)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Q))
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: NextReg)
        .addReg(RegNo: SupReg, Flags: RegState::ImplicitKill)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1q64))
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(PhysReg: NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VSTRD))
        .addReg(RegNo: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: (NextReg - R4BaseReg) * 2)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
}
2084
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
///
/// This relies on emitAlignedDPRCS2Spills producing exactly three alignment
/// instructions followed by one spill instruction per emitted vst1/vstr:
/// three spills for 7 registers, two for 3/5/6, and one for 1/2/4.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  // Skip the three stack-realignment instructions:
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    // Three spill instructions were emitted; skip the second one here, the
    // third via the default case below.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  default:
    // Two (or, falling through from above, three) spill instructions.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  case 1:
  case 2:
  case 4:
    // A single spill instruction remains; it must carry the r4 kill added by
    // emitAlignedDPRCS2Spills.
    assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
    ++MI;
  }
  return MI;
}
2114
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
      .addFrameIndex(Idx: D8SpillFI)
      .addImm(Val: 0)
      .add(MOs: predOps(Pred: ARMCC::AL))
      .add(MO: condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8, mirroring
  // the sequence emitted by emitAlignedDPRCS2Spills.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Qwb_fixed), DestReg: NextReg)
        .addReg(RegNo: ARM::R4, Flags: RegState::Define)
        .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg, Flags: RegState::ImplicitDefine)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Q), DestReg: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg, Flags: RegState::ImplicitDefine)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1q64), DestReg: SupReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // vldr.64 uses addrmode5 which has an offset scale of 4.
  if (NumAlignedDPRCS2Regs)
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLDRD), DestReg: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 2 * (NextReg - R4BaseReg))
        .add(MOs: predOps(Pred: ARMCC::AL));

  // The last reload instruction inserted above kills the scratch register r4.
  std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
}
2207
/// Spill the callee-saved registers in \p CSI into their assigned stack
/// areas. Emits the PAC computation and FPCXT_NS save first when required,
/// then pushes the GPRCS1, GPRCS2, FP-status, DPRCS1 and GPRCS3 areas in that
/// order (highest address first; the stack grows down), and finally the
/// aligned DPRCS2 spills.
bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  // Push/store opcodes for the GPR areas (Thumb2 vs ARM encodings).
  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // Save the non-secure floating point context.
  if (llvm::any_of(Range&: CSI, P: [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VSTR_FPCXTNS_pre),
            DestReg: ARM::SP)
        .addReg(RegNo: ARM::SP)
        .addImm(Val: -4)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // Predicates selecting the registers belonging to each spill area.
  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  // Push each area in layout order; restoreCalleeSavedRegisters pops in the
  // exact reverse order.
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS1);
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS2);
  emitFPStatusSaves(MBB, MI, CSI, PushOpc);
  emitPushInst(MBB, MI, CSI, StmOpc: FltOpc, StrOpc: 0, NoGap: true, Func: IsDPRCS1);
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS3);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
2273
/// Restore the callee-saved registers in \p CSI, popping each spill area in
/// the exact reverse of the order used by spillCalleeSavedRegisters: aligned
/// DPRCS2 reloads first, then GPRCS3, DPRCS1, FP-status, GPRCS2 and GPRCS1.
bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  // isVarArg controls whether LR may be popped straight into PC; see
  // emitPopInst.
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  // Pop/load opcodes for the GPR areas (Thumb2 vs ARM encodings).
  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;

  // Predicates selecting the registers belonging to each spill area.
  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS3);
  emitPopInst(MBB, MI, CSI, LdmOpc: FltOpc, LdrOpc: 0, isVarArg, NoGap: true, Func: IsDPRCS1);
  emitFPStatusRestores(MBB, MI, CSI, LdmOpc: PopOpc);
  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS2);
  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS1);

  return true;
}
2325
2326// FIXME: Make generic?
2327static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2328 const ARMBaseInstrInfo &TII) {
2329 unsigned FnSize = 0;
2330 for (auto &MBB : MF) {
2331 for (auto &MI : MBB)
2332 FnSize += TII.getInstSizeInBytes(MI);
2333 }
2334 if (MF.getJumpTableInfo())
2335 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2336 FnSize += Table.MBBs.size() * 4;
2337 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2338 LLVM_DEBUG(dbgs() << "Estimated function size for " << MF.getName() << " = "
2339 << FnSize << " bytes\n");
2340 return FnSize;
2341}
2342
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
///
/// \p HasNonSPFrameIndex is set when a frame-index operand sits in a register
/// class that cannot hold SP, so frame-index elimination will need a
/// different base register.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  // Start from the widest immediate (12 bits) and shrink as more restrictive
  // addressing modes are encountered.
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, OpNum: i);
        if (RegClass && !RegClass->contains(Reg: ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          // 8-bit immediate, scaled by 2.
          Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          // 8-bit immediate, scaled by 4.
          Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
2426
2427// In functions that realign the stack, it can be an advantage to spill the
2428// callee-saved vector registers after realigning the stack. The vst1 and vld1
2429// instructions take alignment hints that can improve performance.
2430static void
2431checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
2432 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2433 if (!SpillAlignedNEONRegs)
2434 return;
2435
2436 // Naked functions don't spill callee-saved registers.
2437 if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
2438 return;
2439
2440 // We are planning to use NEON instructions vst1 / vld1.
2441 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2442 return;
2443
2444 // Don't bother if the default stack alignment is sufficiently high.
2445 if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
2446 return;
2447
2448 // Aligned spills require stack realignment.
2449 if (!static_cast<const ARMBaseRegisterInfo *>(
2450 MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
2451 return;
2452
2453 // We always spill contiguous d-registers starting from d8. Count how many
2454 // needs spilling. The register allocator will almost always use the
2455 // callee-saved registers in order, but it can happen that there are holes in
2456 // the range. Registers above the hole will be spilled to the standard DPRCS
2457 // area.
2458 unsigned NumSpills = 0;
2459 for (; NumSpills < 8; ++NumSpills)
2460 if (!SavedRegs.test(Idx: ARM::D8 + NumSpills))
2461 break;
2462
2463 // Don't do this for just one d-register. It's not worth it.
2464 if (NumSpills < 2)
2465 return;
2466
2467 // Spill the first NumSpills D-registers after realigning the stack.
2468 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2469
2470 // A scratch register is required for the vst1 / vld1 instructions.
2471 SavedRegs.set(ARM::R4);
2472}
2473
2474bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2475 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2476 // upon function entry (resp. restore it immmediately before return)
2477 if (STI.hasV8_1MMainlineOps() &&
2478 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2479 return false;
2480
2481 // We are disabling shrinkwrapping for now when PAC is enabled, as
2482 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2483 // generated. A follow-up patch will fix this in a more performant manner.
2484 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2485 SpillsLR: true /* SpillsLR */))
2486 return false;
2487
2488 return true;
2489}
2490
2491bool ARMFrameLowering::requiresAAPCSFrameRecord(
2492 const MachineFunction &MF) const {
2493 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2494 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2495}
2496
2497// Thumb1 may require a spill when storing to a frame index through FP (or any
2498// access with execute-only), for cases where FP is a high register (R11). This
2499// scans the function for cases where this may happen.
2500static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
2501 const TargetFrameLowering &TFI) {
2502 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2503 if (!AFI->isThumb1OnlyFunction())
2504 return false;
2505
2506 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2507 for (const auto &MBB : MF)
2508 for (const auto &MI : MBB)
2509 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2510 STI.genExecuteOnly())
2511 for (const auto &Op : MI.operands())
2512 if (Op.isFI()) {
2513 Register Reg;
2514 TFI.getFrameIndexReference(MF, FI: Op.getIndex(), FrameReg&: Reg);
2515 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2516 return true;
2517 }
2518 return false;
2519}
2520
/// Decide which callee-saved registers must be spilled for this function,
/// whether a stack frame is required at all, and whether extra registers or
/// an emergency spill slot are needed for register scavenging. Called by PEI
/// before prologue/epilogue insertion; results are recorded in SavedRegs and
/// in ARMFunctionInfo.
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
                           !MF.getTarget().Options.DisableFramePointerElim(MF);
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const Function &F = MF.getFunction();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = STI.getFramePointerReg();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // For a floating point interrupt, save these registers always, since LLVM
  // currently doesn't model reads/writes to these registers.
  if (F.hasFnAttribute(Kind: "interrupt") && F.hasFnAttribute(Kind: "save-fp")) {
    SavedRegs.set(ARM::FPSCR);
    SavedRegs.set(ARM::R4);

    // The FPEXC register is only present on non-M-class cores.
    if (STI.isMClass()) {
      SavedRegs.reset(Idx: ARM::FPEXC);
    } else {
      SavedRegs.set(ARM::FPEXC);
      SavedRegs.set(ARM::R5);
    }
  }

  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
  // since it's not always possible to restore sp from fp in a single
  // instruction.
  // FIXME: It will be better just to find spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, StackSizeInBytes: MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
    // requires stack alignment. We don't know for sure what the stack size
    // will be, but for this, an estimate is good enough. If anything
    // changes it, it'll be a spill, which implies we've used all the registers
    // and so R4 is already used, so not marking it here will be OK.
    // FIXME: It will be better just to find spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // When return address signing is enabled R12 is treated as callee-saved.
  if (AFI->shouldSignReturnAddress())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF: &MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Idx: Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    // FP/SIMD registers: track how many 4-byte slots they will consume so
    // the stack-size estimate below accounts for them.
    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (PushPopSplit != ARMSubtarget::SplitR7) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        [[fallthrough]];
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (PushPopSplit != ARMSubtarget::SplitR7) {
        UnspilledCS1GPRs.push_back(Elt: Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Elt: Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Elt: Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This enables
    // use of BL to implement far jump.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  //  and which instructions will need a scratch register for them. Is it
  //  worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);

  // Determine biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(ObjectIdx: I) + MFI.getObjectSize(ObjectIdx: I);
    MaxFixedOffset = std::max(a: MaxFixedOffset, b: MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
    //
    // A special case is the scenario where r11 is used as FP, where accesses
    // to a frame index will require its value to be moved into a low reg.
    // This is handled later on, once we are able to determine if we have any
    // fp-relative accesses.
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, TFI: this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, AFI: *AFI, MF);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets ||
      !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    // Save the FP if:
    // 1. We currently need it (HasFP), OR
    // 2. We might need it later due to stack realignment from aligned DPRCS2
    //    saves (which will make hasFP() become true in emitPrologue).
    if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if ((requiresAAPCSFrameRecord(MF) ||
           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
          !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(CI: LRPos);
      }
      auto FPPos = llvm::find(Range&: UnspilledCS1GPRs, Val: FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(CI: FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is the number of extra spills inserted for callee-save GPRs which
    // would not otherwise be used by the function. When greater than zero it
    // guarantees that it is possible to scavenge a register to hold the
    // address of a stack slot. On Thumb1, the register must be a valid operand
    // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
    // or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    unsigned NumExtraCSSpill = 0;

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions can not
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue for
      // free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(a: EntryRegDeficit, b: ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first push
      // instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Idx: Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: Reg);
          LLVM_DEBUG(
              dbgs()
              << printReg(Reg, TRI)
              << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP || FramePtr != ARM::R7) {
        if (SavedRegs.test(Idx: ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: ARM::R7);
          LLVM_DEBUG(
              dbgs()
              << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Idx: Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(Reg: ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(Idx: ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(PhysReg: Reg))
          NumExtraCSSpill++;
        UnspilledCS1GPRs.erase(CI: llvm::find(Range&: UnspilledCS1GPRs, Val: Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }

    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(CI: LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(PhysReg: ARM::LR) && !MRI.isPhysRegUsed(PhysReg: ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        NumExtraCSSpill++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned Reg : UnspilledCS1GPRs) {
          // Don't spill high register if the function is thumb. In the case of
          // Windows on ARM, accept R11 (frame pointer)
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              NumExtraCSSpill++;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg))
          NumExtraCSSpill++;
      }
    }

    // Estimate if we might need to scavenge registers at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments and for frame index accesses when FP is high register,
    // even when the frame itself is small.
    unsigned RegsNeeded = 0;
    if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, TFI: *this)) {
      RegsNeeded++;
      // With thumb1 execute-only we may need an additional register for saving
      // and restoring the CPSR.
      if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
        RegsNeeded++;
    }

    if (RegsNeeded > NumExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(PhysReg: Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Elt: Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(PhysReg: Reg)) {
            Extras.push_back(Elt: Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(PhysReg: Reg))
            NumExtraCSSpill++;
        }
      }
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            FI: MFI.CreateSpillStackObject(Size, Alignment));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(Idx: ARM::LR));
}
3065
3066void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
3067 MachineFrameInfo &MFI = MF.getFrameInfo();
3068 if (!MFI.isCalleeSavedInfoValid())
3069 return;
3070
3071 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3072 // into PC so it is not live out of the return block: Clear the Restored bit
3073 // in that case.
3074 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
3075 if (Info.getReg() != ARM::LR)
3076 continue;
3077 if (all_of(Range&: MF, P: [](const MachineBasicBlock &MBB) {
3078 return all_of(Range: MBB.terminators(), P: [](const MachineInstr &Term) {
3079 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3080 Term.getOpcode() == ARM::t2LDMIA_RET ||
3081 Term.getOpcode() == ARM::tPOP_RET;
3082 });
3083 })) {
3084 Info.setRestored(false);
3085 break;
3086 }
3087 }
3088}
3089
void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  // Callee-saved info is valid at this point, so refine whether LR really
  // needs to be marked as restored (it may be popped directly into PC).
  updateLRRestored(MF);
}
3095
3096void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
3097 BitVector &SavedRegs) const {
3098 TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
3099
3100 // If we have the "returned" parameter attribute which guarantees that we
3101 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3102 // record that fact for IPRA.
3103 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3104 if (AFI->getPreservesR0())
3105 SavedRegs.set(ARM::R0);
3106}
3107
/// Augment the callee-saved list with target-specific pseudo-CSRs: the CMSE
/// floating-point context (FPCXTNS) and, when return address signing is
/// enabled, R12 (which holds the computed PAC). Returns false so PEI still
/// assigns the actual spill slots.
bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(args: ARM::FPCXTNS);
    // FPCXTNS is restored as part of the return sequence, not by PEI.
    CSI.back().setRestored(false);
  }

  // For functions, which sign their return address, upon function entry, the
  // return address PAC is computed in R12. Treat R12 as a callee-saved register
  // in this case.
  const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
  if (AFI.shouldSignReturnAddress()) {
    // The order of register must match the order we push them, because the
    // PEI assigns frame indices in that order. That order depends on the
    // PushPopSplitVariation, there are only two cases which we use with return
    // address signing:
    switch (STI.getPushPopSplitVariation(MF)) {
    case ARMSubtarget::SplitR7:
      // LR, R7, R6, R5, R4, <R12>, R11, R10,  R9,  R8, D15-D8
      // Insert R12 just before the first high register / d-register entry.
      CSI.insert(position: find_if(Range&: CSI,
                         P: [=](const auto &CS) {
                           MCRegister Reg = CS.getReg();
                           return Reg == ARM::R10 || Reg == ARM::R11 ||
                                  Reg == ARM::R8 || Reg == ARM::R9 ||
                                  ARM::DPRRegClass.contains(Reg);
                         }),
                 x: CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::SplitR11AAPCSSignRA:
      // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
      // on the stack.
      CSI.insert(position: CSI.begin(), x: CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::NoSplit:
      assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
             "ABI-required frame pointers need a CSR split when signing return "
             "address.");
      // Insert R12 immediately after LR (the first non-LR entry).
      CSI.insert(position: find_if(Range&: CSI,
                         P: [=](const auto &CS) {
                           MCRegister Reg = CS.getReg();
                           return Reg != ARM::LR;
                         }),
                 x: CalleeSavedInfo(ARM::R12));
      break;
    default:
      llvm_unreachable("Unexpected CSR split with return address signing");
    }
  }

  return false;
}
3163
/// Return the fixed-offset spill slots for this target: the CMSE non-secure
/// floating-point context (FPCXTNS) always lives immediately below the
/// incoming SP.
const TargetFrameLowering::SpillSlot *
ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  static const SpillSlot FixedSpillOffsets[] = {{.Reg: ARM::FPCXTNS, .Offset: -4}};
  NumEntries = std::size(FixedSpillOffsets);
  return FixedSpillOffsets;
}
3170
/// Replace the ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos with real SP
/// adjustments (or nothing, when a reserved call frame makes them
/// unnecessary). Returns the iterator following the erased pseudo.
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  // Operand 1 of the destroy pseudo is the number of bytes the callee pops.
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(i: 1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  // Preserve any predicate carried by the pseudo on the emitted SP updates.
  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(i: PIdx).getImm();
  unsigned PredReg = TII.getFramePred(MI: *I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(I: *I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(SPAdj: Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -Amount, MIFlags: MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: Amount, MIFlags: MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -CalleePopAmount,
                 MIFlags: MachineInstr::NoFlags, Pred, PredReg);
  }
  return MBB.erase(I);
}
3224
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalize so at least one of the top two bits is set, remembering how far
  // we shifted (always an even amount, matching the even-rotation encoding).
  unsigned Shifted = 0;
  while ((Value & 0xC0000000) == 0) {
    Value <<= 2;
    Shifted += 2;
  }

  // Round up: keep the top byte, bumping it by one if any lower bit was set.
  bool Carry = (Value & 0x00FFFFFF) != 0;
  Value = (Value >> 24) + Carry;

  // If the carry overflowed the 8-bit payload (0xFF -> 0x100), drop the low
  // bits so the result stays encodable as an 8-bit value times a power of 4.
  if (Value & 0x0000100)
    Value &= 0x000001FC;

  // Undo the normalization, re-centering the payload around bit 24.
  return Shifted > 24 ? (Value >> (Shifted - 24)) : (Value << (24 - Shifted));
}
3253
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit. Frames smaller than this slack can therefore compare the
// limit directly against SP instead of first computing SP - StackSize
// (see CompareStackPointer in adjustForSegmentedStacks).
static const uint64_t kSplitStackAvailable = 256;
3257
// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
// must be well defined in order to allow for consistent implementations of the
// __morestack helper function. The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like (when calling __morestack)
//
// * r4 holds the minimum stack size requested for this function call
// * r5 holds the stack size of the arguments to the function
// * the beginning of the function is 3 instructions after the call to
// __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments on to the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, platforms other than
  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error(reason: "Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error(reason: "Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  // Wide 32-bit immediate mov when movt is available, Thumb1 pseudo otherwise.
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  // The split-stack check is emitted as five new blocks placed ahead of the
  // original prologue:
  //   PrevStackMBB: push {SR0, SR1} to free up the scratch registers.
  //   McrMBB:       compute SP - StackSize into SR1 (or just copy SP when the
  //                 frame fits in the TCB slack); on non-Thumb1 also read the
  //                 TLS base via mrc here.
  //   GetMBB:       load the stack limit into SR0 and compare it with SR1.
  //   AllocMBB:     slow path - set up r4/r5 and call __morestack.
  //   PostStackMBB: fast path - pop {SR0, SR1} and fall through to the
  //                 original prologue.
  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(Elt: &PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(Ptr: PredBB).second)
        WalkList.push_back(Elt: PredBB);
    }
  } while (!WalkList.empty());

  // The order in that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  BeforePrologueRegion.insert_range(R&: AddedBlocks);

  // Propagate the prologue's live-ins to every block in the region so the
  // new code does not appear to clobber values that were live across it.
  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(RegMaskPair: LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(Ptr: B);
    MF.insert(MBBI: PrologueMBB.getIterator(), MBB: B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(MBB: &PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(Old: &PrologueMBB, New: AddedBlocks[0]);
  }

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(Value: StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 8);
    CFIBuilder.buildOffset(Reg: ScratchReg1, Offset: -4);
    CFIBuilder.buildOffset(Reg: ScratchReg0, Offset: -8);
  }

  // mov SR1, sp
  if (Thumb) {
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ScratchReg1)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ScratchReg1)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .add(MO: condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBi8), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addReg(RegNo: ScratchReg1)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      // Stack size does not fit in an 8-bit immediate: materialize it in SR0
      // first, then subtract register-register.
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
            .addImm(Val: AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                   Val: AlignedStackSize);
      }
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBrr), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addReg(RegNo: ScratchReg1)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBri), DestReg: ScratchReg1)
          .addReg(RegNo: ARM::SP)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                 Val: AlignedStackSize);
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBrr), DestReg: ScratchReg1)
          .addReg(RegNo: ARM::SP)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    }
  }

  // Thumb1 has no mrc: fetch the stack limit through the __STACK_LIMIT
  // symbol instead of the TLS coprocessor register.
  if (Thumb && ST->isThumb1Only()) {
    if (ST->genExecuteOnly()) {
      BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
          .addExternalSymbol(FnName: "__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
      ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
          C&: MF.getFunction().getContext(), s: "__STACK_LIMIT", ID: PCLabelId, PCAdj: 0);
      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRpci), DestReg: ScratchReg0)
          .addConstantPoolIndex(Idx: CPI)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    // ldr SR0, [SR0]
    BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRi), DestReg: ScratchReg0)
        .addReg(RegNo: ScratchReg0)
        .addImm(Val: 0)
        .add(MOs: predOps(Pred: ARMCC::AL));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2MRC : ARM::MRC),
            DestReg: ScratchReg0)
        .addImm(Val: 15)
        .addImm(Val: 0)
        .addImm(Val: 13)
        .addImm(Val: 0)
        .addImm(Val: 3)
        .add(MOs: predOps(Pred: ARMCC::AL));

    // Use the last tls slot on android and a private field of the TCP on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            DestReg: ScratchReg0)
        .addReg(RegNo: ScratchReg0)
        .addImm(Val: 4 * TlsOffset)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
      .addReg(RegNo: ScratchReg0)
      .addReg(RegNo: ScratchReg1)
      .add(MOs: predOps(Pred: ARMCC::AL));

  // This jump is taken if StackLimit <= SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
      .addMBB(MBB: PostStackMBB)
      .addImm(Val: ARMCC::LS)
      .addReg(RegNo: ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  // The amount size of stack required
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg0)
          .add(MO: condCodeOp())
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
            .addImm(Val: AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                   Val: AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg0)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                 Val: AlignedStackSize);
    }
  }

  // Pass second argument for the __morestack by Scratch Register #1.
  // The amount size of stack consumed to save function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg1)
            .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
            Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(Value: ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg1)
          .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
          Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 12);
    CFIBuilder.buildOffset(Reg: ARM::LR, Offset: -12);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tBL))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addExternalSymbol(FnName: "__morestack");
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::BL))
        .addExternalSymbol(FnName: "__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 tPOP cannot target lr directly: pop into SR0 and move it over.
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addReg(RegNo: ScratchReg0);
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::LR)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::t2LDR_POST))
          .addReg(RegNo: ARM::LR, Flags: RegState::Define)
          .addReg(RegNo: ARM::SP, Flags: RegState::Define)
          .addReg(RegNo: ARM::SP)
          .addImm(Val: 4)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  }

  // Restore SR0 and SR1 in case of __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
    CFIInstBuilder(AllocMBB, MachineInstr::NoFlags).buildDefCFAOffset(Offset: 0);

  // Return from this function.
  BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ST->getReturnOpcode())).add(MOs: predOps(Pred: ARMCC::AL));

  // Restore SR0 and SR1 in case of __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 0);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
    CFIBuilder.buildSameValue(Reg: ScratchReg0);
    CFIBuilder.buildSameValue(Reg: ScratchReg1);
  }

  // Organizing MBB lists
  PostStackMBB->addSuccessor(Succ: &PrologueMBB);

  AllocMBB->addSuccessor(Succ: PostStackMBB);

  GetMBB->addSuccessor(Succ: PostStackMBB);
  GetMBB->addSuccessor(Succ: AllocMBB);

  McrMBB->addSuccessor(Succ: GetMBB);

  PrevStackMBB->addSuccessor(Succ: McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
3714