1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- (sp at function entry)
// |                                   |
// | varargs from registers            |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_lr                           |
// | prev_fp                           |
// | (a.k.a. "frame record")           |
// |                                   |
// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
// |                                   |
// | callee-saved gpr registers        |
// |                                   |
// |-----------------------------------|
// |                                   |
// | callee-saved fp/simd regs         |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.8-byte.alignment.....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- base pointer (not defined by ABI,
// |.variable-sized.local.variables....|    LLVM chooses r6)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
76//
77//
// To access a piece of data in a frame, a constant offset to it must be
// computable at compile time from one of the pointers (fp, bp, sp). The size
// of the areas with a dotted background cannot be computed at compile time
// if they are present, so all three of fp, bp and sp have to be set up in
// order to access all contents in the frame areas, assuming all of the frame
// areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
114#include "ARMMachineFunctionInfo.h"
115#include "ARMSubtarget.h"
116#include "MCTargetDesc/ARMAddressingModes.h"
117#include "MCTargetDesc/ARMBaseInfo.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
123#include "llvm/CodeGen/CFIInstBuilder.h"
124#include "llvm/CodeGen/MachineBasicBlock.h"
125#include "llvm/CodeGen/MachineConstantPool.h"
126#include "llvm/CodeGen/MachineFrameInfo.h"
127#include "llvm/CodeGen/MachineFunction.h"
128#include "llvm/CodeGen/MachineInstr.h"
129#include "llvm/CodeGen/MachineInstrBuilder.h"
130#include "llvm/CodeGen/MachineJumpTableInfo.h"
131#include "llvm/CodeGen/MachineModuleInfo.h"
132#include "llvm/CodeGen/MachineOperand.h"
133#include "llvm/CodeGen/MachineRegisterInfo.h"
134#include "llvm/CodeGen/RegisterScavenging.h"
135#include "llvm/CodeGen/TargetInstrInfo.h"
136#include "llvm/CodeGen/TargetRegisterInfo.h"
137#include "llvm/CodeGen/TargetSubtargetInfo.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
145#include "llvm/Support/CommandLine.h"
146#include "llvm/Support/Compiler.h"
147#include "llvm/Support/Debug.h"
148#include "llvm/Support/ErrorHandling.h"
149#include "llvm/Support/raw_ostream.h"
150#include "llvm/Target/TargetMachine.h"
151#include "llvm/Target/TargetOptions.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
160#define DEBUG_TYPE "arm-frame-lowering"
161
162using namespace llvm;
163
164static cl::opt<bool>
165SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(Val: true),
166 cl::desc("Align ARM NEON spills in prolog and epilog"));
167
168static MachineBasicBlock::iterator
169skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
170 unsigned NumAlignedDPRCS2Regs);
171
/// The callee-save spill areas of an ARM stack frame. getSpillArea() decides
/// which of these a given register is saved into.
enum class SpillArea {
  /// General-purpose registers saved by the first push.
  GPRCS1,
  /// General-purpose registers saved by a second push (used by the SplitR7
  /// and SplitR11AAPCSSignRA push/pop variations).
  GPRCS2,
  /// FPSCR and FPEXC.
  FPStatus,
  /// D registers saved with vpush.
  DPRCS1,
  /// The lowest-numbered D registers starting at d8 (NumAlignedDPRCS2Regs of
  /// them), saved after the stack has been re-aligned.
  DPRCS2,
  /// r11 and lr when the SplitR11WindowsSEH variation is in use.
  GPRCS3,
  /// FPCXTNS.
  FPCXT,
};
181
182/// Get the spill area that Reg should be saved into in the prologue.
183SpillArea getSpillArea(Register Reg,
184 ARMSubtarget::PushPopSplitVariation Variation,
185 unsigned NumAlignedDPRCS2Regs,
186 const ARMBaseRegisterInfo *RegInfo) {
187 // NoSplit:
188 // push {r0-r12, lr} GPRCS1
189 // vpush {r8-d15} DPRCS1
190 //
191 // SplitR7:
192 // push {r0-r7, lr} GPRCS1
193 // push {r8-r12} GPRCS2
194 // vpush {r8-d15} DPRCS1
195 //
196 // SplitR11WindowsSEH:
197 // push {r0-r10, r12} GPRCS1
198 // vpush {r8-d15} DPRCS1
199 // push {r11, lr} GPRCS3
200 //
201 // SplitR11AAPCSSignRA:
202 // push {r0-r10, r12} GPRSC1
203 // push {r11, lr} GPRCS2
204 // vpush {r8-d15} DPRCS1
205
206 // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
207 // the top of the stack frame.
208 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
209 // of SP. If used, it will be below the other save areas, after the stack has
210 // been re-aligned.
211
212 switch (Reg) {
213 default:
214 dbgs() << "Don't know where to spill " << printReg(Reg, TRI: RegInfo) << "\n";
215 llvm_unreachable("Don't know where to spill this register");
216 break;
217
218 case ARM::FPCXTNS:
219 return SpillArea::FPCXT;
220
221 case ARM::FPSCR:
222 case ARM::FPEXC:
223 return SpillArea::FPStatus;
224
225 case ARM::R0:
226 case ARM::R1:
227 case ARM::R2:
228 case ARM::R3:
229 case ARM::R4:
230 case ARM::R5:
231 case ARM::R6:
232 case ARM::R7:
233 return SpillArea::GPRCS1;
234
235 case ARM::R8:
236 case ARM::R9:
237 case ARM::R10:
238 if (Variation == ARMSubtarget::SplitR7)
239 return SpillArea::GPRCS2;
240 else
241 return SpillArea::GPRCS1;
242
243 case ARM::R11:
244 if (Variation == ARMSubtarget::SplitR7 ||
245 Variation == ARMSubtarget::SplitR11AAPCSSignRA)
246 return SpillArea::GPRCS2;
247 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
248 return SpillArea::GPRCS3;
249
250 return SpillArea::GPRCS1;
251
252 case ARM::R12:
253 if (Variation == ARMSubtarget::SplitR7)
254 return SpillArea::GPRCS2;
255 else
256 return SpillArea::GPRCS1;
257
258 case ARM::LR:
259 if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
260 return SpillArea::GPRCS2;
261 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
262 return SpillArea::GPRCS3;
263
264 return SpillArea::GPRCS1;
265
266 case ARM::D0:
267 case ARM::D1:
268 case ARM::D2:
269 case ARM::D3:
270 case ARM::D4:
271 case ARM::D5:
272 case ARM::D6:
273 case ARM::D7:
274 return SpillArea::DPRCS1;
275
276 case ARM::D8:
277 case ARM::D9:
278 case ARM::D10:
279 case ARM::D11:
280 case ARM::D12:
281 case ARM::D13:
282 case ARM::D14:
283 case ARM::D15:
284 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
285 return SpillArea::DPRCS2;
286 else
287 return SpillArea::DPRCS1;
288
289 case ARM::D16:
290 case ARM::D17:
291 case ARM::D18:
292 case ARM::D19:
293 case ARM::D20:
294 case ARM::D21:
295 case ARM::D22:
296 case ARM::D23:
297 case ARM::D24:
298 case ARM::D25:
299 case ARM::D26:
300 case ARM::D27:
301 case ARM::D28:
302 case ARM::D29:
303 case ARM::D30:
304 case ARM::D31:
305 return SpillArea::DPRCS1;
306 }
307}
308
309ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
310 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
311 STI(sti) {}
312
313bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
314 // iOS always has a FP for backtracking, force other targets to keep their FP
315 // when doing FastISel. The emitted code is currently superior, and in cases
316 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
317 return MF.getSubtarget<ARMSubtarget>().useFastISel();
318}
319
320/// Returns true if the target can safely skip saving callee-saved registers
321/// for noreturn nounwind functions.
322bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
323 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
324 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
325 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
326
327 // Frame pointer and link register are not treated as normal CSR, thus we
328 // can always skip CSR saves for nonreturning functions.
329 return true;
330}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
335bool ARMFrameLowering::hasFPImpl(const MachineFunction &MF) const {
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target want to forcibly keep frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
344 if (MF.getTarget().Options.DisableFramePointerElim(MF))
345 return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
352/// isFPReserved - Return true if the frame pointer register should be
353/// considered a reserved register on the scope of the specified function.
354bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
355 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
356}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
363bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has small immediate offset to
368 // address the stack frame. So a large call frame can cause poor codegen
369 // and may even makes it impossible to scavenge a register.
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
375
376/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
377/// call frame pseudos can be simplified. Unlike most targets, having a FP
378/// is not sufficient here since we still may reference some objects via SP
379/// even when FP is available in Thumb2 mode.
380bool
381ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
382 return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
383}
384
385// Returns how much of the incoming argument stack area we should clean up in an
386// epilogue. For the C calling convention this will be 0, for guaranteed tail
387// call conventions it can be positive (a normal return or a tail call to a
388// function that uses less stack space for arguments) or negative (for a tail
389// call to a function that needs more stack space than us for arguments).
390static int getArgumentStackToRestore(MachineFunction &MF,
391 MachineBasicBlock &MBB) {
392 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
400 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(i: 1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments, this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo().usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
427// Given a load or a store instruction, generate an appropriate unwinding SEH
428// code on Windows.
429static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
430 const TargetInstrInfo &TII,
431 unsigned Flags) {
432 unsigned Opc = MBBI->getOpcode();
433 MachineBasicBlock *MBB = MBBI->getParent();
434 MachineFunction &MF = *MBB->getParent();
435 DebugLoc DL = MBBI->getDebugLoc();
436 MachineInstrBuilder MIB;
437 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
438 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
439
440 Flags |= MachineInstr::NoMerge;
441
442 switch (Opc) {
443 default:
444 report_fatal_error(reason: "No SEH Opcode for instruction " + TII.getName(Opcode: Opc));
445 break;
446 case ARM::t2ADDri: // add.w r11, sp, #xx
447 case ARM::t2ADDri12: // add.w r11, sp, #xx
448 case ARM::t2MOVTi16: // movt r4, #xx
449 case ARM::tBL: // bl __chkstk
450 // These are harmless if used for just setting up a frame pointer,
451 // but that frame pointer can't be relied upon for unwinding, unless
452 // set up with SEH_SaveSP.
453 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
454 .addImm(/*Wide=*/Val: 1)
455 .setMIFlags(Flags);
456 break;
457
458 case ARM::t2MOVi16: { // mov(w) r4, #xx
459 bool Wide = MBBI->getOperand(i: 1).getImm() >= 256;
460 if (!Wide) {
461 MachineInstrBuilder NewInstr =
462 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
463 NewInstr.add(MO: MBBI->getOperand(i: 0));
464 NewInstr.add(MO: t1CondCodeOp(/*isDead=*/true));
465 for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands()))
466 NewInstr.add(MO);
467 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
468 MBB->erase(I: MBBI);
469 MBBI = NewMBBI;
470 }
471 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop)).addImm(Val: Wide).setMIFlags(Flags);
472 break;
473 }
474
475 case ARM::tBLXr: // blx r12 (__chkstk)
476 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
477 .addImm(/*Wide=*/Val: 0)
478 .setMIFlags(Flags);
479 break;
480
481 case ARM::t2MOVi32imm: // movw+movt
482 // This pseudo instruction expands into two mov instructions. If the
483 // second operand is a symbol reference, this will stay as two wide
484 // instructions, movw+movt. If they're immediates, the first one can
485 // end up as a narrow mov though.
486 // As two SEH instructions are appended here, they won't get interleaved
487 // between the two final movw/movt instructions, but it doesn't make any
488 // practical difference.
489 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
490 .addImm(/*Wide=*/Val: 1)
491 .setMIFlags(Flags);
492 MBB->insertAfter(I: MBBI, MI: MIB);
493 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
494 .addImm(/*Wide=*/Val: 1)
495 .setMIFlags(Flags);
496 break;
497
498 case ARM::t2STR_PRE:
499 if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
500 MBBI->getOperand(i: 2).getReg() == ARM::SP &&
501 MBBI->getOperand(i: 3).getImm() == -4) {
502 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
503 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
504 .addImm(Val: 1ULL << Reg)
505 .addImm(/*Wide=*/Val: 1)
506 .setMIFlags(Flags);
507 } else {
508 report_fatal_error(reason: "No matching SEH Opcode for t2STR_PRE");
509 }
510 break;
511
512 case ARM::t2LDR_POST:
513 if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
514 MBBI->getOperand(i: 2).getReg() == ARM::SP &&
515 MBBI->getOperand(i: 3).getImm() == 4) {
516 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
517 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
518 .addImm(Val: 1ULL << Reg)
519 .addImm(/*Wide=*/Val: 1)
520 .setMIFlags(Flags);
521 } else {
522 report_fatal_error(reason: "No matching SEH Opcode for t2LDR_POST");
523 }
524 break;
525
526 case ARM::t2LDMIA_RET:
527 case ARM::t2LDMIA_UPD:
528 case ARM::t2STMDB_UPD: {
529 unsigned Mask = 0;
530 bool Wide = false;
531 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
532 const MachineOperand &MO = MBBI->getOperand(i);
533 if (!MO.isReg() || MO.isImplicit())
534 continue;
535 unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
536 if (Reg == 15)
537 Reg = 14;
538 if (Reg >= 8 && Reg <= 13)
539 Wide = true;
540 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
541 Wide = true;
542 Mask |= 1 << Reg;
543 }
544 if (!Wide) {
545 unsigned NewOpc;
546 switch (Opc) {
547 case ARM::t2LDMIA_RET:
548 NewOpc = ARM::tPOP_RET;
549 break;
550 case ARM::t2LDMIA_UPD:
551 NewOpc = ARM::tPOP;
552 break;
553 case ARM::t2STMDB_UPD:
554 NewOpc = ARM::tPUSH;
555 break;
556 default:
557 llvm_unreachable("");
558 }
559 MachineInstrBuilder NewInstr =
560 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: NewOpc)).setMIFlags(MBBI->getFlags());
561 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
562 NewInstr.add(MO: MBBI->getOperand(i));
563 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
564 MBB->erase(I: MBBI);
565 MBBI = NewMBBI;
566 }
567 unsigned SEHOpc =
568 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
569 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: SEHOpc))
570 .addImm(Val: Mask)
571 .addImm(Val: Wide ? 1 : 0)
572 .setMIFlags(Flags);
573 break;
574 }
575 case ARM::VSTMDDB_UPD:
576 case ARM::VLDMDIA_UPD: {
577 int First = -1, Last = 0;
578 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: 4)) {
579 unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
580 if (First == -1)
581 First = Reg;
582 Last = Reg;
583 }
584 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveFRegs))
585 .addImm(Val: First)
586 .addImm(Val: Last)
587 .setMIFlags(Flags);
588 break;
589 }
590 case ARM::tSUBspi:
591 case ARM::tADDspi:
592 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
593 .addImm(Val: MBBI->getOperand(i: 2).getImm() * 4)
594 .addImm(/*Wide=*/Val: 0)
595 .setMIFlags(Flags);
596 break;
597 case ARM::t2SUBspImm:
598 case ARM::t2SUBspImm12:
599 case ARM::t2ADDspImm:
600 case ARM::t2ADDspImm12:
601 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
602 .addImm(Val: MBBI->getOperand(i: 2).getImm())
603 .addImm(/*Wide=*/Val: 1)
604 .setMIFlags(Flags);
605 break;
606
607 case ARM::tMOVr:
608 if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
609 (Flags & MachineInstr::FrameSetup)) {
610 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
611 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
612 .addImm(Val: Reg)
613 .setMIFlags(Flags);
614 } else if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
615 (Flags & MachineInstr::FrameDestroy)) {
616 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
617 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
618 .addImm(Val: Reg)
619 .setMIFlags(Flags);
620 } else {
621 report_fatal_error(reason: "No SEH Opcode for MOV");
622 }
623 break;
624
625 case ARM::tBX_RET:
626 case ARM::t2BXAUT_RET:
627 case ARM::CLEANUPRET:
628 case ARM::CATCHRET:
629 case ARM::TCRETURNri:
630 case ARM::TCRETURNrinotr12:
631 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
632 .addImm(/*Wide=*/Val: 0)
633 .setMIFlags(Flags);
634 break;
635
636 case ARM::TCRETURNdi:
637 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
638 .addImm(/*Wide=*/Val: 1)
639 .setMIFlags(Flags);
640 break;
641 }
642 return MBB->insertAfter(I: MBBI, MI: MIB);
643}
644
645static MachineBasicBlock::iterator
646initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
647 if (MBBI == MBB.begin())
648 return MachineBasicBlock::iterator();
649 return std::prev(x: MBBI);
650}
651
652static void insertSEHRange(MachineBasicBlock &MBB,
653 MachineBasicBlock::iterator Start,
654 const MachineBasicBlock::iterator &End,
655 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
656 if (Start.isValid())
657 Start = std::next(x: Start);
658 else
659 Start = MBB.begin();
660
661 for (auto MI = Start; MI != End;) {
662 auto Next = std::next(x: MI);
663 // Check if this instruction already has got a SEH opcode added. In that
664 // case, don't do this generic mapping.
665 if (Next != End && isSEHInstruction(MI: *Next)) {
666 MI = std::next(x: Next);
667 while (MI != End && isSEHInstruction(MI: *MI))
668 ++MI;
669 continue;
670 }
671 insertSEH(MBBI: MI, TII, Flags: MIFlags);
672 MI = Next;
673 }
674}
675
676static void emitRegPlusImmediate(
677 bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
678 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
679 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
680 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
681 if (isARM)
682 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
683 Pred, PredReg, TII, MIFlags);
684 else
685 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
686 Pred, PredReg, TII, MIFlags);
687}
688
689static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
690 MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
691 const ARMBaseInstrInfo &TII, int NumBytes,
692 unsigned MIFlags = MachineInstr::NoFlags,
693 ARMCC::CondCodes Pred = ARMCC::AL,
694 unsigned PredReg = 0) {
695 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, DestReg: ARM::SP, SrcReg: ARM::SP, NumBytes,
696 MIFlags, Pred, PredReg);
697}
698
699static int sizeOfSPAdjustment(const MachineInstr &MI) {
700 int RegSize;
701 switch (MI.getOpcode()) {
702 case ARM::VSTMDDB_UPD:
703 RegSize = 8;
704 break;
705 case ARM::STMDB_UPD:
706 case ARM::t2STMDB_UPD:
707 RegSize = 4;
708 break;
709 case ARM::t2STR_PRE:
710 case ARM::STR_PRE_IMM:
711 return 4;
712 default:
713 llvm_unreachable("Unknown push or pop like instruction");
714 }
715
716 int count = 0;
717 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
718 // pred) so the list starts at 4.
719 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
720 count += RegSize;
721 return count;
722}
723
724static bool WindowsRequiresStackProbe(const MachineFunction &MF,
725 size_t StackSizeInBytes) {
726 const MachineFrameInfo &MFI = MF.getFrameInfo();
727 const Function &F = MF.getFunction();
728 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
729
730 StackProbeSize =
731 F.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: StackProbeSize);
732 return (StackSizeInBytes >= StackProbeSize) &&
733 !F.hasFnAttribute(Kind: "no-stack-arg-probe");
734}
735
736namespace {
737
738struct StackAdjustingInsts {
739 struct InstInfo {
740 MachineBasicBlock::iterator I;
741 unsigned SPAdjust;
742 bool BeforeFPSet;
743
744#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
745 void dump() {
746 dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
747 << "sp-adjust=" << SPAdjust;
748 I->dump();
749 }
750#endif
751 };
752
753 SmallVector<InstInfo, 4> Insts;
754
755 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
756 bool BeforeFPSet = false) {
757 InstInfo Info = {.I: I, .SPAdjust: SPAdjust, .BeforeFPSet: BeforeFPSet};
758 Insts.push_back(Elt: Info);
759 }
760
761 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
762 auto Info =
763 llvm::find_if(Range&: Insts, P: [&](InstInfo &Info) { return Info.I == I; });
764 assert(Info != Insts.end() && "invalid sp adjusting instruction");
765 Info->SPAdjust += ExtraBytes;
766 }
767
768 void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
769 CFIInstBuilder CFIBuilder(MBB, MBB.end(), MachineInstr::FrameSetup);
770 unsigned CFAOffset = 0;
771 for (auto &Info : Insts) {
772 if (HasFP && !Info.BeforeFPSet)
773 return;
774
775 CFAOffset += Info.SPAdjust;
776 CFIBuilder.setInsertPoint(std::next(x: Info.I));
777 CFIBuilder.buildDefCFAOffset(Offset: CFAOffset);
778 }
779 }
780
781#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
782 void dump() {
783 dbgs() << "StackAdjustingInsts:\n";
784 for (auto &Info : Insts)
785 Info.dump();
786 }
787#endif
788};
789
790} // end anonymous namespace
791
792/// Emit an instruction sequence that will align the address in
793/// register Reg by zero-ing out the lower bits. For versions of the
794/// architecture that support Neon, this must be done in a single
795/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
796/// single instruction. That function only gets called when optimizing
797/// spilling of D registers on a core with the Neon instruction set
798/// present.
799static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
800 const TargetInstrInfo &TII,
801 MachineBasicBlock &MBB,
802 MachineBasicBlock::iterator MBBI,
803 const DebugLoc &DL, const unsigned Reg,
804 const Align Alignment,
805 const bool MustBeSingleInstruction) {
806 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
807 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
808 const unsigned AlignMask = Alignment.value() - 1U;
809 const unsigned NrBitsToZero = Log2(A: Alignment);
810 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
811 if (!AFI->isThumbFunction()) {
812 // if the BFC instruction is available, use that to zero the lower
813 // bits:
814 // bfc Reg, #0, log2(Alignment)
815 // otherwise use BIC, if the mask to zero the required number of bits
816 // can be encoded in the bic immediate field
817 // bic Reg, Reg, Alignment-1
818 // otherwise, emit
819 // lsr Reg, Reg, log2(Alignment)
820 // lsl Reg, Reg, log2(Alignment)
821 if (CanUseBFC) {
822 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BFC), DestReg: Reg)
823 .addReg(RegNo: Reg, Flags: RegState::Kill)
824 .addImm(Val: ~AlignMask)
825 .add(MOs: predOps(Pred: ARMCC::AL));
826 } else if (AlignMask <= 255) {
827 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BICri), DestReg: Reg)
828 .addReg(RegNo: Reg, Flags: RegState::Kill)
829 .addImm(Val: AlignMask)
830 .add(MOs: predOps(Pred: ARMCC::AL))
831 .add(MO: condCodeOp());
832 } else {
833 assert(!MustBeSingleInstruction &&
834 "Shouldn't call emitAligningInstructions demanding a single "
835 "instruction to be emitted for large stack alignment for a target "
836 "without BFC.");
837 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
838 .addReg(RegNo: Reg, Flags: RegState::Kill)
839 .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsr, Imm: NrBitsToZero))
840 .add(MOs: predOps(Pred: ARMCC::AL))
841 .add(MO: condCodeOp());
842 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
843 .addReg(RegNo: Reg, Flags: RegState::Kill)
844 .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: NrBitsToZero))
845 .add(MOs: predOps(Pred: ARMCC::AL))
846 .add(MO: condCodeOp());
847 }
848 } else {
849 // Since this is only reached for Thumb-2 targets, the BFC instruction
850 // should always be available.
851 assert(CanUseBFC);
852 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::t2BFC), DestReg: Reg)
853 .addReg(RegNo: Reg, Flags: RegState::Kill)
854 .addImm(Val: ~AlignMask)
855 .add(MOs: predOps(Pred: ARMCC::AL));
856 }
857}
858
859/// We need the offset of the frame pointer relative to other MachineFrameInfo
860/// offsets which are encoded relative to SP at function begin.
861/// See also emitPrologue() for how the FP is set up.
862/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
863/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
864/// this to produce a conservative estimate that we check in an assert() later.
865static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
866 const MachineFunction &MF) {
867 ARMSubtarget::PushPopSplitVariation PushPopSplit =
868 STI.getPushPopSplitVariation(MF);
869 // For Thumb1, push.w isn't available, so the first push will always push
870 // r7 and lr onto the stack first.
871 if (AFI.isThumb1OnlyFunction())
872 return -AFI.getArgRegsSaveSize() - (2 * 4);
873 // This is a conservative estimation: Assume the frame pointer being r7 and
874 // pc("r15") up to r8 getting spilled before (= 8 registers).
875 int MaxRegBytes = 8 * 4;
876 if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
877 // Here, r11 can be stored below all of r4-r15.
878 MaxRegBytes = 11 * 4;
879 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
880 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
881 MaxRegBytes = 11 * 4 + 8 * 8;
882 }
883 int FPCXTSaveSize =
884 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
885 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
886}
887
/// Emit the prologue for an ARM or Thumb2 function into \p MBB (Thumb1-only
/// functions are rejected by the assert below).
///
/// The routine steps over the instructions it expects to find at the start of
/// the block (PAC computation, FPCXT save, callee-save pushes), sizing each
/// spill area from the callee-saved info as it goes. It then emits the
/// remaining SP adjustments (including the Windows stack-probe sequence), sets
/// up the frame pointer and base pointer when required, performs dynamic stack
/// realignment, and emits either DWARF CFI or Windows SEH unwind annotations.
/// The computed area sizes and offsets are recorded in ARMFunctionInfo.
///
/// \param MF  Function being lowered.
/// \param MBB Block that receives the prologue; insertion starts at its
///            beginning.
888 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
889 MachineBasicBlock &MBB) const {
890 MachineBasicBlock::iterator MBBI = MBB.begin();
891 MachineFrameInfo &MFI = MF.getFrameInfo();
892 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
893 const TargetMachine &TM = MF.getTarget();
894 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
895 const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
896 assert(!AFI->isThumb1OnlyFunction() &&
897 "This emitPrologue does not support Thumb1!");
898 bool isARM = !AFI->isThumbFunction();
899 Align Alignment = STI.getFrameLowering()->getStackAlign();
900 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  // NumBytes starts as the whole stack size and is narrowed further down to
  // the part that still has to be allocated after the callee-save pushes.
901 unsigned NumBytes = MFI.getStackSize();
902 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
903 int FPCXTSaveSize = 0;
904 bool NeedsWinCFI = needsWinCFI(MF);
905 ARMSubtarget::PushPopSplitVariation PushPopSplit =
906 STI.getPushPopSplitVariation(MF);
907
908 LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
909
910 // Debug location must be unknown since the first debug location is used
911 // to determine the end of the prologue.
912 DebugLoc dl;
913
914 Register FramePtr = RegInfo->getFrameRegister(MF);
915
916 // Determine the sizes of each callee-save spill areas and record which frame
917 // belongs to which callee-save spill areas.
918 unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
919 DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
920 int FramePtrSpillFI = 0;
921 int D8SpillFI = 0;
922
923 // All calls are tail calls in GHC calling conv, and functions have no
924 // prologue/epilogue.
925 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
926 return;
927
928 StackAdjustingInsts DefCFAOffsetCandidates;
929 bool HasFP = hasFP(MF);
930
  // Fast path: no stack frame was set up and no Windows stack probe is
  // required, so the only work is a single SP adjustment for the whole stack
  // size plus the matching unwind annotations.
931 if (!AFI->hasStackFrame() &&
932 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes))) {
933 if (NumBytes != 0) {
934 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
935 MIFlags: MachineInstr::FrameSetup);
936 DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes, BeforeFPSet: true);
937 }
938 if (!NeedsWinCFI)
939 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
940 if (NeedsWinCFI && MBBI != MBB.begin()) {
941 insertSEHRange(MBB, Start: {}, End: MBBI, TII, MIFlags: MachineInstr::FrameSetup);
942 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
943 .setMIFlag(MachineInstr::FrameSetup);
944 MF.setHasWinCFI(true);
945 }
946 return;
947 }
948
949 // Determine spill area sizes, and some important frame indices.
950 SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
  // Stays true until the push that saves the frame pointer has been stepped
  // over; it is passed along with every SP adjustment recorded for CFA
  // purposes.
951 bool BeforeFPPush = true;
952 for (const CalleeSavedInfo &I : CSI) {
953 MCRegister Reg = I.getReg();
954 int FI = I.getFrameIdx();
955
956 SpillArea Area = getSpillArea(Reg, Variation: PushPopSplit,
957 NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(), RegInfo);
958
959 if (Reg == FramePtr.asMCReg()) {
960 FramePtrSpillFI = FI;
961 FramePtrSpillArea = Area;
962 }
963 if (Reg == ARM::D8)
964 D8SpillFI = FI;
965
  // GPR-class areas grow by 4 bytes per register, DPR areas by 8.
966 switch (Area) {
967 case SpillArea::FPCXT:
968 FPCXTSaveSize += 4;
969 break;
970 case SpillArea::GPRCS1:
971 GPRCS1Size += 4;
972 break;
973 case SpillArea::GPRCS2:
974 GPRCS2Size += 4;
975 break;
976 case SpillArea::FPStatus:
977 FPStatusSize += 4;
978 break;
979 case SpillArea::DPRCS1:
980 DPRCS1Size += 8;
981 break;
982 case SpillArea::GPRCS3:
983 GPRCS3Size += 4;
984 break;
985 case SpillArea::DPRCS2:
986 DPRCS2Size += 8;
987 break;
988 }
989 }
990
  // LastPush tracks the most recent SP-adjusting instruction stepped over or
  // emitted so far; MBB.end() means "none yet". The per-area iterators are
  // only assigned when the corresponding area is non-empty.
991 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
992 DPRCS1Push, GPRCS3Push;
993
994 // Move past the PAC computation.
995 if (AFI->shouldSignReturnAddress())
996 LastPush = MBBI++;
997
998 // Move past FPCXT area.
999 if (FPCXTSaveSize > 0) {
1000 LastPush = MBBI++;
1001 DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPCXTSaveSize, BeforeFPSet: BeforeFPPush);
1002 }
1003
1004 // Allocate the vararg register save area.
1005 if (ArgRegsSaveSize) {
1006 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -ArgRegsSaveSize,
1007 MIFlags: MachineInstr::FrameSetup);
1008 LastPush = std::prev(x: MBBI);
1009 DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: ArgRegsSaveSize, BeforeFPSet: BeforeFPPush);
1010 }
1011
1012 // Move past area 1.
1013 if (GPRCS1Size > 0) {
1014 GPRCS1Push = LastPush = MBBI++;
1015 DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS1Size, BeforeFPSet: BeforeFPPush);
1016 if (FramePtrSpillArea == SpillArea::GPRCS1)
1017 BeforeFPPush = false;
1018 }
1019
1020 // Determine starting offsets of spill areas. These offsets are all positive
1021 // offsets from the bottom of the lowest-addressed callee-save area
1022 // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
1023 // of the spill area in question.
1024 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
1025 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
1026 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
1027 unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;
1028
  // DPRGapSize is the remainder of the bytes allocated above DPRCS1 modulo
  // DPRAlign; when non-zero it is emitted below as padding (and asserted to be
  // exactly 4).
1029 Align DPRAlign = DPRCS1Size ? std::min(a: Align(8), b: Alignment) : Align(4);
1030 unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1031 GPRCS2Size + FPStatusSize) %
1032 DPRAlign.value();
1033
1034 unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;
1035
1036 if (HasFP) {
1037 // Offset from the CFA to the saved frame pointer, will be negative.
1038 [[maybe_unused]] int FPOffset = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI);
1039 LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
1040 << ", FPOffset: " << FPOffset << "\n");
1041 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
1042 "Max FP estimation is wrong");
1043 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
1044 NumBytes);
1045 }
1046 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
1047 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
1048 AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);
1049
1050 // Move past area 2.
1051 if (GPRCS2Size > 0) {
1052 assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
1053 GPRCS2Push = LastPush = MBBI++;
1054 DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS2Size, BeforeFPSet: BeforeFPPush);
1055 if (FramePtrSpillArea == SpillArea::GPRCS2)
1056 BeforeFPPush = false;
1057 }
1058
1059 // Move past FP status save area.
1060 if (FPStatusSize > 0) {
  // Skip any VMRS / VMRS_FPEXC instructions first; the instruction after them
  // is the one recorded as adjusting SP by FPStatusSize.
1061 while (MBBI != MBB.end()) {
1062 unsigned Opc = MBBI->getOpcode();
1063 if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
1064 MBBI++;
1065 else
1066 break;
1067 }
1068 LastPush = MBBI++;
1069 DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPStatusSize);
1070 }
1071
1072 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
1073 // .cfi_offset operations will reflect that.
1074 if (DPRGapSize) {
1075 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
  // Prefer folding the padding into the previous push; fall back to an
  // explicit SP update when there is no previous push or folding fails.
1076 if (LastPush != MBB.end() &&
1077 tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes: DPRGapSize))
1078 DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: DPRGapSize);
1079 else {
1080 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -DPRGapSize,
1081 MIFlags: MachineInstr::FrameSetup);
1082 DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: DPRGapSize, BeforeFPSet: BeforeFPPush);
1083 }
1084 }
1085
1086 // Move past DPRCS1Size.
1087 if (DPRCS1Size > 0) {
1088 // Since vpush register list cannot have gaps, there may be multiple vpush
1089 // instructions in the prologue.
1090 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1091 DefCFAOffsetCandidates.addInst(I: MBBI, SPAdjust: sizeOfSPAdjustment(MI: *MBBI),
1092 BeforeFPSet: BeforeFPPush);
1093 DPRCS1Push = LastPush = MBBI++;
1094 }
1095 }
1096
1097 // Move past the aligned DPRCS2 area.
1098 if (DPRCS2Size > 0) {
1099 MBBI = skipAlignedDPRCS2Spills(MI: MBBI, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs());
1100 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
1101 // leaves the stack pointer pointing to the DPRCS2 area.
1102 //
1103 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
1104 NumBytes += MFI.getObjectOffset(ObjectIdx: D8SpillFI);
1105 } else
  // Without a DPRCS2 area, what remains to allocate is everything below the
  // bottom of DPRCS1.
1106 NumBytes = DPRCS1Offset;
1107
1108 // Move past GPRCS3, if using SplitR11WindowsSEH.
1109 if (GPRCS3Size > 0) {
1110 assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
1111 GPRCS3Push = LastPush = MBBI++;
1112 DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS3Size, BeforeFPSet: BeforeFPPush);
1113 if (FramePtrSpillArea == SpillArea::GPRCS3)
1114 BeforeFPPush = false;
1115 NumBytes -= GPRCS3Size;
1116 }
1117
  // With SplitR11WindowsSEH and a frame pointer, the SEH prologue is ended
  // right after the frame-pointer setup (see End = AfterPush further down), so
  // no SEH_StackAlloc is emitted for the probed allocation below.
1118 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
1119 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
1120 NeedsWinCFIStackAlloc = false;
1121
  // Windows stack probe: load the allocation size in 4-byte words into R4,
  // call the stack-probe libcall, then subtract R4 from SP.
  // NOTE(review): the SUB uses R4 directly, so the helper presumably leaves
  // the byte count in R4 -- confirm against the __chkstk contract.
1122 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes)) {
1123 uint32_t NumWords = NumBytes >> 2;
1124
1125 if (NumWords < 65536) {
1126 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
1127 .addImm(Val: NumWords)
1128 .setMIFlags(MachineInstr::FrameSetup)
1129 .add(MOs: predOps(Pred: ARMCC::AL));
1130 } else {
1131 // Split into two instructions here, instead of using t2MOVi32imm,
1132 // to allow inserting accurate SEH instructions (including accurate
1133 // instruction size for each of them).
1134 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
1135 .addImm(Val: NumWords & 0xffff)
1136 .setMIFlags(MachineInstr::FrameSetup)
1137 .add(MOs: predOps(Pred: ARMCC::AL));
1138 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVTi16), DestReg: ARM::R4)
1139 .addReg(RegNo: ARM::R4)
1140 .addImm(Val: NumWords >> 16)
1141 .setMIFlags(MachineInstr::FrameSetup)
1142 .add(MOs: predOps(Pred: ARMCC::AL));
1143 }
1144
1145 const ARMTargetLowering *TLI = STI.getTargetLowering();
1146 RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(Call: RTLIB::STACK_PROBE);
1147 if (ChkStkLibcall == RTLIB::Unsupported)
1148 reportFatalUsageError(reason: "no available implementation of __chkstk");
1149 const char *ChkStk = TLI->getLibcallImplName(Call: ChkStkLibcall).data();
1150
  // Direct BL for the small/medium/kernel code models; the large code model
  // materialises the helper's address in R12 and calls through it.
1151 switch (TM.getCodeModel()) {
1152 case CodeModel::Tiny:
1153 llvm_unreachable("Tiny code model not available on ARM.");
1154 case CodeModel::Small:
1155 case CodeModel::Medium:
1156 case CodeModel::Kernel:
1157 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBL))
1158 .add(MOs: predOps(Pred: ARMCC::AL))
1159 .addExternalSymbol(FnName: ChkStk)
1160 .addReg(RegNo: ARM::R4, Flags: RegState::Implicit)
1161 .setMIFlags(MachineInstr::FrameSetup);
1162 break;
1163 case CodeModel::Large:
1164 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi32imm), DestReg: ARM::R12)
1165 .addExternalSymbol(FnName: ChkStk)
1166 .setMIFlags(MachineInstr::FrameSetup);
1167
1168 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBLXr))
1169 .add(MOs: predOps(Pred: ARMCC::AL))
1170 .addReg(RegNo: ARM::R12, Flags: RegState::Kill)
1171 .addReg(RegNo: ARM::R4, Flags: RegState::Implicit)
1172 .setMIFlags(MachineInstr::FrameSetup);
1173 break;
1174 }
1175
1176 MachineInstrBuilder Instr, SEH;
1177 Instr = BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2SUBrr), DestReg: ARM::SP)
1178 .addReg(RegNo: ARM::SP, Flags: RegState::Kill)
1179 .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
1180 .setMIFlags(MachineInstr::FrameSetup)
1181 .add(MOs: predOps(Pred: ARMCC::AL))
1182 .add(MO: condCodeOp());
1183 if (NeedsWinCFIStackAlloc) {
1184 SEH = BuildMI(MF, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
1185 .addImm(Val: NumBytes)
1186 .addImm(/*Wide=*/Val: 1)
1187 .setMIFlags(MachineInstr::FrameSetup);
1188 MBB.insertAfter(I: Instr, MI: SEH);
1189 }
  // The probe sequence has allocated everything; skip the plain SP update
  // below.
1190 NumBytes = 0;
1191 }
1192
1193 if (NumBytes) {
1194 // Adjust SP after all the callee-save spills.
1195 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1196 tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes))
1197 DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: NumBytes);
1198 else {
1199 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
1200 MIFlags: MachineInstr::FrameSetup);
1201 DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes);
1202 }
1203
1204 if (HasFP && isARM)
1205 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1206 // Note it's not safe to do this in Thumb2 mode because it would have
1207 // taken two instructions:
1208 // mov sp, r7
1209 // sub sp, #24
1210 // If an interrupt is taken between the two instructions, then sp is in
1211 // an inconsistent state (pointing to the middle of callee-saved area).
1212 // The interrupt handler can end up clobbering the registers.
1213 AFI->setShouldRestoreSPFromFP(true);
1214 }
1215
1216 // Set FP to point to the stack slot that contains the previous FP.
1217 // For iOS, FP is R7, which has now been stored in spill area 1.
1218 // Otherwise, if this is not iOS, all the callee-saved registers go
1219 // into spill area 1, including the FP in R11. In either case, it
1220 // is in area one and the adjustment needs to take place just after
1221 // that push.
  // NOTE(review): the switch below also handles the frame pointer being saved
  // in GPRCS2 or GPRCS3, so the "area one" wording above describes only one of
  // the handled cases.
1222 MachineBasicBlock::iterator AfterPush;
1223 if (HasFP) {
1224 MachineBasicBlock::iterator FPPushInst;
1225 // Offset from SP immediately after the push which saved the FP to the FP
1226 // save slot.
1227 int64_t FPOffsetAfterPush;
1228 switch (FramePtrSpillArea) {
1229 case SpillArea::GPRCS1:
1230 FPPushInst = GPRCS1Push;
1231 FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
1232 ArgRegsSaveSize + FPCXTSaveSize +
1233 sizeOfSPAdjustment(MI: *FPPushInst);
1234 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1235 << FPOffsetAfterPush << " after that push\n");
1236 break;
1237 case SpillArea::GPRCS2:
1238 FPPushInst = GPRCS2Push;
1239 FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
1240 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1241 sizeOfSPAdjustment(MI: *FPPushInst);
1242 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1243 << FPOffsetAfterPush << " after that push\n");
1244 break;
1245 case SpillArea::GPRCS3:
1246 FPPushInst = GPRCS3Push;
1247 FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
1248 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1249 FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
1250 sizeOfSPAdjustment(MI: *FPPushInst);
1251 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
1252 << FPOffsetAfterPush << " after that push\n");
1253 break;
1254 default:
1255 llvm_unreachable("frame pointer in unknown spill area");
1256 break;
1257 }
1258 AfterPush = std::next(x: FPPushInst);
1259 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1260 assert(FPOffsetAfterPush == 0);
1261
1262 // Emit the MOV or ADD to set up the frame pointer register.
1263 emitRegPlusImmediate(isARM: !AFI->isThumbFunction(), MBB, MBBI&: AfterPush, dl, TII,
1264 DestReg: FramePtr, SrcReg: ARM::SP, NumBytes: FPOffsetAfterPush,
1265 MIFlags: MachineInstr::FrameSetup);
1266
1267 if (!NeedsWinCFI) {
1268 // Emit DWARF info to find the CFA using the frame pointer from this
1269 // point onward.
1270 CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
1271 if (FPOffsetAfterPush != 0)
1272 CFIBuilder.buildDefCFA(Reg: FramePtr, Offset: -MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI));
1273 else
1274 CFIBuilder.buildDefCFARegister(Reg: FramePtr);
1275 }
1276 }
1277
1278 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1279 // instructions below don't need to be replayed to unwind the stack.
1280 if (NeedsWinCFI && MBBI != MBB.begin()) {
1281 MachineBasicBlock::iterator End = MBBI;
1282 if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1283 End = AfterPush;
1284 insertSEHRange(MBB, Start: {}, End, TII, MIFlags: MachineInstr::FrameSetup);
1285 BuildMI(BB&: MBB, I: End, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
1286 .setMIFlag(MachineInstr::FrameSetup);
1287 MF.setHasWinCFI(true);
1288 }
1289
1290 // Now that the prologue's actual instructions are finalised, we can insert
1291 // the necessary DWARF cf instructions to describe the situation. Start by
1292 // recording where each register ended up:
1293 if (!NeedsWinCFI) {
1294 for (const auto &Entry : reverse(C: CSI)) {
1295 MCRegister Reg = Entry.getReg();
1296 int FI = Entry.getFrameIdx();
  // CFIPos is left default-constructed for areas that get no DWARF offset;
  // the isValid() check below then skips them.
1297 MachineBasicBlock::iterator CFIPos;
1298 switch (getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(),
1299 RegInfo)) {
1300 case SpillArea::GPRCS1:
1301 CFIPos = std::next(x: GPRCS1Push);
1302 break;
1303 case SpillArea::GPRCS2:
1304 CFIPos = std::next(x: GPRCS2Push);
1305 break;
1306 case SpillArea::DPRCS1:
1307 CFIPos = std::next(x: DPRCS1Push);
1308 break;
1309 case SpillArea::GPRCS3:
1310 CFIPos = std::next(x: GPRCS3Push);
1311 break;
1312 case SpillArea::FPStatus:
1313 case SpillArea::FPCXT:
1314 case SpillArea::DPRCS2:
1315 // FPStatus, FPCXT and DPRCS2 are not represented in the DWARF info.
1316 break;
1317 }
1318
1319 if (CFIPos.isValid()) {
  // A saved R12 is described to DWARF as RA_AUTH_CODE rather than as R12.
1320 CFIInstBuilder(MBB, CFIPos, MachineInstr::FrameSetup)
1321 .buildOffset(Reg: Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
1322 Offset: MFI.getObjectOffset(ObjectIdx: FI));
1323 }
1324 }
1325 }
1326
1327 // Now we can emit descriptions of where the canonical frame address was
1328 // throughout the process. If we have a frame pointer, it takes over the job
1329 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1330 // actually get emitted.
1331 if (!NeedsWinCFI) {
1332 LLVM_DEBUG(DefCFAOffsetCandidates.dump());
1333 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
1334 }
1335
1336 if (STI.isTargetELF() && hasFP(MF))
1337 MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
1338 AFI->getFramePtrSpillOffset());
1339
  // Record the area sizes; emitEpilogue() reads them back to undo the frame.
1340 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1341 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1342 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1343 AFI->setFPStatusSavesSize(FPStatusSize);
1344 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1345 AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
1346 AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);
1347
1348 // If we need dynamic stack realignment, do it here. Be paranoid and make
1349 // sure if we also have VLAs, we have a base pointer for frame access.
1350 // If aligned NEON registers were spilled, the stack has already been
1351 // realigned.
1352 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1353 Align MaxAlign = MFI.getMaxAlign();
1354 assert(!AFI->isThumb1OnlyFunction());
1355 if (!AFI->isThumbFunction()) {
1356 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::SP, Alignment: MaxAlign,
1357 MustBeSingleInstruction: false);
1358 } else {
1359 // We cannot use sp as source/dest register here, thus we're using r4 to
1360 // perform the calculations. We're emitting the following sequence:
1361 // mov r4, sp
1362 // -- use emitAligningInstructions to produce best sequence to zero
1363 // -- out lower bits in r4
1364 // mov sp, r4
1365 // FIXME: It will be better just to find spare register here.
1366 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::R4)
1367 .addReg(RegNo: ARM::SP, Flags: RegState::Kill)
1368 .add(MOs: predOps(Pred: ARMCC::AL));
1369 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::R4, Alignment: MaxAlign,
1370 MustBeSingleInstruction: false);
1371 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
1372 .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
1373 .add(MOs: predOps(Pred: ARMCC::AL));
1374 }
1375
1376 AFI->setShouldRestoreSPFromFP(true);
1377 }
1378
1379 // If we need a base pointer, set it up here. It's whatever the value
1380 // of the stack pointer is at this point. Any variable size objects
1381 // will be allocated after this, so we can still use the base pointer
1382 // to reference locals.
1383 // FIXME: Clarify FrameSetup flags here.
1384 if (RegInfo->hasBasePointer(MF) && !MBB.isEHFuncletEntry()) {
1385 if (isARM)
1386 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: RegInfo->getBaseRegister())
1387 .addReg(RegNo: ARM::SP)
1388 .add(MOs: predOps(Pred: ARMCC::AL))
1389 .add(MO: condCodeOp());
1390 else
1391 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: RegInfo->getBaseRegister())
1392 .addReg(RegNo: ARM::SP)
1393 .add(MOs: predOps(Pred: ARMCC::AL));
1394 }
1395
1396 // If the frame has variable sized objects then the epilogue must restore
1397 // the sp from fp. We can assume there's an FP here since hasFP already
1398 // checks for hasVarSizedObjects.
1399 if (MFI.hasVarSizedObjects())
1400 AFI->setShouldRestoreSPFromFP(true);
1401 }
1402
/// Emit the epilogue for an ARM or Thumb2 function into \p MBB (Thumb1-only
/// functions are rejected by the assert below).
///
/// Working from the block's first terminator, this restores SP (from the frame
/// pointer when ARMFunctionInfo requests it, otherwise by a plain SP update or
/// by folding the update into a pop), steps over the FrameDestroy-flagged
/// instructions that precede the terminator, releases the DPR alignment gap and
/// the reserved argument area, and handles return-address authentication. When
/// the function has Windows CFI, the sequence is bracketed by SEH_EpilogStart
/// and SEH_EpilogEnd.
///
/// \param MF  Function being lowered.
/// \param MBB Returning block that receives the epilogue.
1403 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
1404 MachineBasicBlock &MBB) const {
1405 MachineFrameInfo &MFI = MF.getFrameInfo();
1406 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1407 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1408 const ARMBaseInstrInfo &TII =
1409 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1410 assert(!AFI->isThumb1OnlyFunction() &&
1411 "This emitEpilogue does not support Thumb1!");
1412 bool isARM = !AFI->isThumbFunction();
1413 ARMSubtarget::PushPopSplitVariation PushPopSplit =
1414 STI.getPushPopSplitVariation(MF);
1415
1416 LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
1417
1418 // Amount of stack space we reserved next to incoming args for either
1419 // varargs registers or stack arguments in tail calls made by this function.
1420 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1421
1422 // How much of the stack used by incoming arguments this function is expected
1423 // to restore in this particular epilogue.
1424 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1425 int NumBytes = (int)MFI.getStackSize();
1426 Register FramePtr = RegInfo->getFrameRegister(MF);
1427
1428 // All calls are tail calls in GHC calling conv, and functions have no
1429 // prologue/epilogue.
1430 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1431 return;
1432
1433 // First put ourselves on the first (from top) terminator instructions.
1434 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1435 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1436
  // Only assigned when the function has Windows CFI; it is consumed by
  // insertSEHRange() at the end.
1437 MachineBasicBlock::iterator RangeStart;
1438 if (!AFI->hasStackFrame()) {
  // No stack frame: a single SP update undoes the whole stack size plus any
  // incoming argument stack to restore.
1439 if (MF.hasWinCFI()) {
1440 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
1441 .setMIFlag(MachineInstr::FrameDestroy);
1442 RangeStart = initMBBRange(MBB, MBBI);
1443 }
1444
1445 if (NumBytes + IncomingArgStackToRestore != 0)
1446 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1447 NumBytes: NumBytes + IncomingArgStackToRestore,
1448 MIFlags: MachineInstr::FrameDestroy);
1449 } else {
1450 // Unwind MBBI to point to first LDR / VLDRD.
  // That is: walk backwards over the run of FrameDestroy-flagged instructions
  // that precedes the terminator and stop on the first one of that run.
1451 if (MBBI != MBB.begin()) {
1452 do {
1453 --MBBI;
1454 } while (MBBI != MBB.begin() &&
1455 MBBI->getFlag(Flag: MachineInstr::FrameDestroy));
1456 if (!MBBI->getFlag(Flag: MachineInstr::FrameDestroy))
1457 ++MBBI;
1458 }
1459
1460 if (MF.hasWinCFI()) {
1461 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
1462 .setMIFlag(MachineInstr::FrameDestroy);
1463 RangeStart = initMBBRange(MBB, MBBI);
1464 }
1465
1466 // Move SP to start of FP callee save spill area.
  // After this subtraction NumBytes covers only the part of the stack size
  // that is not accounted for by the save areas listed here.
1467 NumBytes -=
1468 (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
1469 AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() +
1470 AFI->getFPStatusSavesSize() + AFI->getDPRCalleeSavedGapSize() +
1471 AFI->getDPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea3Size());
1472
1473 // Reset SP based on frame pointer only if the stack frame extends beyond
1474 // frame pointer stack slot or target is ELF and the function has FP.
1475 if (AFI->shouldRestoreSPFromFP()) {
  // NumBytes now becomes the amount subtracted from the frame pointer to
  // obtain the new SP.
1476 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1477 if (NumBytes) {
1478 if (isARM)
1479 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: FramePtr, NumBytes: -NumBytes,
1480 Pred: ARMCC::AL, PredReg: 0, TII,
1481 MIFlags: MachineInstr::FrameDestroy);
1482 else {
1483 // It's not possible to restore SP from FP in a single instruction.
1484 // For iOS, this looks like:
1485 // mov sp, r7
1486 // sub sp, #24
1487 // This is bad, if an interrupt is taken after the mov, sp is in an
1488 // inconsistent state.
1489 // Use the first callee-saved register as a scratch register.
1490 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1491 "No scratch register to restore SP from FP!");
1492 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::R4, BaseReg: FramePtr, NumBytes: -NumBytes,
1493 Pred: ARMCC::AL, PredReg: 0, TII, MIFlags: MachineInstr::FrameDestroy);
1494 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
1495 .addReg(RegNo: ARM::R4)
1496 .add(MOs: predOps(Pred: ARMCC::AL))
1497 .setMIFlag(MachineInstr::FrameDestroy);
1498 }
1499 } else {
1500 // Thumb2 or ARM.
  // Zero distance: a plain register move from the frame pointer suffices.
1501 if (isARM)
1502 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ARM::SP)
1503 .addReg(RegNo: FramePtr)
1504 .add(MOs: predOps(Pred: ARMCC::AL))
1505 .add(MO: condCodeOp())
1506 .setMIFlag(MachineInstr::FrameDestroy);
1507 else
1508 BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
1509 .addReg(RegNo: FramePtr)
1510 .add(MOs: predOps(Pred: ARMCC::AL))
1511 .setMIFlag(MachineInstr::FrameDestroy);
1512 }
  // Not restoring from FP: try to fold the SP update into the instruction at
  // MBBI, and emit an explicit update only if that fails.
1513 } else if (NumBytes &&
1514 !tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*MBBI, NumBytes))
1515 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1516 MIFlags: MachineInstr::FrameDestroy);
1517
1518 // Increment past our save areas.
1519 if (AFI->getGPRCalleeSavedArea3Size()) {
1520 assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
1521 (void)PushPopSplit;
1522 MBBI++;
1523 }
1524
1525 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
1526 MBBI++;
1527 // Since vpop register list cannot have gaps, there may be multiple vpop
1528 // instructions in the epilogue.
1529 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1530 MBBI++;
1531 }
  // Release the DPR alignment padding recorded in ARMFunctionInfo.
1532 if (AFI->getDPRCalleeSavedGapSize()) {
1533 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1534 "unexpected DPR alignment gap");
1535 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: AFI->getDPRCalleeSavedGapSize(),
1536 MIFlags: MachineInstr::FrameDestroy);
1537 }
1538
1539 if (AFI->getGPRCalleeSavedArea2Size()) {
1540 assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
1541 (void)PushPopSplit;
1542 MBBI++;
1543 }
1544 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1545
  // Finally release the reserved argument area together with whatever part of
  // the incoming argument stack this epilogue is expected to restore.
1546 if (ReservedArgStack || IncomingArgStackToRestore) {
1547 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1548 "attempting to restore negative stack amount");
1549 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1550 NumBytes: ReservedArgStack + IncomingArgStackToRestore,
1551 MIFlags: MachineInstr::FrameDestroy);
1552 }
1553
1554 // Validate PAC, It should have been already popped into R12. For CMSE entry
1555 // function, the validation instruction is emitted during expansion of the
1556 // tBXNS_RET, since the validation must use the value of SP at function
1557 // entry, before saving, resp. after restoring, FPCXTNS.
1558 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction()) {
1559 bool CanUseBXAut =
1560 STI.isThumb() && STI.hasV8_1MMainlineOps() && STI.hasPACBTI();
1561 auto TMBBI = MBB.getFirstTerminator();
1562 bool IsBXReturn =
1563 TMBBI != MBB.end() && TMBBI->getOpcode() == ARM::tBX_RET;
  // Either rewrite a tBX_RET terminator in place to t2BXAUT_RET, or insert a
  // separate t2AUT at MBBI (with an empty debug location).
1564 if (IsBXReturn && CanUseBXAut)
1565 TMBBI->setDesc(STI.getInstrInfo()->get(Opcode: ARM::t2BXAUT_RET));
1566 else
1567 BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2AUT));
1568 }
1569 }
1570
1571 if (MF.hasWinCFI()) {
1572 insertSEHRange(MBB, Start: RangeStart, End: MBB.end(), TII, MIFlags: MachineInstr::FrameDestroy);
1573 BuildMI(BB&: MBB, I: MBB.end(), MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogEnd))
1574 .setMIFlag(MachineInstr::FrameDestroy);
1575 }
1576 }
1577
1578/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1579/// debug info. It's the same as what we use for resolving the code-gen
1580/// references for now. FIXME: This can go wrong when references are
1581/// SP-relative and simple call frames aren't used.
1582StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1583 int FI,
1584 Register &FrameReg) const {
1585 return StackOffset::getFixed(Fixed: ResolveFrameIndexReference(MF, FI, FrameReg, SPAdj: 0));
1586}
1587
1588StackOffset
1589ARMFrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
1590 int FI) const {
1591 const MachineFrameInfo &MFI = MF.getFrameInfo();
1592 int Offset = MFI.getObjectOffset(ObjectIdx: FI) + MFI.getStackSize();
1593 return StackOffset::getFixed(Fixed: Offset);
1594}
1595
1596int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
1597 int FI, Register &FrameReg,
1598 int SPAdj) const {
1599 const MachineFrameInfo &MFI = MF.getFrameInfo();
1600 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1601 MF.getSubtarget().getRegisterInfo());
1602 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1603 int Offset = MFI.getObjectOffset(ObjectIdx: FI) + MFI.getStackSize();
1604 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1605 bool isFixed = MFI.isFixedObjectIndex(ObjectIdx: FI);
1606
1607 FrameReg = ARM::SP;
1608 Offset += SPAdj;
1609
1610 // SP can move around if there are allocas. We may also lose track of SP
1611 // when emergency spilling inside a non-reserved call frame setup.
1612 bool hasMovingSP = !hasReservedCallFrame(MF);
1613
1614 // When dynamically realigning the stack, use the frame pointer for
1615 // parameters, and the stack/base pointer for locals.
1616 if (RegInfo->hasStackRealignment(MF)) {
1617 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1618 if (isFixed) {
1619 FrameReg = RegInfo->getFrameRegister(MF);
1620 Offset = FPOffset;
1621 } else if (hasMovingSP) {
1622 assert(RegInfo->hasBasePointer(MF) &&
1623 "VLAs and dynamic stack alignment, but missing base pointer!");
1624 FrameReg = RegInfo->getBaseRegister();
1625 Offset -= SPAdj;
1626 }
1627 return Offset;
1628 }
1629
1630 // If there is a frame pointer, use it when we can.
1631 if (hasFP(MF) && AFI->hasStackFrame()) {
1632 // Use frame pointer to reference fixed objects. Use it for locals if
1633 // there are VLAs (and thus the SP isn't reliable as a base).
1634 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1635 FrameReg = RegInfo->getFrameRegister(MF);
1636 return FPOffset;
1637 } else if (hasMovingSP) {
1638 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1639 if (AFI->isThumb2Function()) {
1640 // Try to use the frame pointer if we can, else use the base pointer
1641 // since it's available. This is handy for the emergency spill slot, in
1642 // particular.
1643 if (FPOffset >= -255 && FPOffset < 0) {
1644 FrameReg = RegInfo->getFrameRegister(MF);
1645 return FPOffset;
1646 }
1647 }
1648 } else if (AFI->isThumbFunction()) {
1649 // Prefer SP to base pointer, if the offset is suitably aligned and in
1650 // range as the effective range of the immediate offset is bigger when
1651 // basing off SP.
1652 // Use add <rd>, sp, #<imm8>
1653 // ldr <rd>, [sp, #<imm8>]
1654 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1655 return Offset;
1656 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1657 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1658 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1659 FrameReg = RegInfo->getFrameRegister(MF);
1660 return FPOffset;
1661 }
1662 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1663 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1664 FrameReg = RegInfo->getFrameRegister(MF);
1665 return FPOffset;
1666 }
1667 }
1668 // Use the base pointer if we have one.
1669 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1670 // That can happen if we forced a base pointer for a large call frame.
1671 if (RegInfo->hasBasePointer(MF)) {
1672 FrameReg = RegInfo->getBaseRegister();
1673 Offset -= SPAdj;
1674 }
1675 return Offset;
1676}
1677
1678void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1679 MachineBasicBlock::iterator MI,
1680 ArrayRef<CalleeSavedInfo> CSI,
1681 unsigned StmOpc, unsigned StrOpc,
1682 bool NoGap,
1683 function_ref<bool(unsigned)> Func) const {
1684 MachineFunction &MF = *MBB.getParent();
1685 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1686 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1687
1688 DebugLoc DL;
1689
1690 using RegAndKill = std::pair<unsigned, bool>;
1691
1692 SmallVector<RegAndKill, 4> Regs;
1693 unsigned i = CSI.size();
1694 while (i != 0) {
1695 unsigned LastReg = 0;
1696 for (; i != 0; --i) {
1697 MCRegister Reg = CSI[i-1].getReg();
1698 if (!Func(Reg))
1699 continue;
1700
1701 const MachineRegisterInfo &MRI = MF.getRegInfo();
1702 bool isLiveIn = MRI.isLiveIn(Reg);
1703 if (!isLiveIn && !MRI.isReserved(PhysReg: Reg))
1704 MBB.addLiveIn(PhysReg: Reg);
1705 // If NoGap is true, push consecutive registers and then leave the rest
1706 // for other instructions. e.g.
1707 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1708 if (NoGap && LastReg && LastReg != Reg-1)
1709 break;
1710 LastReg = Reg;
1711 // Do not set a kill flag on values that are also marked as live-in. This
1712 // happens with the @llvm-returnaddress intrinsic and with arguments
1713 // passed in callee saved registers.
1714 // Omitting the kill flags is conservatively correct even if the live-in
1715 // is not used after all.
1716 Regs.push_back(Elt: std::make_pair(x&: Reg, /*isKill=*/y: !isLiveIn));
1717 }
1718
1719 if (Regs.empty())
1720 continue;
1721
1722 llvm::sort(C&: Regs, Comp: [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1723 return TRI.getEncodingValue(Reg: LHS.first) < TRI.getEncodingValue(Reg: RHS.first);
1724 });
1725
1726 if (Regs.size() > 1 || StrOpc== 0) {
1727 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StmOpc), DestReg: ARM::SP)
1728 .addReg(RegNo: ARM::SP)
1729 .setMIFlags(MachineInstr::FrameSetup)
1730 .add(MOs: predOps(Pred: ARMCC::AL));
1731 for (const auto &[Reg, Kill] : Regs)
1732 MIB.addReg(RegNo: Reg, Flags: getKillRegState(B: Kill));
1733 } else if (Regs.size() == 1) {
1734 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StrOpc), DestReg: ARM::SP)
1735 .addReg(RegNo: Regs[0].first, Flags: getKillRegState(B: Regs[0].second))
1736 .addReg(RegNo: ARM::SP)
1737 .setMIFlags(MachineInstr::FrameSetup)
1738 .addImm(Val: -4)
1739 .add(MOs: predOps(Pred: ARMCC::AL));
1740 }
1741 Regs.clear();
1742
1743 // Put any subsequent vpush instructions before this one: they will refer to
1744 // higher register numbers so need to be pushed first in order to preserve
1745 // monotonicity.
1746 if (MI != MBB.begin())
1747 --MI;
1748 }
1749}
1750
1751void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1752 MachineBasicBlock::iterator MI,
1753 MutableArrayRef<CalleeSavedInfo> CSI,
1754 unsigned LdmOpc, unsigned LdrOpc,
1755 bool isVarArg, bool NoGap,
1756 function_ref<bool(unsigned)> Func) const {
1757 MachineFunction &MF = *MBB.getParent();
1758 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1759 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1760 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1761 bool hasPAC = AFI->shouldSignReturnAddress();
1762 DebugLoc DL;
1763 bool isTailCall = false;
1764 bool isInterrupt = false;
1765 bool isTrap = false;
1766 bool isCmseEntry = false;
1767 ARMSubtarget::PushPopSplitVariation PushPopSplit =
1768 STI.getPushPopSplitVariation(MF);
1769 if (MBB.end() != MI) {
1770 DL = MI->getDebugLoc();
1771 unsigned RetOpcode = MI->getOpcode();
1772 isTailCall =
1773 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1774 RetOpcode == ARM::TCRETURNrinotr12);
1775 isInterrupt =
1776 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1777 isTrap = RetOpcode == ARM::TRAP || RetOpcode == ARM::tTRAP;
1778 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1779 }
1780
1781 SmallVector<unsigned, 4> Regs;
1782 unsigned i = CSI.size();
1783 while (i != 0) {
1784 unsigned LastReg = 0;
1785 bool DeleteRet = false;
1786 for (; i != 0; --i) {
1787 CalleeSavedInfo &Info = CSI[i-1];
1788 MCRegister Reg = Info.getReg();
1789 if (!Func(Reg))
1790 continue;
1791
1792 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1793 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1794 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1795 (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1796 PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
1797 Reg = ARM::PC;
1798 // Fold the return instruction into the LDM.
1799 DeleteRet = true;
1800 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1801 }
1802
1803 // If NoGap is true, pop consecutive registers and then leave the rest
1804 // for other instructions. e.g.
1805 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1806 if (NoGap && LastReg && LastReg != Reg-1)
1807 break;
1808
1809 LastReg = Reg;
1810 Regs.push_back(Elt: Reg);
1811 }
1812
1813 if (Regs.empty())
1814 continue;
1815
1816 llvm::sort(C&: Regs, Comp: [&](unsigned LHS, unsigned RHS) {
1817 return TRI.getEncodingValue(Reg: LHS) < TRI.getEncodingValue(Reg: RHS);
1818 });
1819
1820 if (Regs.size() > 1 || LdrOpc == 0) {
1821 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
1822 .addReg(RegNo: ARM::SP)
1823 .add(MOs: predOps(Pred: ARMCC::AL))
1824 .setMIFlags(MachineInstr::FrameDestroy);
1825 for (unsigned Reg : Regs)
1826 MIB.addReg(RegNo: Reg, Flags: getDefRegState(B: true));
1827 if (DeleteRet) {
1828 if (MI != MBB.end()) {
1829 MIB.copyImplicitOps(OtherMI: *MI);
1830 MI->eraseFromParent();
1831 }
1832 }
1833 MI = MIB;
1834 } else if (Regs.size() == 1) {
1835 // If we adjusted the reg to PC from LR above, switch it back here. We
1836 // only do that for LDM.
1837 if (Regs[0] == ARM::PC)
1838 Regs[0] = ARM::LR;
1839 MachineInstrBuilder MIB =
1840 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdrOpc), DestReg: Regs[0])
1841 .addReg(RegNo: ARM::SP, Flags: RegState::Define)
1842 .addReg(RegNo: ARM::SP)
1843 .setMIFlags(MachineInstr::FrameDestroy);
1844 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1845 // that refactoring is complete (eventually).
1846 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1847 MIB.addReg(RegNo: 0);
1848 MIB.addImm(Val: ARM_AM::getAM2Opc(Opc: ARM_AM::add, Imm12: 4, SO: ARM_AM::no_shift));
1849 } else
1850 MIB.addImm(Val: 4);
1851 MIB.add(MOs: predOps(Pred: ARMCC::AL));
1852 }
1853 Regs.clear();
1854
1855 // Put any subsequent vpop instructions after this one: they will refer to
1856 // higher register numbers so need to be popped afterwards.
1857 if (MI != MBB.end())
1858 ++MI;
1859 }
1860}
1861
1862void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
1863 MachineBasicBlock::iterator MI,
1864 ArrayRef<CalleeSavedInfo> CSI,
1865 unsigned PushOpc) const {
1866 MachineFunction &MF = *MBB.getParent();
1867 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1868
1869 SmallVector<MCRegister> Regs;
1870 auto RegPresent = [&CSI](MCRegister Reg) {
1871 return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
1872 return C.getReg() == Reg;
1873 });
1874 };
1875
1876 // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
1877 // instruction.
1878 if (RegPresent(ARM::FPSCR)) {
1879 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS), DestReg: ARM::R4)
1880 .add(MOs: predOps(Pred: ARMCC::AL))
1881 .setMIFlags(MachineInstr::FrameSetup);
1882
1883 Regs.push_back(Elt: ARM::R4);
1884 }
1885
1886 // If we need to save FPEXC, then we must move FPEXC into R5 with the
1887 // VMRS_FPEXC instruction.
1888 if (RegPresent(ARM::FPEXC)) {
1889 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS_FPEXC), DestReg: ARM::R5)
1890 .add(MOs: predOps(Pred: ARMCC::AL))
1891 .setMIFlags(MachineInstr::FrameSetup);
1892
1893 Regs.push_back(Elt: ARM::R5);
1894 }
1895
1896 // If neither FPSCR and FPEXC are present, then do nothing.
1897 if (Regs.size() == 0)
1898 return;
1899
1900 // Push both R4 and R5 onto the stack, if present.
1901 MachineInstrBuilder MIB =
1902 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: PushOpc), DestReg: ARM::SP)
1903 .addReg(RegNo: ARM::SP)
1904 .add(MOs: predOps(Pred: ARMCC::AL))
1905 .setMIFlags(MachineInstr::FrameSetup);
1906
1907 for (Register Reg : Regs) {
1908 MIB.addReg(RegNo: Reg);
1909 }
1910}
1911
1912void ARMFrameLowering::emitFPStatusRestores(
1913 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1914 MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
1915 MachineFunction &MF = *MBB.getParent();
1916 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1917
1918 auto RegPresent = [&CSI](MCRegister Reg) {
1919 return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
1920 return C.getReg() == Reg;
1921 });
1922 };
1923
1924 // Do nothing if we don't need to restore any FP status registers.
1925 if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
1926 return;
1927
1928 // Pop registers off of the stack.
1929 MachineInstrBuilder MIB =
1930 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
1931 .addReg(RegNo: ARM::SP)
1932 .add(MOs: predOps(Pred: ARMCC::AL))
1933 .setMIFlags(MachineInstr::FrameDestroy);
1934
1935 // If FPSCR was saved, it will be popped into R4.
1936 if (RegPresent(ARM::FPSCR)) {
1937 MIB.addReg(RegNo: ARM::R4, Flags: RegState::Define);
1938 }
1939
1940 // If FPEXC was saved, it will be popped into R5.
1941 if (RegPresent(ARM::FPEXC)) {
1942 MIB.addReg(RegNo: ARM::R5, Flags: RegState::Define);
1943 }
1944
1945 // Move the FPSCR value back into the register with the VMSR instruction.
1946 if (RegPresent(ARM::FPSCR)) {
1947 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR))
1948 .addReg(RegNo: ARM::R4)
1949 .add(MOs: predOps(Pred: ARMCC::AL))
1950 .setMIFlags(MachineInstr::FrameDestroy);
1951 }
1952
1953 // Move the FPEXC value back into the register with the VMSR_FPEXC
1954 // instruction.
1955 if (RegPresent(ARM::FPEXC)) {
1956 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR_FPEXC))
1957 .addReg(RegNo: ARM::R5)
1958 .add(MOs: predOps(Pred: ARMCC::AL))
1959 .setMIFlags(MachineInstr::FrameDestroy);
1960 }
1961}
1962
1963/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1964/// starting from d8. Also insert stack realignment code and leave the stack
1965/// pointer pointing to the d8 spill slot.
1966static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1967 MachineBasicBlock::iterator MI,
1968 unsigned NumAlignedDPRCS2Regs,
1969 ArrayRef<CalleeSavedInfo> CSI,
1970 const TargetRegisterInfo *TRI) {
1971 MachineFunction &MF = *MBB.getParent();
1972 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1973 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1974 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1975 MachineFrameInfo &MFI = MF.getFrameInfo();
1976
1977 // Mark the D-register spill slots as properly aligned. Since MFI computes
1978 // stack slot layout backwards, this can actually mean that the d-reg stack
1979 // slot offsets can be wrong. The offset for d8 will always be correct.
1980 for (const CalleeSavedInfo &I : CSI) {
1981 unsigned DNum = I.getReg() - ARM::D8;
1982 if (DNum > NumAlignedDPRCS2Regs - 1)
1983 continue;
1984 int FI = I.getFrameIdx();
1985 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1986 // registers will be 8-byte aligned.
1987 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: DNum % 2 ? Align(8) : Align(16));
1988
1989 // The stack slot for D8 needs to be maximally aligned because this is
1990 // actually the point where we align the stack pointer. MachineFrameInfo
1991 // computes all offsets relative to the incoming stack pointer which is a
1992 // bit weird when realigning the stack. Any extra padding for this
1993 // over-alignment is not realized because the code inserted below adjusts
1994 // the stack pointer by numregs * 8 before aligning the stack pointer.
1995 if (DNum == 0)
1996 MFI.setObjectAlignment(ObjectIdx: FI, Alignment: MFI.getMaxAlign());
1997 }
1998
1999 // Move the stack pointer to the d8 spill slot, and align it at the same
2000 // time. Leave the stack slot address in the scratch register r4.
2001 //
2002 // sub r4, sp, #numregs * 8
2003 // bic r4, r4, #align - 1
2004 // mov sp, r4
2005 //
2006 bool isThumb = AFI->isThumbFunction();
2007 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
2008 AFI->setShouldRestoreSPFromFP(true);
2009
2010 // sub r4, sp, #numregs * 8
2011 // The immediate is <= 64, so it doesn't need any special encoding.
2012 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
2013 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
2014 .addReg(RegNo: ARM::SP)
2015 .addImm(Val: 8 * NumAlignedDPRCS2Regs)
2016 .add(MOs: predOps(Pred: ARMCC::AL))
2017 .add(MO: condCodeOp());
2018
2019 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
2020 // We must set parameter MustBeSingleInstruction to true, since
2021 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
2022 // stack alignment. Luckily, this can always be done since all ARM
2023 // architecture versions that support Neon also support the BFC
2024 // instruction.
2025 emitAligningInstructions(MF, AFI, TII, MBB, MBBI: MI, DL, Reg: ARM::R4, Alignment: MaxAlign, MustBeSingleInstruction: true);
2026
2027 // mov sp, r4
2028 // The stack pointer must be adjusted before spilling anything, otherwise
2029 // the stack slots could be clobbered by an interrupt handler.
2030 // Leave r4 live, it is used below.
2031 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
2032 MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::SP)
2033 .addReg(RegNo: ARM::R4)
2034 .add(MOs: predOps(Pred: ARMCC::AL));
2035 if (!isThumb)
2036 MIB.add(MO: condCodeOp());
2037
2038 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
2039 // r4 holds the stack slot address.
2040 unsigned NextReg = ARM::D8;
2041
2042 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
2043 // The writeback is only needed when emitting two vst1.64 instructions.
2044 if (NumAlignedDPRCS2Regs >= 6) {
2045 MCRegister SupReg =
2046 TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
2047 MBB.addLiveIn(PhysReg: SupReg);
2048 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Qwb_fixed), DestReg: ARM::R4)
2049 .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
2050 .addImm(Val: 16)
2051 .addReg(RegNo: NextReg)
2052 .addReg(RegNo: SupReg, Flags: RegState::ImplicitKill)
2053 .add(MOs: predOps(Pred: ARMCC::AL));
2054 NextReg += 4;
2055 NumAlignedDPRCS2Regs -= 4;
2056 }
2057
2058 // We won't modify r4 beyond this point. It currently points to the next
2059 // register to be spilled.
2060 unsigned R4BaseReg = NextReg;
2061
2062 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
2063 if (NumAlignedDPRCS2Regs >= 4) {
2064 MCRegister SupReg =
2065 TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
2066 MBB.addLiveIn(PhysReg: SupReg);
2067 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Q))
2068 .addReg(RegNo: ARM::R4)
2069 .addImm(Val: 16)
2070 .addReg(RegNo: NextReg)
2071 .addReg(RegNo: SupReg, Flags: RegState::ImplicitKill)
2072 .add(MOs: predOps(Pred: ARMCC::AL));
2073 NextReg += 4;
2074 NumAlignedDPRCS2Regs -= 4;
2075 }
2076
2077 // 16-byte aligned vst1.64 with 2 d-regs.
2078 if (NumAlignedDPRCS2Regs >= 2) {
2079 MCRegister SupReg =
2080 TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
2081 MBB.addLiveIn(PhysReg: SupReg);
2082 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1q64))
2083 .addReg(RegNo: ARM::R4)
2084 .addImm(Val: 16)
2085 .addReg(RegNo: SupReg)
2086 .add(MOs: predOps(Pred: ARMCC::AL));
2087 NextReg += 2;
2088 NumAlignedDPRCS2Regs -= 2;
2089 }
2090
2091 // Finally, use a vanilla vstr.64 for the odd last register.
2092 if (NumAlignedDPRCS2Regs) {
2093 MBB.addLiveIn(PhysReg: NextReg);
2094 // vstr.64 uses addrmode5 which has an offset scale of 4.
2095 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VSTRD))
2096 .addReg(RegNo: NextReg)
2097 .addReg(RegNo: ARM::R4)
2098 .addImm(Val: (NextReg - R4BaseReg) * 2)
2099 .add(MOs: predOps(Pred: ARMCC::AL));
2100 }
2101
2102 // The last spill instruction inserted should kill the scratch register r4.
2103 std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
2104}
2105
2106/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
2107/// iterator to the following instruction.
2108static MachineBasicBlock::iterator
2109skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
2110 unsigned NumAlignedDPRCS2Regs) {
2111 // sub r4, sp, #numregs * 8
2112 // bic r4, r4, #align - 1
2113 // mov sp, r4
2114 ++MI; ++MI; ++MI;
2115 assert(MI->mayStore() && "Expecting spill instruction");
2116
2117 // These switches all fall through.
2118 switch(NumAlignedDPRCS2Regs) {
2119 case 7:
2120 ++MI;
2121 assert(MI->mayStore() && "Expecting spill instruction");
2122 [[fallthrough]];
2123 default:
2124 ++MI;
2125 assert(MI->mayStore() && "Expecting spill instruction");
2126 [[fallthrough]];
2127 case 1:
2128 case 2:
2129 case 4:
2130 assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
2131 ++MI;
2132 }
2133 return MI;
2134}
2135
2136/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
2137/// starting from d8. These instructions are assumed to execute while the
2138/// stack is still aligned, unlike the code inserted by emitPopInst.
2139static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
2140 MachineBasicBlock::iterator MI,
2141 unsigned NumAlignedDPRCS2Regs,
2142 ArrayRef<CalleeSavedInfo> CSI,
2143 const TargetRegisterInfo *TRI) {
2144 MachineFunction &MF = *MBB.getParent();
2145 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2146 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
2147 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2148
2149 // Find the frame index assigned to d8.
2150 int D8SpillFI = 0;
2151 for (const CalleeSavedInfo &I : CSI)
2152 if (I.getReg() == ARM::D8) {
2153 D8SpillFI = I.getFrameIdx();
2154 break;
2155 }
2156
2157 // Materialize the address of the d8 spill slot into the scratch register r4.
2158 // This can be fairly complicated if the stack frame is large, so just use
2159 // the normal frame index elimination mechanism to do it. This code runs as
2160 // the initial part of the epilog where the stack and base pointers haven't
2161 // been changed yet.
2162 bool isThumb = AFI->isThumbFunction();
2163 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
2164
2165 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
2166 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
2167 .addFrameIndex(Idx: D8SpillFI)
2168 .addImm(Val: 0)
2169 .add(MOs: predOps(Pred: ARMCC::AL))
2170 .add(MO: condCodeOp());
2171
2172 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
2173 unsigned NextReg = ARM::D8;
2174
2175 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
2176 if (NumAlignedDPRCS2Regs >= 6) {
2177 MCRegister SupReg =
2178 TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
2179 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Qwb_fixed), DestReg: NextReg)
2180 .addReg(RegNo: ARM::R4, Flags: RegState::Define)
2181 .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
2182 .addImm(Val: 16)
2183 .addReg(RegNo: SupReg, Flags: RegState::ImplicitDefine)
2184 .add(MOs: predOps(Pred: ARMCC::AL));
2185 NextReg += 4;
2186 NumAlignedDPRCS2Regs -= 4;
2187 }
2188
2189 // We won't modify r4 beyond this point. It currently points to the next
2190 // register to be spilled.
2191 unsigned R4BaseReg = NextReg;
2192
2193 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
2194 if (NumAlignedDPRCS2Regs >= 4) {
2195 MCRegister SupReg =
2196 TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
2197 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Q), DestReg: NextReg)
2198 .addReg(RegNo: ARM::R4)
2199 .addImm(Val: 16)
2200 .addReg(RegNo: SupReg, Flags: RegState::ImplicitDefine)
2201 .add(MOs: predOps(Pred: ARMCC::AL));
2202 NextReg += 4;
2203 NumAlignedDPRCS2Regs -= 4;
2204 }
2205
2206 // 16-byte aligned vld1.64 with 2 d-regs.
2207 if (NumAlignedDPRCS2Regs >= 2) {
2208 MCRegister SupReg =
2209 TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
2210 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1q64), DestReg: SupReg)
2211 .addReg(RegNo: ARM::R4)
2212 .addImm(Val: 16)
2213 .add(MOs: predOps(Pred: ARMCC::AL));
2214 NextReg += 2;
2215 NumAlignedDPRCS2Regs -= 2;
2216 }
2217
2218 // Finally, use a vanilla vldr.64 for the remaining odd register.
2219 if (NumAlignedDPRCS2Regs)
2220 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLDRD), DestReg: NextReg)
2221 .addReg(RegNo: ARM::R4)
2222 .addImm(Val: 2 * (NextReg - R4BaseReg))
2223 .add(MOs: predOps(Pred: ARMCC::AL));
2224
2225 // Last store kills r4.
2226 std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
2227}
2228
2229bool ARMFrameLowering::spillCalleeSavedRegisters(
2230 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2231 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2232 if (CSI.empty())
2233 return false;
2234
2235 MachineFunction &MF = *MBB.getParent();
2236 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2237 ARMSubtarget::PushPopSplitVariation PushPopSplit =
2238 STI.getPushPopSplitVariation(MF);
2239 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2240
2241 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
2242 unsigned PushOneOpc = AFI->isThumbFunction() ?
2243 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
2244 unsigned FltOpc = ARM::VSTMDDB_UPD;
2245 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2246 // Compute PAC in R12.
2247 if (AFI->shouldSignReturnAddress()) {
2248 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2PAC))
2249 .setMIFlags(MachineInstr::FrameSetup);
2250 }
2251 // Save the non-secure floating point context.
2252 if (llvm::any_of(Range&: CSI, P: [](const CalleeSavedInfo &C) {
2253 return C.getReg() == ARM::FPCXTNS;
2254 })) {
2255 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VSTR_FPCXTNS_pre),
2256 DestReg: ARM::SP)
2257 .addReg(RegNo: ARM::SP)
2258 .addImm(Val: -4)
2259 .add(MOs: predOps(Pred: ARMCC::AL));
2260 }
2261
2262 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2263 RegInfo](unsigned Reg, SpillArea TestArea) {
2264 return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2265 TestArea;
2266 };
2267 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2268 return CheckRegArea(Reg, SpillArea::GPRCS1);
2269 };
2270 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2271 return CheckRegArea(Reg, SpillArea::GPRCS2);
2272 };
2273 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2274 return CheckRegArea(Reg, SpillArea::DPRCS1);
2275 };
2276 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2277 return CheckRegArea(Reg, SpillArea::GPRCS3);
2278 };
2279
2280 emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS1);
2281 emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS2);
2282 emitFPStatusSaves(MBB, MI, CSI, PushOpc);
2283 emitPushInst(MBB, MI, CSI, StmOpc: FltOpc, StrOpc: 0, NoGap: true, Func: IsDPRCS1);
2284 emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS3);
2285
2286 // The code above does not insert spill code for the aligned DPRCS2 registers.
2287 // The stack realignment code will be inserted between the push instructions
2288 // and these spills.
2289 if (NumAlignedDPRCS2Regs)
2290 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2291
2292 return true;
2293}
2294
2295bool ARMFrameLowering::restoreCalleeSavedRegisters(
2296 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2297 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2298 if (CSI.empty())
2299 return false;
2300
2301 MachineFunction &MF = *MBB.getParent();
2302 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2303 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2304
2305 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2306 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2307 ARMSubtarget::PushPopSplitVariation PushPopSplit =
2308 STI.getPushPopSplitVariation(MF);
2309
2310 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2311 // registers. Do that here instead.
2312 if (NumAlignedDPRCS2Regs)
2313 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2314
2315 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2316 unsigned LdrOpc =
2317 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2318 unsigned FltOpc = ARM::VLDMDIA_UPD;
2319
2320 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2321 RegInfo](unsigned Reg, SpillArea TestArea) {
2322 return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2323 TestArea;
2324 };
2325 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2326 return CheckRegArea(Reg, SpillArea::GPRCS1);
2327 };
2328 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2329 return CheckRegArea(Reg, SpillArea::GPRCS2);
2330 };
2331 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2332 return CheckRegArea(Reg, SpillArea::DPRCS1);
2333 };
2334 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2335 return CheckRegArea(Reg, SpillArea::GPRCS3);
2336 };
2337
2338 emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS3);
2339 emitPopInst(MBB, MI, CSI, LdmOpc: FltOpc, LdrOpc: 0, isVarArg, NoGap: true, Func: IsDPRCS1);
2340 emitFPStatusRestores(MBB, MI, CSI, LdmOpc: PopOpc);
2341 emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS2);
2342 emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS1);
2343
2344 return true;
2345}
2346
2347// FIXME: Make generic?
2348static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2349 const ARMBaseInstrInfo &TII) {
2350 unsigned FnSize = 0;
2351 for (auto &MBB : MF) {
2352 for (auto &MI : MBB)
2353 FnSize += TII.getInstSizeInBytes(MI);
2354 }
2355 if (MF.getJumpTableInfo())
2356 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2357 FnSize += Table.MBBs.size() * 4;
2358 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2359 LLVM_DEBUG(dbgs() << "Estimated function size for " << MF.getName() << " = "
2360 << FnSize << " bytes\n");
2361 return FnSize;
2362}
2363
2364/// estimateRSStackSizeLimit - Look at each instruction that references stack
2365/// frames and return the stack size limit beyond which some of these
2366/// instructions will require a scratch register during their expansion later.
2367// FIXME: Move to TII?
2368static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
2369 const TargetFrameLowering *TFI,
2370 bool &HasNonSPFrameIndex) {
2371 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2372 const ARMBaseInstrInfo &TII =
2373 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2374 unsigned Limit = (1 << 12) - 1;
2375 for (auto &MBB : MF) {
2376 for (auto &MI : MBB) {
2377 if (MI.isDebugInstr())
2378 continue;
2379 if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
2380 continue;
2381 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2382 if (!MI.getOperand(i).isFI())
2383 continue;
2384
2385 // When using ADDri to get the address of a stack object, 255 is the
2386 // largest offset guaranteed to fit in the immediate offset.
2387 if (MI.getOpcode() == ARM::ADDri) {
2388 Limit = std::min(a: Limit, b: (1U << 8) - 1);
2389 break;
2390 }
2391 // t2ADDri will not require an extra register, it can reuse the
2392 // destination.
2393 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2394 break;
2395
2396 const MCInstrDesc &MCID = MI.getDesc();
2397 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, OpNum: i);
2398 if (RegClass && !RegClass->contains(Reg: ARM::SP))
2399 HasNonSPFrameIndex = true;
2400
2401 // Otherwise check the addressing mode.
2402 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2403 case ARMII::AddrMode_i12:
2404 case ARMII::AddrMode2:
2405 // Default 12 bit limit.
2406 break;
2407 case ARMII::AddrMode3:
2408 case ARMII::AddrModeT2_i8neg:
2409 Limit = std::min(a: Limit, b: (1U << 8) - 1);
2410 break;
2411 case ARMII::AddrMode5FP16:
2412 Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 2);
2413 break;
2414 case ARMII::AddrMode5:
2415 case ARMII::AddrModeT2_i8s4:
2416 case ARMII::AddrModeT2_ldrex:
2417 Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 4);
2418 break;
2419 case ARMII::AddrModeT2_i12:
2420 // i12 supports only positive offset so these will be converted to
2421 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2422 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2423 Limit = std::min(a: Limit, b: (1U << 8) - 1);
2424 break;
2425 case ARMII::AddrMode4:
2426 case ARMII::AddrMode6:
2427 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2428 // immediate offset for stack references.
2429 return 0;
2430 case ARMII::AddrModeT2_i7:
2431 Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 1);
2432 break;
2433 case ARMII::AddrModeT2_i7s2:
2434 Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 2);
2435 break;
2436 case ARMII::AddrModeT2_i7s4:
2437 Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 4);
2438 break;
2439 default:
2440 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2441 }
2442 break; // At most one FI per instruction
2443 }
2444 }
2445 }
2446
2447 return Limit;
2448}
2449
2450// In functions that realign the stack, it can be an advantage to spill the
2451// callee-saved vector registers after realigning the stack. The vst1 and vld1
2452// instructions take alignment hints that can improve performance.
2453static void
2454checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
2455 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2456 if (!SpillAlignedNEONRegs)
2457 return;
2458
2459 // Naked functions don't spill callee-saved registers.
2460 if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
2461 return;
2462
2463 // We are planning to use NEON instructions vst1 / vld1.
2464 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2465 return;
2466
2467 // Don't bother if the default stack alignment is sufficiently high.
2468 if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
2469 return;
2470
2471 // Aligned spills require stack realignment.
2472 if (!static_cast<const ARMBaseRegisterInfo *>(
2473 MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
2474 return;
2475
2476 // We always spill contiguous d-registers starting from d8. Count how many
2477 // needs spilling. The register allocator will almost always use the
2478 // callee-saved registers in order, but it can happen that there are holes in
2479 // the range. Registers above the hole will be spilled to the standard DPRCS
2480 // area.
2481 unsigned NumSpills = 0;
2482 for (; NumSpills < 8; ++NumSpills)
2483 if (!SavedRegs.test(Idx: ARM::D8 + NumSpills))
2484 break;
2485
2486 // Don't do this for just one d-register. It's not worth it.
2487 if (NumSpills < 2)
2488 return;
2489
2490 // Spill the first NumSpills D-registers after realigning the stack.
2491 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2492
2493 // A scratch register is required for the vst1 / vld1 instructions.
2494 SavedRegs.set(ARM::R4);
2495}
2496
2497bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2498 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2499 // upon function entry (resp. restore it immediately before return)
2500 if (STI.hasV8_1MMainlineOps() &&
2501 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2502 return false;
2503
2504 // We are disabling shrinkwrapping for now when PAC is enabled, as
2505 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2506 // generated. A follow-up patch will fix this in a more performant manner.
2507 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2508 SpillsLR: true /* SpillsLR */))
2509 return false;
2510
2511 return true;
2512}
2513
2514bool ARMFrameLowering::requiresAAPCSFrameRecord(
2515 const MachineFunction &MF) const {
2516 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2517 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2518}
2519
2520// Thumb1 may require a spill when storing to a frame index through FP (or any
2521// access with execute-only), for cases where FP is a high register (R11). This
2522// scans the function for cases where this may happen.
2523static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
2524 const TargetFrameLowering &TFI) {
2525 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2526 if (!AFI->isThumb1OnlyFunction())
2527 return false;
2528
2529 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2530 for (const auto &MBB : MF)
2531 for (const auto &MI : MBB)
2532 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2533 STI.genExecuteOnly())
2534 for (const auto &Op : MI.operands())
2535 if (Op.isFI()) {
2536 Register Reg;
2537 TFI.getFrameIndexReference(MF, FI: Op.getIndex(), FrameReg&: Reg);
2538 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2539 return true;
2540 }
2541 return false;
2542}
2543
2544void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
2545 BitVector &SavedRegs,
2546 RegScavenger *RS) const {
2547 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2548 // This tells PEI to spill the FP as if it is any other callee-save register
2549 // to take advantage the eliminateFrameIndex machinery. This also ensures it
2550 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2551 // to combine multiple loads / stores.
2552 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
2553 !MF.getTarget().Options.DisableFramePointerElim(MF);
2554 bool CS1Spilled = false;
2555 bool LRSpilled = false;
2556 unsigned NumGPRSpills = 0;
2557 unsigned NumFPRSpills = 0;
2558 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2559 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2560 const Function &F = MF.getFunction();
2561 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2562 MF.getSubtarget().getRegisterInfo());
2563 const ARMBaseInstrInfo &TII =
2564 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2565 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2566 MachineFrameInfo &MFI = MF.getFrameInfo();
2567 MachineRegisterInfo &MRI = MF.getRegInfo();
2568 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2569 (void)TRI; // Silence unused warning in non-assert builds.
2570 Register FramePtr = STI.getFramePointerReg();
2571 ARMSubtarget::PushPopSplitVariation PushPopSplit =
2572 STI.getPushPopSplitVariation(MF);
2573
2574 // For a floating point interrupt, save these registers always, since LLVM
2575 // currently doesn't model reads/writes to these registers.
2576 if (F.hasFnAttribute(Kind: "interrupt") && F.hasFnAttribute(Kind: "save-fp")) {
2577 SavedRegs.set(ARM::FPSCR);
2578 SavedRegs.set(ARM::R4);
2579
2580 // This register will only be present on non-MClass registers.
2581 if (STI.isMClass()) {
2582 SavedRegs.reset(Idx: ARM::FPEXC);
2583 } else {
2584 SavedRegs.set(ARM::FPEXC);
2585 SavedRegs.set(ARM::R5);
2586 }
2587 }
2588
2589 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2590 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2591 // since it's not always possible to restore sp from fp in a single
2592 // instruction.
2593 // FIXME: It will be better just to find spare register here.
2594 if (AFI->isThumb2Function() &&
2595 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2596 SavedRegs.set(ARM::R4);
2597
2598 // If a stack probe will be emitted, spill R4 and LR, since they are
2599 // clobbered by the stack probe call.
2600 // This estimate should be a safe, conservative estimate. The actual
2601 // stack probe is enabled based on the size of the local objects;
2602 // this estimate also includes the varargs store size.
2603 if (STI.isTargetWindows() &&
2604 WindowsRequiresStackProbe(MF, StackSizeInBytes: MFI.estimateStackSize(MF))) {
2605 SavedRegs.set(ARM::R4);
2606 SavedRegs.set(ARM::LR);
2607 }
2608
2609 if (AFI->isThumb1OnlyFunction()) {
2610 // Spill LR if Thumb1 function uses variable length argument lists.
2611 if (AFI->getArgRegsSaveSize() > 0)
2612 SavedRegs.set(ARM::LR);
2613
2614 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2615 // requires stack alignment. We don't know for sure what the stack size
2616 // will be, but for this, an estimate is good enough. If there anything
2617 // changes it, it'll be a spill, which implies we've used all the registers
2618 // and so R4 is already used, so not marking it here will be OK.
2619 // FIXME: It will be better just to find spare register here.
2620 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2621 MFI.estimateStackSize(MF) > 508)
2622 SavedRegs.set(ARM::R4);
2623 }
2624
2625 // See if we can spill vector registers to aligned stack.
2626 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2627
2628 // Spill the BasePtr if it's used.
2629 if (RegInfo->hasBasePointer(MF))
2630 SavedRegs.set(RegInfo->getBaseRegister());
2631
2632 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2633 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2634 CanEliminateFrame = false;
2635
2636 // When return address signing is enabled R12 is treated as callee-saved.
2637 if (AFI->shouldSignReturnAddress())
2638 CanEliminateFrame = false;
2639
2640 // Don't spill FP if the frame can be eliminated. This is determined
2641 // by scanning the callee-save registers to see if any is modified.
2642 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF: &MF);
2643 for (unsigned i = 0; CSRegs[i]; ++i) {
2644 unsigned Reg = CSRegs[i];
2645 bool Spilled = false;
2646 if (SavedRegs.test(Idx: Reg)) {
2647 Spilled = true;
2648 CanEliminateFrame = false;
2649 }
2650
2651 if (!ARM::GPRRegClass.contains(Reg)) {
2652 if (Spilled) {
2653 if (ARM::SPRRegClass.contains(Reg))
2654 NumFPRSpills++;
2655 else if (ARM::DPRRegClass.contains(Reg))
2656 NumFPRSpills += 2;
2657 else if (ARM::QPRRegClass.contains(Reg))
2658 NumFPRSpills += 4;
2659 }
2660 continue;
2661 }
2662
2663 if (Spilled) {
2664 NumGPRSpills++;
2665
2666 if (PushPopSplit != ARMSubtarget::SplitR7) {
2667 if (Reg == ARM::LR)
2668 LRSpilled = true;
2669 CS1Spilled = true;
2670 continue;
2671 }
2672
2673 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2674 switch (Reg) {
2675 case ARM::LR:
2676 LRSpilled = true;
2677 [[fallthrough]];
2678 case ARM::R0: case ARM::R1:
2679 case ARM::R2: case ARM::R3:
2680 case ARM::R4: case ARM::R5:
2681 case ARM::R6: case ARM::R7:
2682 CS1Spilled = true;
2683 break;
2684 default:
2685 break;
2686 }
2687 } else {
2688 if (PushPopSplit != ARMSubtarget::SplitR7) {
2689 UnspilledCS1GPRs.push_back(Elt: Reg);
2690 continue;
2691 }
2692
2693 switch (Reg) {
2694 case ARM::R0: case ARM::R1:
2695 case ARM::R2: case ARM::R3:
2696 case ARM::R4: case ARM::R5:
2697 case ARM::R6: case ARM::R7:
2698 case ARM::LR:
2699 UnspilledCS1GPRs.push_back(Elt: Reg);
2700 break;
2701 default:
2702 UnspilledCS2GPRs.push_back(Elt: Reg);
2703 break;
2704 }
2705 }
2706 }
2707
2708 bool ForceLRSpill = false;
2709 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2710 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2711 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2712 // use of BL to implement far jump.
2713 if (FnSize >= (1 << 11)) {
2714 CanEliminateFrame = false;
2715 ForceLRSpill = true;
2716 }
2717 }
2718
2719 // If any of the stack slot references may be out of range of an immediate
2720 // offset, make sure a register (or a spill slot) is available for the
2721 // register scavenger. Note that if we're indexing off the frame pointer, the
2722 // effective stack size is 4 bytes larger since the FP points to the stack
2723 // slot of the previous FP. Also, if we have variable sized objects in the
2724 // function, stack slot references will often be negative, and some of
2725 // our instructions are positive-offset only, so conservatively consider
2726 // that case to want a spill slot (or register) as well. Similarly, if
2727 // the function adjusts the stack pointer during execution and the
2728 // adjustments aren't already part of our stack size estimate, our offset
2729 // calculations may be off, so be conservative.
2730 // FIXME: We could add logic to be more precise about negative offsets
2731 // and which instructions will need a scratch register for them. Is it
2732 // worth the effort and added fragility?
2733 unsigned EstimatedStackSize =
2734 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2735
2736 // Determine biggest (positive) SP offset in MachineFrameInfo.
2737 int MaxFixedOffset = 0;
2738 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2739 int MaxObjectOffset = MFI.getObjectOffset(ObjectIdx: I) + MFI.getObjectSize(ObjectIdx: I);
2740 MaxFixedOffset = std::max(a: MaxFixedOffset, b: MaxObjectOffset);
2741 }
2742
2743 bool HasFP = hasFP(MF);
2744 if (HasFP) {
2745 if (AFI->hasStackFrame())
2746 EstimatedStackSize += 4;
2747 } else {
2748 // If FP is not used, SP will be used to access arguments, so count the
2749 // size of arguments into the estimation.
2750 EstimatedStackSize += MaxFixedOffset;
2751 }
2752 EstimatedStackSize += 16; // For possible paddings.
2753
2754 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2755 bool HasNonSPFrameIndex = false;
2756 if (AFI->isThumb1OnlyFunction()) {
2757 // For Thumb1, don't bother to iterate over the function. The only
2758 // instruction that requires an emergency spill slot is a store to a
2759 // frame index.
2760 //
2761 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2762 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2763 // a 5-bit unsigned immediate.
2764 //
2765 // We could try to check if the function actually contains a tSTRspi
2766 // that might need the spill slot, but it's not really important.
2767 // Functions with VLAs or extremely large call frames are rare, and
2768 // if a function is allocating more than 1KB of stack, an extra 4-byte
2769 // slot probably isn't relevant.
2770 //
2771 // A special case is the scenario where r11 is used as FP, where accesses
2772 // to a frame index will require its value to be moved into a low reg.
2773 // This is handled later on, once we are able to determine if we have any
2774 // fp-relative accesses.
2775 if (RegInfo->hasBasePointer(MF))
2776 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2777 else
2778 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2779 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2780 } else {
2781 EstimatedRSStackSizeLimit =
2782 estimateRSStackSizeLimit(MF, TFI: this, HasNonSPFrameIndex);
2783 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2784 }
2785 // Final estimate of whether sp or bp-relative accesses might require
2786 // scavenging.
2787 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2788
2789 // If the stack pointer moves and we don't have a base pointer, the
2790 // estimate logic doesn't work. The actual offsets might be larger when
2791 // we're constructing a call frame, or we might need to use negative
2792 // offsets from fp.
2793 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2794 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2795 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2796
2797 // If we have a frame pointer, we assume arguments will be accessed
2798 // relative to the frame pointer. Check whether fp-relative accesses to
2799 // arguments require scavenging.
2800 //
2801 // We could do slightly better on Thumb1; in some cases, an sp-relative
2802 // offset would be legal even though an fp-relative offset is not.
2803 int MaxFPOffset = getMaxFPOffset(STI, AFI: *AFI, MF);
2804 bool HasLargeArgumentList =
2805 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2806
2807 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2808 HasLargeArgumentList || HasNonSPFrameIndex;
2809 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2810 << "; EstimatedStack: " << EstimatedStackSize
2811 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2812 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2813 if (BigFrameOffsets ||
2814 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2815 AFI->setHasStackFrame(true);
2816
2817 // Save the FP if:
2818 // 1. We currently need it (HasFP), OR
2819 // 2. We might need it later due to stack realignment from aligned DPRCS2
2820 // saves (which will make hasFP() become true in emitPrologue).
2821 if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
2822 SavedRegs.set(FramePtr);
2823 // If the frame pointer is required by the ABI, also spill LR so that we
2824 // emit a complete frame record.
2825 if ((requiresAAPCSFrameRecord(MF) ||
2826 MF.getTarget().Options.DisableFramePointerElim(MF)) &&
2827 !LRSpilled) {
2828 SavedRegs.set(ARM::LR);
2829 LRSpilled = true;
2830 NumGPRSpills++;
2831 auto LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: ARM::LR);
2832 if (LRPos != UnspilledCS1GPRs.end())
2833 UnspilledCS1GPRs.erase(CI: LRPos);
2834 }
2835 auto FPPos = llvm::find(Range&: UnspilledCS1GPRs, Val: FramePtr);
2836 if (FPPos != UnspilledCS1GPRs.end())
2837 UnspilledCS1GPRs.erase(CI: FPPos);
2838 NumGPRSpills++;
2839 if (FramePtr == ARM::R7)
2840 CS1Spilled = true;
2841 }
2842
2843 // This is the number of extra spills inserted for callee-save GPRs which
2844 // would not otherwise be used by the function. When greater than zero it
2845 // guaranteees that it is possible to scavenge a register to hold the
2846 // address of a stack slot. On Thumb1, the register must be a valid operand
2847 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2848 // or lr.
2849 //
2850 // If we don't insert a spill, we instead allocate an emergency spill
2851 // slot, which can be used by scavenging to spill an arbitrary register.
2852 //
2853 // We currently don't try to figure out whether any specific instruction
2854 // requires scavening an additional register.
2855 unsigned NumExtraCSSpill = 0;
2856
2857 if (AFI->isThumb1OnlyFunction()) {
2858 // For Thumb1-only targets, we need some low registers when we save and
2859 // restore the high registers (which aren't allocatable, but could be
2860 // used by inline assembly) because the push/pop instructions can not
2861 // access high registers. If necessary, we might need to push more low
2862 // registers to ensure that there is at least one free that can be used
2863 // for the saving & restoring, and preferably we should ensure that as
2864 // many as are needed are available so that fewer push/pop instructions
2865 // are required.
2866
2867 // Low registers which are not currently pushed, but could be (r4-r7).
2868 SmallVector<unsigned, 4> AvailableRegs;
2869
2870 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2871 // free.
2872 int EntryRegDeficit = 0;
2873 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2874 if (!MF.getRegInfo().isLiveIn(Reg)) {
2875 --EntryRegDeficit;
2876 LLVM_DEBUG(dbgs()
2877 << printReg(Reg, TRI)
2878 << " is unused argument register, EntryRegDeficit = "
2879 << EntryRegDeficit << "\n");
2880 }
2881 }
2882
2883 // Unused return registers can be clobbered in the epilogue for free.
2884 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2885 LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
2886 << " return regs used, ExitRegDeficit = "
2887 << ExitRegDeficit << "\n");
2888
2889 int RegDeficit = std::max(a: EntryRegDeficit, b: ExitRegDeficit);
2890 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2891
2892 // r4-r6 can be used in the prologue if they are pushed by the first push
2893 // instruction.
2894 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2895 if (SavedRegs.test(Idx: Reg)) {
2896 --RegDeficit;
2897 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2898 << " is saved low register, RegDeficit = "
2899 << RegDeficit << "\n");
2900 } else {
2901 AvailableRegs.push_back(Elt: Reg);
2902 LLVM_DEBUG(
2903 dbgs()
2904 << printReg(Reg, TRI)
2905 << " is non-saved low register, adding to AvailableRegs\n");
2906 }
2907 }
2908
2909 // r7 can be used if it is not being used as the frame pointer.
2910 if (!HasFP || FramePtr != ARM::R7) {
2911 if (SavedRegs.test(Idx: ARM::R7)) {
2912 --RegDeficit;
2913 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2914 << RegDeficit << "\n");
2915 } else {
2916 AvailableRegs.push_back(Elt: ARM::R7);
2917 LLVM_DEBUG(
2918 dbgs()
2919 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2920 }
2921 }
2922
2923 // Each of r8-r11 needs to be copied to a low register, then pushed.
2924 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2925 if (SavedRegs.test(Idx: Reg)) {
2926 ++RegDeficit;
2927 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2928 << " is saved high register, RegDeficit = "
2929 << RegDeficit << "\n");
2930 }
2931 }
2932
2933 // LR can only be used by PUSH, not POP, and can't be used at all if the
2934 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2935 // are more limited at function entry than exit.
2936 if ((EntryRegDeficit > ExitRegDeficit) &&
2937 !(MF.getRegInfo().isLiveIn(Reg: ARM::LR) &&
2938 MF.getFrameInfo().isReturnAddressTaken())) {
2939 if (SavedRegs.test(Idx: ARM::LR)) {
2940 --RegDeficit;
2941 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2942 << RegDeficit << "\n");
2943 } else {
2944 AvailableRegs.push_back(Elt: ARM::LR);
2945 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2946 }
2947 }
2948
2949 // If there are more high registers that need pushing than low registers
2950 // available, push some more low registers so that we can use fewer push
2951 // instructions. This might not reduce RegDeficit all the way to zero,
2952 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2953 // need saving.
2954 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2955 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2956 unsigned Reg = AvailableRegs.pop_back_val();
2957 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2958 << " to make up reg deficit\n");
2959 SavedRegs.set(Reg);
2960 NumGPRSpills++;
2961 CS1Spilled = true;
2962 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2963 if (Reg != ARM::LR && !MRI.isPhysRegUsed(PhysReg: Reg))
2964 NumExtraCSSpill++;
2965 UnspilledCS1GPRs.erase(CI: llvm::find(Range&: UnspilledCS1GPRs, Val: Reg));
2966 if (Reg == ARM::LR)
2967 LRSpilled = true;
2968 }
2969 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2970 << "\n");
2971 }
2972
2973 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2974 // restore LR in that case.
2975 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2976
2977 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2978 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2979 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2980 SavedRegs.set(ARM::LR);
2981 NumGPRSpills++;
2982 SmallVectorImpl<unsigned>::iterator LRPos;
2983 LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: (unsigned)ARM::LR);
2984 if (LRPos != UnspilledCS1GPRs.end())
2985 UnspilledCS1GPRs.erase(CI: LRPos);
2986
2987 ForceLRSpill = false;
2988 if (!MRI.isReserved(PhysReg: ARM::LR) && !MRI.isPhysRegUsed(PhysReg: ARM::LR) &&
2989 !AFI->isThumb1OnlyFunction())
2990 NumExtraCSSpill++;
2991 }
2992
2993 // If stack and double are 8-byte aligned and we are spilling an odd number
2994 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2995 // the integer and double callee save areas.
2996 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2997 const Align TargetAlign = getStackAlign();
2998 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2999 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
3000 for (unsigned Reg : UnspilledCS1GPRs) {
3001 // Don't spill high register if the function is thumb. In the case of
3002 // Windows on ARM, accept R11 (frame pointer)
3003 if (!AFI->isThumbFunction() ||
3004 (STI.isTargetWindows() && Reg == ARM::R11) ||
3005 isARMLowRegister(Reg) ||
3006 (Reg == ARM::LR && !ExpensiveLRRestore)) {
3007 SavedRegs.set(Reg);
3008 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
3009 << " to make up alignment\n");
3010 if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg) &&
3011 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
3012 NumExtraCSSpill++;
3013 break;
3014 }
3015 }
3016 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
3017 unsigned Reg = UnspilledCS2GPRs.front();
3018 SavedRegs.set(Reg);
3019 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
3020 << " to make up alignment\n");
3021 if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg))
3022 NumExtraCSSpill++;
3023 }
3024 }
3025
3026 // Estimate if we might need to scavenge registers at some point in order
3027 // to materialize a stack offset. If so, either spill one additional
3028 // callee-saved register or reserve a special spill slot to facilitate
3029 // register scavenging. Thumb1 needs a spill slot for stack pointer
3030 // adjustments and for frame index accesses when FP is high register,
3031 // even when the frame itself is small.
3032 unsigned RegsNeeded = 0;
3033 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, TFI: *this)) {
3034 RegsNeeded++;
3035 // With thumb1 execute-only we may need an additional register for saving
3036 // and restoring the CPSR.
3037 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
3038 RegsNeeded++;
3039 }
3040
3041 if (RegsNeeded > NumExtraCSSpill) {
3042 // If any non-reserved CS register isn't spilled, just spill one or two
3043 // extra. That should take care of it!
3044 unsigned NumExtras = TargetAlign.value() / 4;
3045 SmallVector<unsigned, 2> Extras;
3046 while (NumExtras && !UnspilledCS1GPRs.empty()) {
3047 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
3048 if (!MRI.isReserved(PhysReg: Reg) &&
3049 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
3050 Extras.push_back(Elt: Reg);
3051 NumExtras--;
3052 }
3053 }
3054 // For non-Thumb1 functions, also check for hi-reg CS registers
3055 if (!AFI->isThumb1OnlyFunction()) {
3056 while (NumExtras && !UnspilledCS2GPRs.empty()) {
3057 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
3058 if (!MRI.isReserved(PhysReg: Reg)) {
3059 Extras.push_back(Elt: Reg);
3060 NumExtras--;
3061 }
3062 }
3063 }
3064 if (NumExtras == 0) {
3065 for (unsigned Reg : Extras) {
3066 SavedRegs.set(Reg);
3067 if (!MRI.isPhysRegUsed(PhysReg: Reg))
3068 NumExtraCSSpill++;
3069 }
3070 }
3071 while ((RegsNeeded > NumExtraCSSpill) && RS) {
3072 // Reserve a slot closest to SP or frame pointer.
3073 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
3074 const TargetRegisterClass &RC = ARM::GPRRegClass;
3075 unsigned Size = TRI->getSpillSize(RC);
3076 Align Alignment = TRI->getSpillAlign(RC);
3077 RS->addScavengingFrameIndex(
3078 FI: MFI.CreateSpillStackObject(Size, Alignment));
3079 --RegsNeeded;
3080 }
3081 }
3082 }
3083
3084 if (ForceLRSpill)
3085 SavedRegs.set(ARM::LR);
3086 AFI->setLRIsSpilled(SavedRegs.test(Idx: ARM::LR));
3087}
3088
3089void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
3090 MachineFrameInfo &MFI = MF.getFrameInfo();
3091 if (!MFI.isCalleeSavedInfoValid())
3092 return;
3093
3094 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3095 // into PC so it is not live out of the return block: Clear the Restored bit
3096 // in that case.
3097 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
3098 if (Info.getReg() != ARM::LR)
3099 continue;
3100 if (all_of(Range&: MF, P: [](const MachineBasicBlock &MBB) {
3101 return all_of(Range: MBB.terminators(), P: [](const MachineInstr &Term) {
3102 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3103 Term.getOpcode() == ARM::t2LDMIA_RET ||
3104 Term.getOpcode() == ARM::tPOP_RET;
3105 });
3106 })) {
3107 Info.setRestored(false);
3108 break;
3109 }
3110 }
3111}
3112
3113void ARMFrameLowering::processFunctionBeforeFrameFinalized(
3114 MachineFunction &MF, RegScavenger *RS) const {
3115 TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
3116 updateLRRestored(MF);
3117}
3118
3119void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
3120 BitVector &SavedRegs) const {
3121 TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
3122
3123 // If we have the "returned" parameter attribute which guarantees that we
3124 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3125 // record that fact for IPRA.
3126 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3127 if (AFI->getPreservesR0())
3128 SavedRegs.set(ARM::R0);
3129}
3130
3131bool ARMFrameLowering::assignCalleeSavedSpillSlots(
3132 MachineFunction &MF, const TargetRegisterInfo *TRI,
3133 std::vector<CalleeSavedInfo> &CSI) const {
3134 // For CMSE entry functions, handle floating-point context as if it was a
3135 // callee-saved register.
3136 if (STI.hasV8_1MMainlineOps() &&
3137 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
3138 CSI.emplace_back(args: ARM::FPCXTNS);
3139 CSI.back().setRestored(false);
3140 }
3141
3142 // For functions, which sign their return address, upon function entry, the
3143 // return address PAC is computed in R12. Treat R12 as a callee-saved register
3144 // in this case.
3145 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
3146 if (AFI.shouldSignReturnAddress()) {
3147 // The order of register must match the order we push them, because the
3148 // PEI assigns frame indices in that order. That order depends on the
3149 // PushPopSplitVariation, there are only two cases which we use with return
3150 // address signing:
3151 switch (STI.getPushPopSplitVariation(MF)) {
3152 case ARMSubtarget::SplitR7:
3153 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
3154 CSI.insert(position: find_if(Range&: CSI,
3155 P: [=](const auto &CS) {
3156 MCRegister Reg = CS.getReg();
3157 return Reg == ARM::R10 || Reg == ARM::R11 ||
3158 Reg == ARM::R8 || Reg == ARM::R9 ||
3159 ARM::DPRRegClass.contains(Reg);
3160 }),
3161 x: CalleeSavedInfo(ARM::R12));
3162 break;
3163 case ARMSubtarget::SplitR11AAPCSSignRA:
3164 // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
3165 // on the stack.
3166 CSI.insert(position: CSI.begin(), x: CalleeSavedInfo(ARM::R12));
3167 break;
3168 case ARMSubtarget::NoSplit:
3169 assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
3170 "ABI-required frame pointers need a CSR split when signing return "
3171 "address.");
3172 CSI.insert(position: find_if(Range&: CSI,
3173 P: [=](const auto &CS) {
3174 MCRegister Reg = CS.getReg();
3175 return Reg != ARM::LR;
3176 }),
3177 x: CalleeSavedInfo(ARM::R12));
3178 break;
3179 default:
3180 llvm_unreachable("Unexpected CSR split with return address signing");
3181 }
3182 }
3183
3184 return false;
3185}
3186
3187const TargetFrameLowering::SpillSlot *
3188ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
3189 static const SpillSlot FixedSpillOffsets[] = {{.Reg: ARM::FPCXTNS, .Offset: -4}};
3190 NumEntries = std::size(FixedSpillOffsets);
3191 return FixedSpillOffsets;
3192}
3193
3194MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
3195 MachineFunction &MF, MachineBasicBlock &MBB,
3196 MachineBasicBlock::iterator I) const {
3197 const ARMBaseInstrInfo &TII =
3198 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3199 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3200 bool isARM = !AFI->isThumbFunction();
3201 DebugLoc dl = I->getDebugLoc();
3202 unsigned Opc = I->getOpcode();
3203 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
3204 unsigned CalleePopAmount = IsDestroy ? I->getOperand(i: 1).getImm() : 0;
3205
3206 assert(!AFI->isThumb1OnlyFunction() &&
3207 "This eliminateCallFramePseudoInstr does not support Thumb1!");
3208
3209 int PIdx = I->findFirstPredOperandIdx();
3210 ARMCC::CondCodes Pred = (PIdx == -1)
3211 ? ARMCC::AL
3212 : (ARMCC::CondCodes)I->getOperand(i: PIdx).getImm();
3213 unsigned PredReg = TII.getFramePred(MI: *I);
3214
3215 if (!hasReservedCallFrame(MF)) {
3216 // Bail early if the callee is expected to do the adjustment.
3217 if (IsDestroy && CalleePopAmount != -1U)
3218 return MBB.erase(I);
3219
3220 // If we have alloca, convert as follows:
3221 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
3222 // ADJCALLSTACKUP -> add, sp, sp, amount
3223 unsigned Amount = TII.getFrameSize(I: *I);
3224 if (Amount != 0) {
3225 // We need to keep the stack aligned properly. To do this, we round the
3226 // amount of space needed for the outgoing arguments up to the next
3227 // alignment boundary.
3228 Amount = alignSPAdjust(SPAdj: Amount);
3229
3230 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
3231 emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -Amount, MIFlags: MachineInstr::NoFlags,
3232 Pred, PredReg);
3233 } else {
3234 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
3235 emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: Amount, MIFlags: MachineInstr::NoFlags,
3236 Pred, PredReg);
3237 }
3238 }
3239 } else if (CalleePopAmount != -1U) {
3240 // If the calling convention demands that the callee pops arguments from the
3241 // stack, we want to add it back if we have a reserved call frame.
3242 emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -CalleePopAmount,
3243 MIFlags: MachineInstr::NoFlags, Pred, PredReg);
3244 }
3245 return MBB.erase(I);
3246}
3247
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// \param Value The value to round up.
/// \returns \p Value rounded up to the next encodable constant; 0 for 0.
///
/// NOTE: for inputs above 0xFF000000 no such 32-bit constant exists; the
/// arithmetic below wraps and yields 0 in that case.
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time (rotations are even) until one
  // of the two most significant bits is set.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Keep the top byte; any bits below it force a round-up by one.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // The round-up overflowed the 8-bit field (0xFF + 1 == 0x100).
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Undo the normalization so the byte lands back at its original position.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
3276
// The stack limit stored in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;
3280
3281// Adjust the function prologue to enable split stacks. This currently only
3282// supports android and linux.
3283//
3284// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
3285// must be well defined in order to allow for consistent implementations of the
3286// __morestack helper function. The ABI is also not a normal ABI in that it
3287// doesn't follow the normal calling conventions because this allows the
3288// prologue of each function to be optimized further.
3289//
3290// Currently, the ABI looks like (when calling __morestack)
3291//
3292// * r4 holds the minimum stack size requested for this function call
3293// * r5 holds the stack size of the arguments to the function
3294// * the beginning of the function is 3 instructions after the call to
3295// __morestack
3296//
3297// Implementations of __morestack should use r4 to allocate a new stack, r5 to
3298// place the arguments on to the new stack, and the 3-instruction knowledge to
3299// jump directly to the body of the function when working on the new stack.
3300//
3301// An old (and possibly no longer compatible) implementation of __morestack for
3302// ARM can be found at [1].
3303//
3304// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
3305void ARMFrameLowering::adjustForSegmentedStacks(
3306 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3307 unsigned Opcode;
3308 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
3309 bool Thumb = ST->isThumb();
3310 bool Thumb2 = ST->isThumb2();
3311
3312 // Sadly, this currently doesn't support varargs, platforms other than
3313 // android/linux. Note that thumb1/thumb2 are support for android/linux.
3314 if (MF.getFunction().isVarArg())
3315 report_fatal_error(reason: "Segmented stacks do not support vararg functions.");
3316 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
3317 report_fatal_error(reason: "Segmented stacks not supported on this platform.");
3318
3319 MachineFrameInfo &MFI = MF.getFrameInfo();
3320 const ARMBaseInstrInfo &TII =
3321 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3322 ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
3323 DebugLoc DL;
3324
3325 if (!MFI.needsSplitStackProlog())
3326 return;
3327
3328 uint64_t StackSize = MFI.getStackSize();
3329
3330 // Use R4 and R5 as scratch registers.
3331 // We save R4 and R5 before use and restore them before leaving the function.
3332 unsigned ScratchReg0 = ARM::R4;
3333 unsigned ScratchReg1 = ARM::R5;
3334 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3335 uint64_t AlignedStackSize;
3336
3337 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3338 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3339 MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
3340 MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
3341 MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
3342
3343 // Grab everything that reaches PrologueMBB to update there liveness as well.
3344 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3345 SmallVector<MachineBasicBlock *, 2> WalkList;
3346 WalkList.push_back(Elt: &PrologueMBB);
3347
3348 do {
3349 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3350 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3351 if (BeforePrologueRegion.insert(Ptr: PredBB).second)
3352 WalkList.push_back(Elt: PredBB);
3353 }
3354 } while (!WalkList.empty());
3355
3356 // The order in that list is important.
3357 // The blocks will all be inserted before PrologueMBB using that order.
3358 // Therefore the block that should appear first in the CFG should appear
3359 // first in the list.
3360 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3361 PostStackMBB};
3362
3363 BeforePrologueRegion.insert_range(R&: AddedBlocks);
3364
3365 for (const auto &LI : PrologueMBB.liveins()) {
3366 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3367 PredBB->addLiveIn(RegMaskPair: LI);
3368 }
3369
3370 // Remove the newly added blocks from the list, since we know
3371 // we do not have to do the following updates for them.
3372 for (MachineBasicBlock *B : AddedBlocks) {
3373 BeforePrologueRegion.erase(Ptr: B);
3374 MF.insert(MBBI: PrologueMBB.getIterator(), MBB: B);
3375 }
3376
3377 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3378 // Make sure the LiveIns are still sorted and unique.
3379 MBB->sortUniqueLiveIns();
3380 // Replace the edges to PrologueMBB by edges to the sequences
3381 // we are about to add, but only update for immediate predecessors.
3382 if (MBB->isSuccessor(MBB: &PrologueMBB))
3383 MBB->ReplaceUsesOfBlockWith(Old: &PrologueMBB, New: AddedBlocks[0]);
3384 }
3385
3386 // The required stack size that is aligned to ARM constant criterion.
3387 AlignedStackSize = alignToARMConstant(Value: StackSize);
3388
3389 // When the frame size is less than 256 we just compare the stack
3390 // boundary directly to the value of the stack pointer, per gcc.
3391 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3392
3393 // We will use two of the callee save registers as scratch registers so we
3394 // need to save those registers onto the stack.
3395 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3396 // requested and arguments for __morestack().
3397 // SR0: Scratch Register #0
3398 // SR1: Scratch Register #1
3399 // push {SR0, SR1}
3400 if (Thumb) {
3401 BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
3402 .add(MOs: predOps(Pred: ARMCC::AL))
3403 .addReg(RegNo: ScratchReg0)
3404 .addReg(RegNo: ScratchReg1);
3405 } else {
3406 BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
3407 .addReg(RegNo: ARM::SP, Flags: RegState::Define)
3408 .addReg(RegNo: ARM::SP)
3409 .add(MOs: predOps(Pred: ARMCC::AL))
3410 .addReg(RegNo: ScratchReg0)
3411 .addReg(RegNo: ScratchReg1);
3412 }
3413
3414 // Emit the relevant DWARF information about the change in stack pointer as
3415 // well as where to find both r4 and r5 (the callee-save registers)
3416 if (!MF.getTarget().getMCAsmInfo().usesWindowsCFI()) {
3417 CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
3418 CFIBuilder.buildDefCFAOffset(Offset: 8);
3419 CFIBuilder.buildOffset(Reg: ScratchReg1, Offset: -4);
3420 CFIBuilder.buildOffset(Reg: ScratchReg0, Offset: -8);
3421 }
3422
3423 // mov SR1, sp
3424 if (Thumb) {
3425 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ScratchReg1)
3426 .addReg(RegNo: ARM::SP)
3427 .add(MOs: predOps(Pred: ARMCC::AL));
3428 } else if (CompareStackPointer) {
3429 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ScratchReg1)
3430 .addReg(RegNo: ARM::SP)
3431 .add(MOs: predOps(Pred: ARMCC::AL))
3432 .add(MO: condCodeOp());
3433 }
3434
3435 // sub SR1, sp, #StackSize
3436 if (!CompareStackPointer && Thumb) {
3437 if (AlignedStackSize < 256) {
3438 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBi8), DestReg: ScratchReg1)
3439 .add(MO: condCodeOp())
3440 .addReg(RegNo: ScratchReg1)
3441 .addImm(Val: AlignedStackSize)
3442 .add(MOs: predOps(Pred: ARMCC::AL));
3443 } else {
3444 if (Thumb2 || ST->genExecuteOnly()) {
3445 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
3446 .addImm(Val: AlignedStackSize);
3447 } else {
3448 auto MBBI = McrMBB->end();
3449 auto RegInfo = STI.getRegisterInfo();
3450 RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
3451 Val: AlignedStackSize);
3452 }
3453 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBrr), DestReg: ScratchReg1)
3454 .add(MO: condCodeOp())
3455 .addReg(RegNo: ScratchReg1)
3456 .addReg(RegNo: ScratchReg0)
3457 .add(MOs: predOps(Pred: ARMCC::AL));
3458 }
3459 } else if (!CompareStackPointer) {
3460 if (AlignedStackSize < 256) {
3461 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBri), DestReg: ScratchReg1)
3462 .addReg(RegNo: ARM::SP)
3463 .addImm(Val: AlignedStackSize)
3464 .add(MOs: predOps(Pred: ARMCC::AL))
3465 .add(MO: condCodeOp());
3466 } else {
3467 auto MBBI = McrMBB->end();
3468 auto RegInfo = STI.getRegisterInfo();
3469 RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
3470 Val: AlignedStackSize);
3471 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBrr), DestReg: ScratchReg1)
3472 .addReg(RegNo: ARM::SP)
3473 .addReg(RegNo: ScratchReg0)
3474 .add(MOs: predOps(Pred: ARMCC::AL))
3475 .add(MO: condCodeOp());
3476 }
3477 }
3478
3479 if (Thumb && ST->isThumb1Only()) {
3480 if (ST->genExecuteOnly()) {
3481 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
3482 .addExternalSymbol(FnName: "__STACK_LIMIT");
3483 } else {
3484 unsigned PCLabelId = ARMFI->createPICLabelUId();
3485 ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
3486 C&: MF.getFunction().getContext(), s: "__STACK_LIMIT", ID: PCLabelId, PCAdj: 0);
3487 MachineConstantPool *MCP = MF.getConstantPool();
3488 unsigned CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: Align(4));
3489
3490 // ldr SR0, [pc, offset(STACK_LIMIT)]
3491 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRpci), DestReg: ScratchReg0)
3492 .addConstantPoolIndex(Idx: CPI)
3493 .add(MOs: predOps(Pred: ARMCC::AL));
3494 }
3495
3496 // ldr SR0, [SR0]
3497 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRi), DestReg: ScratchReg0)
3498 .addReg(RegNo: ScratchReg0)
3499 .addImm(Val: 0)
3500 .add(MOs: predOps(Pred: ARMCC::AL));
3501 } else {
3502 // Get TLS base address from the coprocessor
3503 // mrc p15, #0, SR0, c13, c0, #3
3504 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2MRC : ARM::MRC),
3505 DestReg: ScratchReg0)
3506 .addImm(Val: 15)
3507 .addImm(Val: 0)
3508 .addImm(Val: 13)
3509 .addImm(Val: 0)
3510 .addImm(Val: 3)
3511 .add(MOs: predOps(Pred: ARMCC::AL));
3512
3513 // Use the last tls slot on android and a private field of the TCP on linux.
3514 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3515 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3516
3517 // Get the stack limit from the right offset
3518 // ldr SR0, [sr0, #4 * TlsOffset]
3519 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3520 DestReg: ScratchReg0)
3521 .addReg(RegNo: ScratchReg0)
3522 .addImm(Val: 4 * TlsOffset)
3523 .add(MOs: predOps(Pred: ARMCC::AL));
3524 }
3525
3526 // Compare stack limit with stack size requested.
3527 // cmp SR0, SR1
3528 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3529 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
3530 .addReg(RegNo: ScratchReg0)
3531 .addReg(RegNo: ScratchReg1)
3532 .add(MOs: predOps(Pred: ARMCC::AL));
3533
3534 // This jump is taken if StackLimit <= SP - stack required.
3535 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3536 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
3537 .addMBB(MBB: PostStackMBB)
3538 .addImm(Val: ARMCC::LS)
3539 .addReg(RegNo: ARM::CPSR);
3540
3541 // Calling __morestack(StackSize, Size of stack arguments).
3542 // __morestack knows that the stack size requested is in SR0(r4)
3543 // and amount size of stack arguments is in SR1(r5).
3544
3545 // Pass first argument for the __morestack by Scratch Register #0.
3546 // The amount size of stack required
3547 if (Thumb) {
3548 if (AlignedStackSize < 256) {
3549 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg0)
3550 .add(MO: condCodeOp())
3551 .addImm(Val: AlignedStackSize)
3552 .add(MOs: predOps(Pred: ARMCC::AL));
3553 } else {
3554 if (Thumb2 || ST->genExecuteOnly()) {
3555 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
3556 .addImm(Val: AlignedStackSize);
3557 } else {
3558 auto MBBI = AllocMBB->end();
3559 auto RegInfo = STI.getRegisterInfo();
3560 RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
3561 Val: AlignedStackSize);
3562 }
3563 }
3564 } else {
3565 if (AlignedStackSize < 256) {
3566 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg0)
3567 .addImm(Val: AlignedStackSize)
3568 .add(MOs: predOps(Pred: ARMCC::AL))
3569 .add(MO: condCodeOp());
3570 } else {
3571 auto MBBI = AllocMBB->end();
3572 auto RegInfo = STI.getRegisterInfo();
3573 RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
3574 Val: AlignedStackSize);
3575 }
3576 }
3577
3578 // Pass second argument for the __morestack by Scratch Register #1.
3579 // The amount size of stack consumed to save function arguments.
3580 if (Thumb) {
3581 if (ARMFI->getArgumentStackSize() < 256) {
3582 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg1)
3583 .add(MO: condCodeOp())
3584 .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
3585 .add(MOs: predOps(Pred: ARMCC::AL));
3586 } else {
3587 if (Thumb2 || ST->genExecuteOnly()) {
3588 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg1)
3589 .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
3590 } else {
3591 auto MBBI = AllocMBB->end();
3592 auto RegInfo = STI.getRegisterInfo();
3593 RegInfo->emitLoadConstPool(
3594 MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
3595 Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
3596 }
3597 }
3598 } else {
3599 if (alignToARMConstant(Value: ARMFI->getArgumentStackSize()) < 256) {
3600 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg1)
3601 .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
3602 .add(MOs: predOps(Pred: ARMCC::AL))
3603 .add(MO: condCodeOp());
3604 } else {
3605 auto MBBI = AllocMBB->end();
3606 auto RegInfo = STI.getRegisterInfo();
3607 RegInfo->emitLoadConstPool(
3608 MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
3609 Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
3610 }
3611 }
3612
3613 // push {lr} - Save return address of this function.
3614 if (Thumb) {
3615 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
3616 .add(MOs: predOps(Pred: ARMCC::AL))
3617 .addReg(RegNo: ARM::LR);
3618 } else {
3619 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
3620 .addReg(RegNo: ARM::SP, Flags: RegState::Define)
3621 .addReg(RegNo: ARM::SP)
3622 .add(MOs: predOps(Pred: ARMCC::AL))
3623 .addReg(RegNo: ARM::LR);
3624 }
3625
3626 // Emit the DWARF info about the change in stack as well as where to find the
3627 // previous link register
3628 if (!MF.getTarget().getMCAsmInfo().usesWindowsCFI()) {
3629 CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
3630 CFIBuilder.buildDefCFAOffset(Offset: 12);
3631 CFIBuilder.buildOffset(Reg: ARM::LR, Offset: -12);
3632 }
3633
3634 // Call __morestack().
3635 if (Thumb) {
3636 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tBL))
3637 .add(MOs: predOps(Pred: ARMCC::AL))
3638 .addExternalSymbol(FnName: "__morestack");
3639 } else {
3640 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::BL))
3641 .addExternalSymbol(FnName: "__morestack");
3642 }
3643
3644 // pop {lr} - Restore return address of this original function.
3645 if (Thumb) {
3646 if (ST->isThumb1Only()) {
3647 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
3648 .add(MOs: predOps(Pred: ARMCC::AL))
3649 .addReg(RegNo: ScratchReg0);
3650 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::LR)
3651 .addReg(RegNo: ScratchReg0)
3652 .add(MOs: predOps(Pred: ARMCC::AL));
3653 } else {
3654 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::t2LDR_POST))
3655 .addReg(RegNo: ARM::LR, Flags: RegState::Define)
3656 .addReg(RegNo: ARM::SP, Flags: RegState::Define)
3657 .addReg(RegNo: ARM::SP)
3658 .addImm(Val: 4)
3659 .add(MOs: predOps(Pred: ARMCC::AL));
3660 }
3661 } else {
3662 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
3663 .addReg(RegNo: ARM::SP, Flags: RegState::Define)
3664 .addReg(RegNo: ARM::SP)
3665 .add(MOs: predOps(Pred: ARMCC::AL))
3666 .addReg(RegNo: ARM::LR);
3667 }
3668
3669 // Restore SR0 and SR1 in case of __morestack() was called.
3670 // __morestack() will skip PostStackMBB block so we need to restore
3671 // scratch registers from here.
3672 // pop {SR0, SR1}
3673 if (Thumb) {
3674 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
3675 .add(MOs: predOps(Pred: ARMCC::AL))
3676 .addReg(RegNo: ScratchReg0)
3677 .addReg(RegNo: ScratchReg1);
3678 } else {
3679 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
3680 .addReg(RegNo: ARM::SP, Flags: RegState::Define)
3681 .addReg(RegNo: ARM::SP)
3682 .add(MOs: predOps(Pred: ARMCC::AL))
3683 .addReg(RegNo: ScratchReg0)
3684 .addReg(RegNo: ScratchReg1);
3685 }
3686
3687 // Update the CFA offset now that we've popped
3688 if (!MF.getTarget().getMCAsmInfo().usesWindowsCFI())
3689 CFIInstBuilder(AllocMBB, MachineInstr::NoFlags).buildDefCFAOffset(Offset: 0);
3690
3691 // Return from this function.
3692 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ST->getReturnOpcode())).add(MOs: predOps(Pred: ARMCC::AL));
3693
3694 // Restore SR0 and SR1 in case of __morestack() was not called.
3695 // pop {SR0, SR1}
3696 if (Thumb) {
3697 BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
3698 .add(MOs: predOps(Pred: ARMCC::AL))
3699 .addReg(RegNo: ScratchReg0)
3700 .addReg(RegNo: ScratchReg1);
3701 } else {
3702 BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
3703 .addReg(RegNo: ARM::SP, Flags: RegState::Define)
3704 .addReg(RegNo: ARM::SP)
3705 .add(MOs: predOps(Pred: ARMCC::AL))
3706 .addReg(RegNo: ScratchReg0)
3707 .addReg(RegNo: ScratchReg1);
3708 }
3709
3710 // Update the CFA offset now that we've popped
3711 if (!MF.getTarget().getMCAsmInfo().usesWindowsCFI()) {
3712 CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
3713 CFIBuilder.buildDefCFAOffset(Offset: 0);
3714
3715 // Tell debuggers that r4 and r5 are now the same as they were in the
3716 // previous function, that they're the "Same Value".
3717 CFIBuilder.buildSameValue(Reg: ScratchReg0);
3718 CFIBuilder.buildSameValue(Reg: ScratchReg1);
3719 }
3720
3721 // Organizing MBB lists
3722 PostStackMBB->addSuccessor(Succ: &PrologueMBB);
3723
3724 AllocMBB->addSuccessor(Succ: PostStackMBB);
3725
3726 GetMBB->addSuccessor(Succ: PostStackMBB);
3727 GetMBB->addSuccessor(Succ: AllocMBB);
3728
3729 McrMBB->addSuccessor(Succ: GetMBB);
3730
3731 PrevStackMBB->addSuccessor(Succ: McrMBB);
3732
3733#ifdef EXPENSIVE_CHECKS
3734 MF.verify();
3735#endif
3736}
3737