1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, at-compile time, a constant offset must be
79// computable from one of the pointers (fp, bp, sp) to access it. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
114#include "ARMMachineFunctionInfo.h"
115#include "ARMSubtarget.h"
116#include "MCTargetDesc/ARMAddressingModes.h"
117#include "MCTargetDesc/ARMBaseInfo.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
123#include "llvm/CodeGen/CFIInstBuilder.h"
124#include "llvm/CodeGen/MachineBasicBlock.h"
125#include "llvm/CodeGen/MachineConstantPool.h"
126#include "llvm/CodeGen/MachineFrameInfo.h"
127#include "llvm/CodeGen/MachineFunction.h"
128#include "llvm/CodeGen/MachineInstr.h"
129#include "llvm/CodeGen/MachineInstrBuilder.h"
130#include "llvm/CodeGen/MachineJumpTableInfo.h"
131#include "llvm/CodeGen/MachineModuleInfo.h"
132#include "llvm/CodeGen/MachineOperand.h"
133#include "llvm/CodeGen/MachineRegisterInfo.h"
134#include "llvm/CodeGen/RegisterScavenging.h"
135#include "llvm/CodeGen/TargetInstrInfo.h"
136#include "llvm/CodeGen/TargetRegisterInfo.h"
137#include "llvm/CodeGen/TargetSubtargetInfo.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
145#include "llvm/Support/CommandLine.h"
146#include "llvm/Support/Compiler.h"
147#include "llvm/Support/Debug.h"
148#include "llvm/Support/ErrorHandling.h"
149#include "llvm/Support/raw_ostream.h"
150#include "llvm/Target/TargetMachine.h"
151#include "llvm/Target/TargetOptions.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
#define DEBUG_TYPE "arm-frame-lowering"

using namespace llvm;

// Command-line knob (default on) controlling whether NEON D-register spills
// are placed in a realigned spill area; see its cl::desc below.
static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(Val: true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));

// Forward declaration: defined later in this file.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);
171
/// The distinct areas of the stack frame that callee-saved registers can be
/// spilled into by the prologue (see getSpillArea for the per-variation
/// layouts).
enum class SpillArea {
  GPRCS1,   // First general-purpose callee-save area.
  GPRCS2,   // Second GPR area, used by the SplitR7 / SplitR11AAPCSSignRA
            // push/pop split variations.
  FPStatus, // FPSCR / FPEXC floating-point status registers.
  DPRCS1,   // D-register (VFP/NEON) callee-save area.
  DPRCS2,   // Aligned D-register area; placed below the other areas after the
            // stack has been re-aligned (ABIs guaranteeing only 4-byte SP
            // alignment).
  GPRCS3,   // {r11, lr} area pushed after DPRCS1 (SplitR11WindowsSEH).
  FPCXT,    // FPCXTNS, always at the very top of the frame (CMSE secure
            // entry functions).
};
181
/// Get the spill area that Reg should be saved into in the prologue.
///
/// \param Variation            the push/pop split variation the subtarget
///                             uses (see the layouts sketched below).
/// \param NumAlignedDPRCS2Regs number of registers starting at d8 that are
///                             saved into the re-aligned DPRCS2 area instead
///                             of DPRCS1.
SpillArea getSpillArea(Register Reg,
                       ARMSubtarget::PushPopSplitVariation Variation,
                       unsigned NumAlignedDPRCS2Regs,
                       const ARMBaseRegisterInfo *RegInfo) {
  // NoSplit:
  //   push {r0-r12, lr}    GPRCS1
  //   vpush {d8-d15}       DPRCS1
  //
  // SplitR7:
  //   push {r0-r7, lr}     GPRCS1
  //   push {r8-r12}        GPRCS2
  //   vpush {d8-d15}       DPRCS1
  //
  // SplitR11WindowsSEH:
  //   push {r0-r10, r12}   GPRCS1
  //   vpush {d8-d15}       DPRCS1
  //   push {r11, lr}       GPRCS3
  //
  // SplitR11AAPCSSignRA:
  //   push {r0-r10, r12}   GPRCS1
  //   push {r11, lr}       GPRCS2
  //   vpush {d8-d15}       DPRCS1

  // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
  // the top of the stack frame.
  // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
  // of SP. If used, it will be below the other save areas, after the stack has
  // been re-aligned.

  switch (Reg) {
  default:
    dbgs() << "Don't know where to spill " << printReg(Reg, TRI: RegInfo) << "\n";
    llvm_unreachable("Don't know where to spill this register");
    break;

  case ARM::FPCXTNS:
    return SpillArea::FPCXT;

  case ARM::FPSCR:
  case ARM::FPEXC:
    return SpillArea::FPStatus;

  // r0-r7 are always in the first push in every variation above.
  case ARM::R0:
  case ARM::R1:
  case ARM::R2:
  case ARM::R3:
  case ARM::R4:
  case ARM::R5:
  case ARM::R6:
  case ARM::R7:
    return SpillArea::GPRCS1;

  // r8-r10 are only split out of the first push for SplitR7 (Thumb1-style
  // push can't encode high registers).
  case ARM::R8:
  case ARM::R9:
  case ARM::R10:
    if (Variation == ARMSubtarget::SplitR7)
      return SpillArea::GPRCS2;
    else
      return SpillArea::GPRCS1;

  case ARM::R11:
    if (Variation == ARMSubtarget::SplitR7 ||
        Variation == ARMSubtarget::SplitR11AAPCSSignRA)
      return SpillArea::GPRCS2;
    if (Variation == ARMSubtarget::SplitR11WindowsSEH)
      return SpillArea::GPRCS3;

    return SpillArea::GPRCS1;

  case ARM::R12:
    if (Variation == ARMSubtarget::SplitR7)
      return SpillArea::GPRCS2;
    else
      return SpillArea::GPRCS1;

  // lr is pushed together with r11 in the R11-splitting variations.
  case ARM::LR:
    if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
      return SpillArea::GPRCS2;
    if (Variation == ARMSubtarget::SplitR11WindowsSEH)
      return SpillArea::GPRCS3;

    return SpillArea::GPRCS1;

  case ARM::D0:
  case ARM::D1:
  case ARM::D2:
  case ARM::D3:
  case ARM::D4:
  case ARM::D5:
  case ARM::D6:
  case ARM::D7:
    return SpillArea::DPRCS1;

  // The first NumAlignedDPRCS2Regs registers starting at d8 go into the
  // aligned DPRCS2 area; the rest stay in DPRCS1.
  case ARM::D8:
  case ARM::D9:
  case ARM::D10:
  case ARM::D11:
  case ARM::D12:
  case ARM::D13:
  case ARM::D14:
  case ARM::D15:
    if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
      return SpillArea::DPRCS2;
    else
      return SpillArea::DPRCS1;

  case ARM::D16:
  case ARM::D17:
  case ARM::D18:
  case ARM::D19:
  case ARM::D20:
  case ARM::D21:
  case ARM::D22:
  case ARM::D23:
  case ARM::D24:
  case ARM::D25:
  case ARM::D26:
  case ARM::D27:
  case ARM::D28:
  case ARM::D29:
  case ARM::D30:
  case ARM::D31:
    return SpillArea::DPRCS1;
  }
}
308
// Construct with: stack grows down, stack alignment from the subtarget,
// local-area offset 0, transient stack alignment of 4 bytes.
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}
312
313bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
314 // iOS always has a FP for backtracking, force other targets to keep their FP
315 // when doing FastISel. The emitted code is currently superior, and in cases
316 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
317 return MF.getSubtarget<ARMSubtarget>().useFastISel();
318}
319
320/// Returns true if the target can safely skip saving callee-saved registers
321/// for noreturn nounwind functions.
322bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
323 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
324 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
325 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
326
327 // Frame pointer and link register are not treated as normal CSR, thus we
328 // can always skip CSR saves for nonreturning functions.
329 return true;
330}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
335bool ARMFrameLowering::hasFPImpl(const MachineFunction &MF) const {
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target want to forcibly keep frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
344 if (MF.getTarget().Options.DisableFramePointerElim(MF))
345 return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
352/// isFPReserved - Return true if the frame pointer register should be
353/// considered a reserved register on the scope of the specified function.
354bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
355 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
356}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
363bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has small immediate offset to
368 // address the stack frame. So a large call frame can cause poor codegen
369 // and may even makes it impossible to scavenge a register.
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
375
376/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
377/// call frame pseudos can be simplified. Unlike most targets, having a FP
378/// is not sufficient here since we still may reference some objects via SP
379/// even when FP is available in Thumb2 mode.
380bool
381ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
382 return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
383}
384
385// Returns how much of the incoming argument stack area we should clean up in an
386// epilogue. For the C calling convention this will be 0, for guaranteed tail
387// call conventions it can be positive (a normal return or a tail call to a
388// function that uses less stack space for arguments) or negative (for a tail
389// call to a function that needs more stack space than us for arguments).
390static int getArgumentStackToRestore(MachineFunction &MF,
391 MachineBasicBlock &MBB) {
392 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
400 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(i: 1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments, this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
//
// Returns an iterator to the SEH pseudo inserted immediately after MBBI.
// In some cases the instruction at MBBI is first replaced by its narrow
// (Thumb1) form so that the "wide" flag recorded in the SEH opcode matches
// the final instruction encoding.
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             unsigned Flags) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  MachineInstrBuilder MIB;
  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
  const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Keep the SEH pseudo glued to its instruction.
  Flags |= MachineInstr::NoMerge;

  switch (Opc) {
  default:
    report_fatal_error(reason: "No SEH Opcode for instruction " + TII.getName(Opcode: Opc));
    break;
  case ARM::t2ADDri:   // add.w r11, sp, #xx
  case ARM::t2ADDri12: // add.w r11, sp, #xx
  case ARM::t2MOVTi16: // movt  r4, #xx
  case ARM::tBL:       // bl __chkstk
    // These are harmless if used for just setting up a frame pointer,
    // but that frame pointer can't be relied upon for unwinding, unless
    // set up with SEH_SaveSP.
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi16: { // mov(w) r4, #xx
    // If the immediate fits in 8 bits, replace the wide mov with the narrow
    // tMOVi8 form so the SEH nop's width flag matches the encoding.
    bool Wide = MBBI->getOperand(i: 1).getImm() >= 256;
    if (!Wide) {
      MachineInstrBuilder NewInstr =
          BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
      NewInstr.add(MO: MBBI->getOperand(i: 0));
      NewInstr.add(MO: t1CondCodeOp(/*isDead=*/true));
      for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands()))
        NewInstr.add(MO);
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
      MBB->erase(I: MBBI);
      MBBI = NewMBBI;
    }
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop)).addImm(Val: Wide).setMIFlags(Flags);
    break;
  }

  case ARM::tBLXr: // blx r12 (__chkstk)
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
              .addImm(/*Wide=*/Val: 0)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi32imm: // movw+movt
    // This pseudo instruction expands into two mov instructions. If the
    // second operand is a symbol reference, this will stay as two wide
    // instructions, movw+movt. If they're immediates, the first one can
    // end up as a narrow mov though.
    // As two SEH instructions are appended here, they won't get interleaved
    // between the two final movw/movt instructions, but it doesn't make any
    // practical difference.
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    MBB->insertAfter(I: MBBI, MI: MIB);
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    break;

  case ARM::t2STR_PRE:
    // Only the push-like form "str reg, [sp, #-4]!" has an SEH encoding.
    if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
        MBBI->getOperand(i: 2).getReg() == ARM::SP &&
        MBBI->getOperand(i: 3).getImm() == -4) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
      MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
                .addImm(Val: 1ULL << Reg)
                .addImm(/*Wide=*/Val: 1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error(reason: "No matching SEH Opcode for t2STR_PRE");
    }
    break;

  case ARM::t2LDR_POST:
    // Only the pop-like form "ldr reg, [sp], #4" has an SEH encoding.
    if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
        MBBI->getOperand(i: 2).getReg() == ARM::SP &&
        MBBI->getOperand(i: 3).getImm() == 4) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
      MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
                .addImm(Val: 1ULL << Reg)
                .addImm(/*Wide=*/Val: 1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error(reason: "No matching SEH Opcode for t2LDR_POST");
    }
    break;

  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMDB_UPD: {
    // Build the SEH register mask from the register list (operands 4+).
    unsigned Mask = 0;
    bool Wide = false;
    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
      const MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
      // A popped pc is recorded via the lr bit in the SEH save mask.
      if (Reg == 15)
        Reg = 14;
      // High registers (and lr in a non-return pop) force the wide encoding.
      if (Reg >= 8 && Reg <= 13)
        Wide = true;
      else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
        Wide = true;
      Mask |= 1 << Reg;
    }
    if (!Wide) {
      // Narrow register list: rewrite to the Thumb1 push/pop so the
      // instruction width matches what we report in the SEH opcode.
      unsigned NewOpc;
      switch (Opc) {
      case ARM::t2LDMIA_RET:
        NewOpc = ARM::tPOP_RET;
        break;
      case ARM::t2LDMIA_UPD:
        NewOpc = ARM::tPOP;
        break;
      case ARM::t2STMDB_UPD:
        NewOpc = ARM::tPUSH;
        break;
      default:
        llvm_unreachable("");
      }
      MachineInstrBuilder NewInstr =
          BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: NewOpc)).setMIFlags(MBBI->getFlags());
      for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
        NewInstr.add(MO: MBBI->getOperand(i));
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
      MBB->erase(I: MBBI);
      MBBI = NewMBBI;
    }
    unsigned SEHOpc =
        (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: SEHOpc))
              .addImm(Val: Mask)
              .addImm(Val: Wide ? 1 : 0)
              .setMIFlags(Flags);
    break;
  }
  case ARM::VSTMDDB_UPD:
  case ARM::VLDMDIA_UPD: {
    // SEH_SaveFRegs only encodes a first..last range, so this assumes the
    // D-register list is contiguous.
    int First = -1, Last = 0;
    for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: 4)) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
      if (First == -1)
        First = Reg;
      Last = Reg;
    }
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveFRegs))
              .addImm(Val: First)
              .addImm(Val: Last)
              .setMIFlags(Flags);
    break;
  }
  case ARM::tSUBspi:
  case ARM::tADDspi:
    // Narrow sp +/- imm: the immediate operand is in units of 4 bytes.
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
              .addImm(Val: MBBI->getOperand(i: 2).getImm() * 4)
              .addImm(/*Wide=*/Val: 0)
              .setMIFlags(Flags);
    break;
  case ARM::t2SUBspImm:
  case ARM::t2SUBspImm12:
  case ARM::t2ADDspImm:
  case ARM::t2ADDspImm12:
    // Wide sp +/- imm: the immediate is already in bytes.
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
              .addImm(Val: MBBI->getOperand(i: 2).getImm())
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    break;

  case ARM::tMOVr:
    // mov reg, sp in the prologue / mov sp, reg in the epilogue both map to
    // SEH_SaveSP; anything else is unrepresentable.
    if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
        (Flags & MachineInstr::FrameSetup)) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
      MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
                .addImm(Val: Reg)
                .setMIFlags(Flags);
    } else if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
               (Flags & MachineInstr::FrameDestroy)) {
      unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
      MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
                .addImm(Val: Reg)
                .setMIFlags(Flags);
    } else {
      report_fatal_error(reason: "No SEH Opcode for MOV");
    }
    break;

  case ARM::tBX_RET:
  case ARM::t2BXAUT_RET:
  case ARM::TCRETURNri:
  case ARM::TCRETURNrinotr12:
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/Val: 0)
              .setMIFlags(Flags);
    break;

  case ARM::TCRETURNdi:
    MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/Val: 1)
              .setMIFlags(Flags);
    break;
  }
  return MBB->insertAfter(I: MBBI, MI: MIB);
}
642
643static MachineBasicBlock::iterator
644initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
645 if (MBBI == MBB.begin())
646 return MachineBasicBlock::iterator();
647 return std::prev(x: MBBI);
648}
649
// Add SEH unwind opcodes for every instruction in (Start, End). Start is
// exclusive: it is the instruction *before* the first one to process, or an
// invalid iterator meaning "begin of MBB" (the form produced by
// initMBBRange).
static void insertSEHRange(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator Start,
                           const MachineBasicBlock::iterator &End,
                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  if (Start.isValid())
    Start = std::next(x: Start);
  else
    Start = MBB.begin();

  for (auto MI = Start; MI != End;) {
    // insertSEH may replace MI, so precompute the following instruction.
    auto Next = std::next(x: MI);
    // Check if this instruction already has got a SEH opcode added. In that
    // case, don't do this generic mapping.
    if (Next != End && isSEHInstruction(MI: *Next)) {
      MI = std::next(x: Next);
      while (MI != End && isSEHInstruction(MI: *MI))
        ++MI;
      continue;
    }
    insertSEH(MBBI: MI, TII, Flags: MIFlags);
    MI = Next;
  }
}
673
674static void emitRegPlusImmediate(
675 bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
676 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
677 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
678 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
679 if (isARM)
680 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
681 Pred, PredReg, TII, MIFlags);
682 else
683 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
684 Pred, PredReg, TII, MIFlags);
685}
686
// Adjust SP by NumBytes (may be negative): a thin SP-to-SP wrapper around
// emitRegPlusImmediate.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, DestReg: ARM::SP, SrcReg: ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}
696
697static int sizeOfSPAdjustment(const MachineInstr &MI) {
698 int RegSize;
699 switch (MI.getOpcode()) {
700 case ARM::VSTMDDB_UPD:
701 RegSize = 8;
702 break;
703 case ARM::STMDB_UPD:
704 case ARM::t2STMDB_UPD:
705 RegSize = 4;
706 break;
707 case ARM::t2STR_PRE:
708 case ARM::STR_PRE_IMM:
709 return 4;
710 default:
711 llvm_unreachable("Unknown push or pop like instruction");
712 }
713
714 int count = 0;
715 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
716 // pred) so the list starts at 4.
717 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
718 count += RegSize;
719 return count;
720}
721
722static bool WindowsRequiresStackProbe(const MachineFunction &MF,
723 size_t StackSizeInBytes) {
724 const MachineFrameInfo &MFI = MF.getFrameInfo();
725 const Function &F = MF.getFunction();
726 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
727
728 StackProbeSize =
729 F.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: StackProbeSize);
730 return (StackSizeInBytes >= StackProbeSize) &&
731 !F.hasFnAttribute(Kind: "no-stack-arg-probe");
732}
733
namespace {

/// Records the prologue instructions that adjust SP, so that matching
/// .cfi_def_cfa_offset directives can be emitted afterwards
/// (emitDefCFAOffsets).
struct StackAdjustingInsts {
  /// One SP-adjusting prologue instruction.
  struct InstInfo {
    MachineBasicBlock::iterator I;
    unsigned SPAdjust;  // Bytes of SP adjustment made by I.
    bool BeforeFPSet;   // True if I executes before the FP is established.

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
             << "sp-adjust=" << SPAdjust;
      I->dump();
    }
#endif
  };

  SmallVector<InstInfo, 4> Insts;

  /// Record instruction I as adjusting SP by SPAdjust bytes.
  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {.I: I, .SPAdjust: SPAdjust, .BeforeFPSet: BeforeFPSet};
    Insts.push_back(Elt: Info);
  }

  /// Attribute ExtraBytes of additional SP adjustment to the
  /// already-recorded instruction I.
  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info =
        llvm::find_if(Range&: Insts, P: [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  /// Emit a def_cfa_offset directive after each recorded adjustment,
  /// accumulating the running CFA offset. Once a frame pointer is in use
  /// (HasFP), adjustments made after the FP was set are skipped.
  void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
    CFIInstBuilder CFIBuilder(MBB, MBB.end(), MachineInstr::FrameSetup);
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset += Info.SPAdjust;
      CFIBuilder.setInsertPoint(std::next(x: Info.I));
      CFIBuilder.buildDefCFAOffset(Offset: CFAOffset);
    }
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void dump() {
    dbgs() << "StackAdjustingInsts:\n";
    for (auto &Info : Insts)
      Info.dump();
  }
#endif
};

} // end anonymous namespace
789
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
///
/// \param Reg        register holding the address; rewritten in place.
/// \param Alignment  power-of-two alignment to round Reg down to.
/// \param MustBeSingleInstruction  asserts that a single aligning
///        instruction suffices (required by skipAlignedDPRCS2Spills).
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const Align Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment.value() - 1U;
  const unsigned NrBitsToZero = Log2(A: Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // if the BFC instruction is available, use that to zero the lower
    // bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BFC), DestReg: Reg)
          .addReg(RegNo: Reg, Flags: RegState::Kill)
          .addImm(Val: ~AlignMask)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else if (AlignMask <= 255) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BICri), DestReg: Reg)
          .addReg(RegNo: Reg, Flags: RegState::Kill)
          .addImm(Val: AlignMask)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      // Two-instruction fallback: shift right then left by log2(Alignment).
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
          .addReg(RegNo: Reg, Flags: RegState::Kill)
          .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsr, Imm: NrBitsToZero))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
          .addReg(RegNo: Reg, Flags: RegState::Kill)
          .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: NrBitsToZero))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::t2BFC), DestReg: Reg)
        .addReg(RegNo: Reg, Flags: RegState::Kill)
        .addImm(Val: ~AlignMask)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }
}
856
857/// We need the offset of the frame pointer relative to other MachineFrameInfo
858/// offsets which are encoded relative to SP at function begin.
859/// See also emitPrologue() for how the FP is set up.
860/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
861/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
862/// this to produce a conservative estimate that we check in an assert() later.
863static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
864 const MachineFunction &MF) {
865 ARMSubtarget::PushPopSplitVariation PushPopSplit =
866 STI.getPushPopSplitVariation(MF);
867 // For Thumb1, push.w isn't available, so the first push will always push
868 // r7 and lr onto the stack first.
869 if (AFI.isThumb1OnlyFunction())
870 return -AFI.getArgRegsSaveSize() - (2 * 4);
871 // This is a conservative estimation: Assume the frame pointer being r7 and
872 // pc("r15") up to r8 getting spilled before (= 8 registers).
873 int MaxRegBytes = 8 * 4;
874 if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
875 // Here, r11 can be stored below all of r4-r15.
876 MaxRegBytes = 11 * 4;
877 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
878 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
879 MaxRegBytes = 11 * 4 + 8 * 8;
880 }
881 int FPCXTSaveSize =
882 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
883 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
884}
885
/// Emit the function prologue: push the callee-saved registers (split across
/// the GPR, FP-status and DPR spill areas per the subtarget's push/pop split
/// variation), allocate the local stack frame (with a __chkstk probe on
/// Windows for large frames), set up the frame pointer and base pointer when
/// required, and emit the matching DWARF CFI or Windows SEH unwind info.
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetMachine &TM = MF.getTarget();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;
  bool NeedsWinCFI = needsWinCFI(MF);
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
           DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  // Fast path: no callee-saved spills, just (maybe) an SP adjustment.
  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes))) {
    if (NumBytes != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes, BeforeFPSet: true);
    }
    if (!NeedsWinCFI)
      DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
    if (NeedsWinCFI && MBBI != MBB.begin()) {
      insertSEHRange(MBB, Start: {}, End: MBBI, TII, MIFlags: MachineInstr::FrameSetup);
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      MF.setHasWinCFI(true);
    }
    return;
  }

  // Determine spill area sizes, and some important frame indices.
  SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
  // True while the pushes being classified come before the one that saves the
  // frame pointer; used to attribute SP adjustments for CFI emission.
  bool BeforeFPPush = true;
  for (const CalleeSavedInfo &I : CSI) {
    MCRegister Reg = I.getReg();
    int FI = I.getFrameIdx();

    SpillArea Area = getSpillArea(Reg, Variation: PushPopSplit,
                                  NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(), RegInfo);

    if (Reg == FramePtr.asMCReg()) {
      FramePtrSpillFI = FI;
      FramePtrSpillArea = Area;
    }
    if (Reg == ARM::D8)
      D8SpillFI = FI;

    switch (Area) {
    case SpillArea::FPCXT:
      FPCXTSaveSize += 4;
      break;
    case SpillArea::GPRCS1:
      GPRCS1Size += 4;
      break;
    case SpillArea::GPRCS2:
      GPRCS2Size += 4;
      break;
    case SpillArea::FPStatus:
      FPStatusSize += 4;
      break;
    case SpillArea::DPRCS1:
      DPRCS1Size += 8;
      break;
    case SpillArea::GPRCS3:
      GPRCS3Size += 4;
      break;
    case SpillArea::DPRCS2:
      DPRCS2Size += 8;
      break;
    }
  }

  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
                              DPRCS1Push, GPRCS3Push;

  // Move past the PAC computation.
  if (AFI->shouldSignReturnAddress())
    LastPush = MBBI++;

  // Move past FPCXT area.
  if (FPCXTSaveSize > 0) {
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPCXTSaveSize, BeforeFPSet: BeforeFPPush);
  }

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -ArgRegsSaveSize,
                 MIFlags: MachineInstr::FrameSetup);
    LastPush = std::prev(x: MBBI);
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: ArgRegsSaveSize, BeforeFPSet: BeforeFPPush);
  }

  // Move past area 1.
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS1Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS1)
      BeforeFPPush = false;
  }

  // Determine starting offsets of spill areas. These offsets are all positive
  // offsets from the bottom of the lowest-addressed callee-save area
  // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
  // of the spill area in question.
  unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
  unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;

  // DPR spills need 8-byte alignment; DPRGapSize is the padding (0 or 4 bytes)
  // inserted between the GPR and DPR save areas to achieve it.
  Align DPRAlign = DPRCS1Size ? std::min(a: Align(8), b: Alignment) : Align(4);
  unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                         GPRCS2Size + FPStatusSize) %
                        DPRAlign.value();

  unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;

  if (HasFP) {
    // Offset from the CFA to the saved frame pointer, will be negative.
    [[maybe_unused]] int FPOffset = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI);
    LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
                      << ", FPOffset: " << FPOffset << "\n");
    assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
           "Max FP estimation is wrong");
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);

  // Move past area 2.
  if (GPRCS2Size > 0) {
    assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS2Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS2)
      BeforeFPPush = false;
  }

  // Move past FP status save area. Skip over any VMRS instructions that read
  // the status registers before the push that saves them.
  if (FPStatusSize > 0) {
    while (MBBI != MBB.end()) {
      unsigned Opc = MBBI->getOpcode();
      if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
        MBBI++;
      else
        break;
    }
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPStatusSize);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
  // .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes: DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -DPRGapSize,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: DPRGapSize, BeforeFPSet: BeforeFPPush);
    }
  }

  // Move past DPRCS1Size.
  if (DPRCS1Size > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(I: MBBI, SPAdjust: sizeOfSPAdjustment(MI: *MBBI),
                                     BeforeFPSet: BeforeFPPush);
      DPRCS1Push = LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (DPRCS2Size > 0) {
    MBBI = skipAlignedDPRCS2Spills(MI: MBBI, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(ObjectIdx: D8SpillFI);
  } else
    NumBytes = DPRCS1Offset;

  // Move past GPRCS3, if using SplitR11WindowsSEH.
  if (GPRCS3Size > 0) {
    assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
    GPRCS3Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS3Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS3)
      BeforeFPPush = false;
  }

  // With SplitR11WindowsSEH and a frame pointer, no SEH stack-alloc opcode is
  // emitted for the local allocation (the unwinder recovers SP from the frame
  // pointer instead).
  bool NeedsWinCFIStackAlloc = NeedsWinCFI;
  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
    NeedsWinCFIStackAlloc = false;

  // Large Windows frames need a __chkstk probe; the allocation size in words
  // is passed in r4 and __chkstk performs the SP adjustment.
  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
          .addImm(Val: NumWords)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      // Split into two instructions here, instead of using t2MOVi32imm,
      // to allow inserting accurate SEH instructions (including accurate
      // instruction size for each of them).
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
          .addImm(Val: NumWords & 0xffff)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVTi16), DestReg: ARM::R4)
          .addReg(RegNo: ARM::R4)
          .addImm(Val: NumWords >> 16)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    const ARMTargetLowering *TLI = STI.getTargetLowering();
    RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(Call: RTLIB::STACK_PROBE);
    if (ChkStkLibcall == RTLIB::Unsupported)
      reportFatalUsageError(reason: "no available implementation of __chkstk");
    const char *ChkStk = TLI->getLibcallImplName(Call: ChkStkLibcall).data();

    switch (TM.getCodeModel()) {
    case CodeModel::Tiny:
      llvm_unreachable("Tiny code model not available on ARM.");
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBL))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addExternalSymbol(FnName: ChkStk)
          .addReg(RegNo: ARM::R4, Flags: RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      // Large code model: load the symbol address into r12 and call through it.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi32imm), DestReg: ARM::R12)
          .addExternalSymbol(FnName: ChkStk)
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBLXr))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addReg(RegNo: ARM::R12, Flags: RegState::Kill)
          .addReg(RegNo: ARM::R4, Flags: RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    MachineInstrBuilder Instr, SEH;
    Instr = BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2SUBrr), DestReg: ARM::SP)
                .addReg(RegNo: ARM::SP, Flags: RegState::Kill)
                .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
                .setMIFlags(MachineInstr::FrameSetup)
                .add(MOs: predOps(Pred: ARMCC::AL))
                .add(MO: condCodeOp());
    if (NeedsWinCFIStackAlloc) {
      SEH = BuildMI(MF, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
                .addImm(Val: NumBytes)
                .addImm(/*Wide=*/Val: 1)
                .setMIFlags(MachineInstr::FrameSetup);
      MBB.insertAfter(I: Instr, MI: SEH);
    }
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      // mov sp, r7
      // sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  MachineBasicBlock::iterator AfterPush;
  if (HasFP) {
    MachineBasicBlock::iterator FPPushInst;
    // Offset from SP immediately after the push which saved the FP to the FP
    // save slot.
    int64_t FPOffsetAfterPush;
    switch (FramePtrSpillArea) {
    case SpillArea::GPRCS1:
      FPPushInst = GPRCS1Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    case SpillArea::GPRCS2:
      FPPushInst = GPRCS2Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    case SpillArea::GPRCS3:
      FPPushInst = GPRCS3Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                          FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    default:
      llvm_unreachable("frame pointer in unknown spill area");
      break;
    }
    AfterPush = std::next(x: FPPushInst);
    if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
      assert(FPOffsetAfterPush == 0);

    // Emit the MOV or ADD to set up the frame pointer register.
    emitRegPlusImmediate(isARM: !AFI->isThumbFunction(), MBB, MBBI&: AfterPush, dl, TII,
                         DestReg: FramePtr, SrcReg: ARM::SP, NumBytes: FPOffsetAfterPush,
                         MIFlags: MachineInstr::FrameSetup);

    if (!NeedsWinCFI) {
      // Emit DWARF info to find the CFA using the frame pointer from this
      // point onward.
      CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
      if (FPOffsetAfterPush != 0)
        CFIBuilder.buildDefCFA(Reg: FramePtr, Offset: -MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI));
      else
        CFIBuilder.buildDefCFARegister(Reg: FramePtr);
    }
  }

  // Emit a SEH opcode indicating the prologue end. The rest of the prologue
  // instructions below don't need to be replayed to unwind the stack.
  if (NeedsWinCFI && MBBI != MBB.begin()) {
    MachineBasicBlock::iterator End = MBBI;
    if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
      End = AfterPush;
    insertSEHRange(MBB, Start: {}, End, TII, MIFlags: MachineInstr::FrameSetup);
    BuildMI(BB&: MBB, I: End, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
    MF.setHasWinCFI(true);
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF cf instructions to describe the situation. Start by
  // recording where each register ended up:
  if (!NeedsWinCFI) {
    for (const auto &Entry : reverse(C: CSI)) {
      MCRegister Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      MachineBasicBlock::iterator CFIPos;
      switch (getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(),
                           RegInfo)) {
      case SpillArea::GPRCS1:
        CFIPos = std::next(x: GPRCS1Push);
        break;
      case SpillArea::GPRCS2:
        CFIPos = std::next(x: GPRCS2Push);
        break;
      case SpillArea::DPRCS1:
        CFIPos = std::next(x: DPRCS1Push);
        break;
      case SpillArea::GPRCS3:
        CFIPos = std::next(x: GPRCS3Push);
        break;
      case SpillArea::FPStatus:
      case SpillArea::FPCXT:
      case SpillArea::DPRCS2:
        // FPStatus, FPCXT and DPRCS2 are not represented in the DWARF info.
        break;
      }

      if (CFIPos.isValid()) {
        CFIInstBuilder(MBB, CFIPos, MachineInstr::FrameSetup)
            .buildOffset(Reg: Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
                         Offset: MFI.getObjectOffset(ObjectIdx: FI));
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  if (!NeedsWinCFI) {
    LLVM_DEBUG(DefCFAOffsetCandidates.dump());
    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
  }

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  // Record the final area sizes so the epilogue and frame-index resolution
  // can reconstruct the layout.
  AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setFPStatusSavesSize(FPStatusSize);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
  AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
    Align MaxAlign = MFI.getMaxAlign();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::SP, Alignment: MaxAlign,
                               MustBeSingleInstruction: false);
    } else {
      // We cannot use sp as source/dest register here, thus we're using r4 to
      // perform the calculations. We're emitting the following sequence:
      // mov r4, sp
      // -- use emitAligningInstructions to produce best sequence to zero
      // -- out lower bits in r4
      // mov sp, r4
      // FIXME: It will be better just to find spare register here.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::R4)
          .addReg(RegNo: ARM::SP, Flags: RegState::Kill)
          .add(MOs: predOps(Pred: ARMCC::AL));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::R4, Alignment: MaxAlign,
                               MustBeSingleInstruction: false);
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
          .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: RegInfo->getBaseRegister())
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    else
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: RegInfo->getBaseRegister())
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}
1399
/// Emit the function epilogue: restore SP (directly, or from FP when the
/// prologue recorded ShouldRestoreSPFromFP), step past the callee-saved
/// restore instructions in reverse order of the prologue's save areas,
/// deallocate the argument save area, authenticate the return address if it
/// was signed, and emit Windows SEH epilogue markers when needed.
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");

  // Amount of stack space we reserved next to incoming args for either
  // varargs registers or stack arguments in tail calls made by this function.
  unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves on the first (from top) terminator instructions.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineBasicBlock::iterator RangeStart;
  if (!AFI->hasStackFrame()) {
    // No callee-saved restores to step over: just undo the SP adjustment.
    if (MF.hasWinCFI()) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    if (NumBytes + IncomingArgStackToRestore != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes: NumBytes + IncomingArgStackToRestore,
                   MIFlags: MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(Flag: MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(Flag: MachineInstr::FrameDestroy))
        ++MBBI;
    }

    if (MF.hasWinCFI()) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -=
        (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
         AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() +
         AFI->getFPStatusSavesSize() + AFI->getDPRCalleeSavedGapSize() +
         AFI->getDPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea3Size());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: FramePtr, NumBytes: -NumBytes,
                                  Pred: ARMCC::AL, PredReg: 0, TII,
                                  MIFlags: MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          // mov sp, r7
          // sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::R4, BaseReg: FramePtr, NumBytes: -NumBytes,
                                 Pred: ARMCC::AL, PredReg: 0, TII, MIFlags: MachineInstr::FrameDestroy);
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
              .addReg(RegNo: ARM::R4)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ARM::SP)
              .addReg(RegNo: FramePtr)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .add(MO: condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
              .addReg(RegNo: FramePtr)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MIFlags: MachineInstr::FrameDestroy);

    // Increment past our save areas. The pops themselves were already emitted
    // by the callee-saved restore code; here we only step over them.
    if (AFI->getGPRCalleeSavedArea3Size()) {
      assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
      (void)PushPopSplit;
      MBBI++;
    }

    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: AFI->getDPRCalleeSavedGapSize(),
                   MIFlags: MachineInstr::FrameDestroy);
    }

    if (AFI->getGPRCalleeSavedArea2Size()) {
      assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
      (void)PushPopSplit;
      MBBI++;
    }
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

    if (ReservedArgStack || IncomingArgStackToRestore) {
      assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
             "attempting to restore negative stack amount");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes: ReservedArgStack + IncomingArgStackToRestore,
                   MIFlags: MachineInstr::FrameDestroy);
    }

    // Validate PAC, It should have been already popped into R12. For CMSE entry
    // function, the validation instruction is emitted during expansion of the
    // tBXNS_RET, since the validation must use the value of SP at function
    // entry, before saving, resp. after restoring, FPCXTNS.
    if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction()) {
      bool CanUseBXAut =
          STI.isThumb() && STI.hasV8_1MMainlineOps() && STI.hasPACBTI();
      auto TMBBI = MBB.getFirstTerminator();
      bool IsBXReturn =
          TMBBI != MBB.end() && TMBBI->getOpcode() == ARM::tBX_RET;
      // Prefer fusing the authentication into the return (BXAUT) when the
      // subtarget supports it; otherwise emit a standalone AUT.
      if (IsBXReturn && CanUseBXAut)
        TMBBI->setDesc(STI.getInstrInfo()->get(Opcode: ARM::t2BXAUT_RET));
      else
        BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2AUT));
    }
  }

  if (MF.hasWinCFI()) {
    insertSEHRange(MBB, Start: RangeStart, End: MBB.end(), TII, MIFlags: MachineInstr::FrameDestroy);
    BuildMI(BB&: MBB, I: MBB.end(), MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogEnd))
        .setMIFlag(MachineInstr::FrameDestroy);
  }
}
1574
1575/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1576/// debug info. It's the same as what we use for resolving the code-gen
1577/// references for now. FIXME: This can go wrong when references are
1578/// SP-relative and simple call frames aren't used.
1579StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1580 int FI,
1581 Register &FrameReg) const {
1582 return StackOffset::getFixed(Fixed: ResolveFrameIndexReference(MF, FI, FrameReg, SPAdj: 0));
1583}
1584
/// Resolve frame index \p FI to a base register (written to \p FrameReg) plus
/// a byte offset (the return value), choosing between SP, the frame pointer
/// and the base pointer depending on stack realignment, the presence of
/// variable-sized objects, and the immediate-offset encoding ranges of the
/// relevant ARM/Thumb load/store instructions. \p SPAdj is the extra SP
/// adjustment in effect at the use site (e.g. inside a call sequence).
int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 int SPAdj) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Offset of the slot relative to SP at function entry (after the full
  // frame allocation).
  int Offset = MFI.getObjectOffset(ObjectIdx: FI) + MFI.getStackSize();
  // The same slot expressed relative to the frame pointer.
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI.isFixedObjectIndex(ObjectIdx: FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
      // The base pointer does not move with call-frame setup, so undo SPAdj.
      Offset -= SPAdj;
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumbFunction()) {
      // Prefer SP to base pointer, if the offset is suitably aligned and in
      // range as the effective range of the immediate offset is bigger when
      // basing off SP.
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
  // That can happen if we forced a base pointer for a large call frame.
  if (RegInfo->hasBasePointer(MF)) {
    FrameReg = RegInfo->getBaseRegister();
    // As above: the base pointer is unaffected by call-frame SP adjustments.
    Offset -= SPAdj;
  }
  return Offset;
}
1666
/// Emit one or more push instructions that spill the callee-saved registers
/// in CSI selected by the Func predicate, updating SP as they go.
///
/// StmOpc is the store-multiple (push) opcode; StrOpc is the single-register
/// pre-indexed store opcode (0 if a lone register must still use StmOpc).
/// When NoGap is true, only registers with consecutive numbers may share one
/// instruction (as required by VPUSH).
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  // Walk CSI from the end, batching registers into Regs; each iteration of
  // the outer loop emits one push instruction.
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      MCRegister Reg = CSI[i-1].getReg();
      if (!Func(Reg))
        continue;

      // The register's value is about to be stored, so mark it live into this
      // block unless it is reserved (reserved registers need no live-in).
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(PhysReg: Reg))
        MBB.addLiveIn(PhysReg: Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in. This
      // happens with the @llvm-returnaddress intrinsic and with arguments
      // passed in callee saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(Elt: std::make_pair(x&: Reg, /*isKill=*/y: !isLiveIn));
    }

    if (Regs.empty())
      continue;

    // Register lists must be in ascending encoding order.
    llvm::sort(C&: Regs, Comp: [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(Reg: LHS.first) < TRI.getEncodingValue(Reg: RHS.first);
    });

    if (Regs.size() > 1 || StrOpc== 0) {
      MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StmOpc), DestReg: ARM::SP)
                                    .addReg(RegNo: ARM::SP)
                                    .setMIFlags(MachineInstr::FrameSetup)
                                    .add(MOs: predOps(Pred: ARMCC::AL));
      for (const auto &[Reg, Kill] : Regs)
        MIB.addReg(RegNo: Reg, Flags: getKillRegState(B: Kill));
    } else if (Regs.size() == 1) {
      // A single register: use a pre-indexed store that decrements SP by 4.
      BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StrOpc), DestReg: ARM::SP)
          .addReg(RegNo: Regs[0].first, Flags: getKillRegState(B: Regs[0].second))
          .addReg(RegNo: ARM::SP)
          .setMIFlags(MachineInstr::FrameSetup)
          .addImm(Val: -4)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer to
    // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
    if (MI != MBB.begin())
      --MI;
  }
}
1739
/// Emit one or more pop instructions that reload the callee-saved registers
/// in CSI selected by the Func predicate, updating SP as they go.
///
/// LdmOpc is the load-multiple (pop) opcode; LdrOpc is the single-register
/// post-indexed load opcode (0 if a lone register must still use LdmOpc).
/// When NoGap is true, only registers with consecutive numbers may share one
/// instruction (as required by VPOP). When it is safe to do so, restoring LR
/// is folded into the return by popping directly into PC.
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  // Classify the instruction we are inserting before: several kinds of
  // returns cannot have the LR restore folded into a pop-into-PC below.
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall =
        (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
         RetOpcode == ARM::TCRETURNrinotr12);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap = RetOpcode == ARM::TRAP || RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  // Walk CSI from the end, batching registers into Regs; each iteration of
  // the outer loop emits one pop instruction.
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      MCRegister Reg = Info.getReg();
      if (!Func(Reg))
        continue;

      // Pop LR directly into PC to fold the return into this LDM, but only
      // for a plain return (no tail call, interrupt return, trap, CMSE exit,
      // argument-stack cleanup, or pending PAC authentication) on targets
      // where a load into PC performs the return correctly (v5T and later).
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
          (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
           PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Elt: Reg);
    }

    if (Regs.empty())
      continue;

    // Register lists must be in ascending encoding order.
    llvm::sort(C&: Regs, Comp: [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(Reg: LHS) < TRI.getEncodingValue(Reg: RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
                                    .addReg(RegNo: ARM::SP)
                                    .add(MOs: predOps(Pred: ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned Reg : Regs)
        MIB.addReg(RegNo: Reg, Flags: getDefRegState(B: true));
      if (DeleteRet) {
        // The LDM is now the return; carry over the return's implicit
        // operands before erasing it.
        if (MI != MBB.end()) {
          MIB.copyImplicitOps(OtherMI: *MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      // A single register: use a post-indexed load that increments SP by 4.
      MachineInstrBuilder MIB =
          BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdrOpc), DestReg: Regs[0])
              .addReg(RegNo: ARM::SP, Flags: RegState::Define)
              .addReg(RegNo: ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(RegNo: 0);
        MIB.addImm(Val: ARM_AM::getAM2Opc(Opc: ARM_AM::add, Imm12: 4, SO: ARM_AM::no_shift));
      } else
        MIB.addImm(Val: 4);
      MIB.add(MOs: predOps(Pred: ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}
1850
1851void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
1852 MachineBasicBlock::iterator MI,
1853 ArrayRef<CalleeSavedInfo> CSI,
1854 unsigned PushOpc) const {
1855 MachineFunction &MF = *MBB.getParent();
1856 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1857
1858 SmallVector<MCRegister> Regs;
1859 auto RegPresent = [&CSI](MCRegister Reg) {
1860 return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
1861 return C.getReg() == Reg;
1862 });
1863 };
1864
1865 // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
1866 // instruction.
1867 if (RegPresent(ARM::FPSCR)) {
1868 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS), DestReg: ARM::R4)
1869 .add(MOs: predOps(Pred: ARMCC::AL))
1870 .setMIFlags(MachineInstr::FrameSetup);
1871
1872 Regs.push_back(Elt: ARM::R4);
1873 }
1874
1875 // If we need to save FPEXC, then we must move FPEXC into R5 with the
1876 // VMRS_FPEXC instruction.
1877 if (RegPresent(ARM::FPEXC)) {
1878 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS_FPEXC), DestReg: ARM::R5)
1879 .add(MOs: predOps(Pred: ARMCC::AL))
1880 .setMIFlags(MachineInstr::FrameSetup);
1881
1882 Regs.push_back(Elt: ARM::R5);
1883 }
1884
1885 // If neither FPSCR and FPEXC are present, then do nothing.
1886 if (Regs.size() == 0)
1887 return;
1888
1889 // Push both R4 and R5 onto the stack, if present.
1890 MachineInstrBuilder MIB =
1891 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: PushOpc), DestReg: ARM::SP)
1892 .addReg(RegNo: ARM::SP)
1893 .add(MOs: predOps(Pred: ARMCC::AL))
1894 .setMIFlags(MachineInstr::FrameSetup);
1895
1896 for (Register Reg : Regs) {
1897 MIB.addReg(RegNo: Reg);
1898 }
1899}
1900
1901void ARMFrameLowering::emitFPStatusRestores(
1902 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1903 MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
1904 MachineFunction &MF = *MBB.getParent();
1905 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1906
1907 auto RegPresent = [&CSI](MCRegister Reg) {
1908 return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
1909 return C.getReg() == Reg;
1910 });
1911 };
1912
1913 // Do nothing if we don't need to restore any FP status registers.
1914 if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
1915 return;
1916
1917 // Pop registers off of the stack.
1918 MachineInstrBuilder MIB =
1919 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
1920 .addReg(RegNo: ARM::SP)
1921 .add(MOs: predOps(Pred: ARMCC::AL))
1922 .setMIFlags(MachineInstr::FrameDestroy);
1923
1924 // If FPSCR was saved, it will be popped into R4.
1925 if (RegPresent(ARM::FPSCR)) {
1926 MIB.addReg(RegNo: ARM::R4, Flags: RegState::Define);
1927 }
1928
1929 // If FPEXC was saved, it will be popped into R5.
1930 if (RegPresent(ARM::FPEXC)) {
1931 MIB.addReg(RegNo: ARM::R5, Flags: RegState::Define);
1932 }
1933
1934 // Move the FPSCR value back into the register with the VMSR instruction.
1935 if (RegPresent(ARM::FPSCR)) {
1936 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR))
1937 .addReg(RegNo: ARM::R4)
1938 .add(MOs: predOps(Pred: ARMCC::AL))
1939 .setMIFlags(MachineInstr::FrameDestroy);
1940 }
1941
1942 // Move the FPEXC value back into the register with the VMSR_FPEXC
1943 // instruction.
1944 if (RegPresent(ARM::FPEXC)) {
1945 BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR_FPEXC))
1946 .addReg(RegNo: ARM::R5)
1947 .add(MOs: predOps(Pred: ARMCC::AL))
1948 .setMIFlags(MachineInstr::FrameDestroy);
1949 }
1950}
1951
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// r4 is used as a scratch register holding the spill-slot address; the last
/// spill instruction emitted kills it. The spills use NEON vst1.64 stores
/// with 16-byte alignment hints where possible, falling back to vstr.64 for
/// a trailing odd register.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    unsigned DNum = I.getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(ObjectIdx: FI, Alignment: DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer. MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack. Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(ObjectIdx: FI, Alignment: MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
      .addReg(RegNo: ARM::SP)
      .addImm(Val: 8 * NumAlignedDPRCS2Regs)
      .add(MOs: predOps(Pred: ARMCC::AL))
      .add(MO: condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MBBI: MI, DL, Reg: ARM::R4, Alignment: MaxAlign, MustBeSingleInstruction: true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::SP)
                                .addReg(RegNo: ARM::R4)
                                .add(MOs: predOps(Pred: ARMCC::AL));
  if (!isThumb)
    MIB.add(MO: condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Qwb_fixed), DestReg: ARM::R4)
        .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
        .addImm(Val: 16)
        .addReg(RegNo: NextReg)
        .addReg(RegNo: SupReg, Flags: RegState::ImplicitKill)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Q))
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: NextReg)
        .addReg(RegNo: SupReg, Flags: RegState::ImplicitKill)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1q64))
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(PhysReg: NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VSTRD))
        .addReg(RegNo: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: (NextReg - R4BaseReg) * 2)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
}
2094
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
///
/// That sequence is always exactly three realignment instructions followed
/// by one to three spill instructions: one store for 1, 2, or 4 registers;
/// two stores for 3, 5, 6, or 8; three stores (4 + 2 + 1) only for 7.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  // sub r4, sp, #numregs * 8
  // bic r4, r4, #align - 1
  // mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    // Seven registers take three stores; advance past the first of them.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  default:
    // Two-store cases (3, 5, 6, 8 registers); advance to the final store.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  case 1:
  case 2:
  case 4:
    // Single-store cases. The final spill instruction kills scratch r4.
    assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
    ++MI;
  }
  return MI;
}
2124
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// Mirrors emitAlignedDPRCS2Spills: r4 is used as a scratch register holding
/// the d8 spill-slot address, and the last reload inserted kills it.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
      .addFrameIndex(Idx: D8SpillFI)
      .addImm(Val: 0)
      .add(MOs: predOps(Pred: ARMCC::AL))
      .add(MO: condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Qwb_fixed), DestReg: NextReg)
        .addReg(RegNo: ARM::R4, Flags: RegState::Define)
        .addReg(RegNo: ARM::R4, Flags: RegState::Kill)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg, Flags: RegState::ImplicitDefine)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Q), DestReg: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg, Flags: RegState::ImplicitDefine)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1q64), DestReg: SupReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // vldr.64 uses addrmode5 which has an offset scale of 4.
  if (NumAlignedDPRCS2Regs)
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLDRD), DestReg: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 2 * (NextReg - R4BaseReg))
        .add(MOs: predOps(Pred: ARMCC::AL));

  // The last reload inserted above kills the scratch register r4.
  std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
}
2217
/// Insert prologue spill code for the callee-saved registers in CSI.
///
/// Spills are emitted one area at a time (GPRCS1, GPRCS2, FP status
/// registers, DPRCS1, GPRCS3, then the aligned DPRCS2 registers), preceded
/// by PAC computation and the FPCXTNS save for CMSE entry functions when
/// those apply. Returns true to indicate the spilling was handled here.
bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  // Opcodes for multi-register and single-register pushes in this ISA mode.
  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // Save the non-secure floating point context.
  if (llvm::any_of(Range&: CSI, P: [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VSTR_FPCXTNS_pre),
            DestReg: ARM::SP)
        .addReg(RegNo: ARM::SP)
        .addImm(Val: -4)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // Predicates selecting the registers belonging to each spill area.
  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  // Emit one push (or push sequence) per spill area, highest address first.
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS1);
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS2);
  emitFPStatusSaves(MBB, MI, CSI, PushOpc);
  emitPushInst(MBB, MI, CSI, StmOpc: FltOpc, StrOpc: 0, NoGap: true, Func: IsDPRCS1);
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS3);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
2283
/// Insert epilogue reload code for the callee-saved registers in CSI.
///
/// Restores the spill areas in the reverse of the order used by
/// spillCalleeSavedRegisters: aligned DPRCS2 first (while the stack is still
/// aligned), then GPRCS3, DPRCS1, FP status registers, GPRCS2, and GPRCS1.
/// Returns true to indicate the restoring was handled here.
bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  // Opcodes for multi-register and single-register pops in this ISA mode.
  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;

  // Predicates selecting the registers belonging to each spill area.
  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS3);
  emitPopInst(MBB, MI, CSI, LdmOpc: FltOpc, LdrOpc: 0, isVarArg, NoGap: true, Func: IsDPRCS1);
  emitFPStatusRestores(MBB, MI, CSI, LdmOpc: PopOpc);
  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS2);
  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS1);

  return true;
}
2335
2336// FIXME: Make generic?
2337static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2338 const ARMBaseInstrInfo &TII) {
2339 unsigned FnSize = 0;
2340 for (auto &MBB : MF) {
2341 for (auto &MI : MBB)
2342 FnSize += TII.getInstSizeInBytes(MI);
2343 }
2344 if (MF.getJumpTableInfo())
2345 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2346 FnSize += Table.MBBs.size() * 4;
2347 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2348 LLVM_DEBUG(dbgs() << "Estimated function size for " << MF.getName() << " = "
2349 << FnSize << " bytes\n");
2350 return FnSize;
2351}
2352
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
///
/// Also sets HasNonSPFrameIndex when a frame-index operand appears in an
/// instruction whose operand register class cannot hold SP.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  // Start from the most permissive immediate range (12 bits) and shrink it
  // for each instruction with a tighter addressing mode.
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, OpNum: i);
        if (RegClass && !RegClass->contains(Reg: ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          // 8-bit immediate, scaled by 2.
          Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          // 8-bit immediate, scaled by 4.
          Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
2436
// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
//
// Counts how many of d8-d15 can be spilled that way, records the count in
// ARMFunctionInfo (0 if the optimization does not apply), and reserves r4 as
// the scratch register the vst1/vld1 sequences need.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  // Default to no aligned DPRCS2 spills; overwritten below when applicable.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
        MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // needs spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(Idx: ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}
2483
2484bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2485 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2486 // upon function entry (resp. restore it immediately before return)
2487 if (STI.hasV8_1MMainlineOps() &&
2488 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2489 return false;
2490
2491 // We are disabling shrinkwrapping for now when PAC is enabled, as
2492 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2493 // generated. A follow-up patch will fix this in a more performant manner.
2494 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2495 SpillsLR: true /* SpillsLR */))
2496 return false;
2497
2498 return true;
2499}
2500
2501bool ARMFrameLowering::requiresAAPCSFrameRecord(
2502 const MachineFunction &MF) const {
2503 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2504 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2505}
2506
2507// Thumb1 may require a spill when storing to a frame index through FP (or any
2508// access with execute-only), for cases where FP is a high register (R11). This
2509// scans the function for cases where this may happen.
2510static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
2511 const TargetFrameLowering &TFI) {
2512 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2513 if (!AFI->isThumb1OnlyFunction())
2514 return false;
2515
2516 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2517 for (const auto &MBB : MF)
2518 for (const auto &MI : MBB)
2519 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2520 STI.genExecuteOnly())
2521 for (const auto &Op : MI.operands())
2522 if (Op.isFI()) {
2523 Register Reg;
2524 TFI.getFrameIndexReference(MF, FI: Op.getIndex(), FrameReg&: Reg);
2525 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2526 return true;
2527 }
2528 return false;
2529}
2530
/// Determine which callee-saved registers must be spilled for this function,
/// and ensure the register scavenger will have either a spare callee-saved
/// register or an emergency spill slot when stack offsets may exceed the
/// immediate range of the target's load/store instructions.
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
                           !MF.getTarget().Options.DisableFramePointerElim(MF);
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const Function &F = MF.getFunction();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = STI.getFramePointerReg();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // For a floating point interrupt, save these registers always, since LLVM
  // currently doesn't model reads/writes to these registers.
  if (F.hasFnAttribute(Kind: "interrupt") && F.hasFnAttribute(Kind: "save-fp")) {
    SavedRegs.set(ARM::FPSCR);
    SavedRegs.set(ARM::R4);

    // This register will only be present on non-MClass targets.
    if (STI.isMClass()) {
      SavedRegs.reset(Idx: ARM::FPEXC);
    } else {
      SavedRegs.set(ARM::FPEXC);
      SavedRegs.set(ARM::R5);
    }
  }

  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
  // since it's not always possible to restore sp from fp in a single
  // instruction.
  // FIXME: It will be better just to find spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, StackSizeInBytes: MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
    // requires stack alignment. We don't know for sure what the stack size
    // will be, but for this, an estimate is good enough. If anything
    // changes it, it'll be a spill, which implies we've used all the registers
    // and so R4 is already used, so not marking it here will be OK.
    // FIXME: It will be better just to find spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // When return address signing is enabled R12 is treated as callee-saved.
  if (AFI->shouldSignReturnAddress())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF: &MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Idx: Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      // Non-GPR callee saves only contribute to the FPR spill count
      // (in units of 4-byte words, for the stack size estimate below).
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (PushPopSplit != ARMSubtarget::SplitR7) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        [[fallthrough]];
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (PushPopSplit != ARMSubtarget::SplitR7) {
        UnspilledCS1GPRs.push_back(Elt: Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Elt: Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Elt: Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This enables
    // use of BL to implement far jump.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);

  // Determine biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(ObjectIdx: I) + MFI.getObjectSize(ObjectIdx: I);
    MaxFixedOffset = std::max(a: MaxFixedOffset, b: MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
    //
    // A special case is the scenario where r11 is used as FP, where accesses
    // to a frame index will require its value to be moved into a low reg.
    // This is handled later on, once we are able to determine if we have any
    // fp-relative accesses.
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, TFI: this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, AFI: *AFI, MF);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets ||
      !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    // Save the FP if:
    // 1. We currently need it (HasFP), OR
    // 2. We might need it later due to stack realignment from aligned DPRCS2
    //    saves (which will make hasFP() become true in emitPrologue).
    if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if ((requiresAAPCSFrameRecord(MF) ||
           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
          !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(CI: LRPos);
      }
      auto FPPos = llvm::find(Range&: UnspilledCS1GPRs, Val: FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(CI: FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is the number of extra spills inserted for callee-save GPRs which
    // would not otherwise be used by the function. When greater than zero it
    // guarantees that it is possible to scavenge a register to hold the
    // address of a stack slot. On Thumb1, the register must be a valid operand
    // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
    // or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    unsigned NumExtraCSSpill = 0;

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions can not
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue for
      // free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(a: EntryRegDeficit, b: ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first push
      // instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Idx: Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: Reg);
          LLVM_DEBUG(
              dbgs()
              << printReg(Reg, TRI)
              << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP || FramePtr != ARM::R7) {
        if (SavedRegs.test(Idx: ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: ARM::R7);
          LLVM_DEBUG(
              dbgs()
              << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Idx: Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(Reg: ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(Idx: ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(PhysReg: Reg))
          NumExtraCSSpill++;
        UnspilledCS1GPRs.erase(CI: llvm::find(Range&: UnspilledCS1GPRs, Val: Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }

    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(CI: LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(PhysReg: ARM::LR) && !MRI.isPhysRegUsed(PhysReg: ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        NumExtraCSSpill++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned Reg : UnspilledCS1GPRs) {
          // Don't spill high register if the function is thumb. In the case of
          // Windows on ARM, accept R11 (frame pointer)
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              NumExtraCSSpill++;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg))
          NumExtraCSSpill++;
      }
    }

    // Estimate if we might need to scavenge registers at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments and for frame index accesses when FP is high register,
    // even when the frame itself is small.
    unsigned RegsNeeded = 0;
    if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, TFI: *this)) {
      RegsNeeded++;
      // With thumb1 execute-only we may need an additional register for saving
      // and restoring the CPSR.
      if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
        RegsNeeded++;
    }

    if (RegsNeeded > NumExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(PhysReg: Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Elt: Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(PhysReg: Reg)) {
            Extras.push_back(Elt: Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(PhysReg: Reg))
            NumExtraCSSpill++;
        }
      }
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            FI: MFI.CreateSpillStackObject(Size, Alignment));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(Idx: ARM::LR));
}
3075
3076void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
3077 MachineFrameInfo &MFI = MF.getFrameInfo();
3078 if (!MFI.isCalleeSavedInfoValid())
3079 return;
3080
3081 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3082 // into PC so it is not live out of the return block: Clear the Restored bit
3083 // in that case.
3084 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
3085 if (Info.getReg() != ARM::LR)
3086 continue;
3087 if (all_of(Range&: MF, P: [](const MachineBasicBlock &MBB) {
3088 return all_of(Range: MBB.terminators(), P: [](const MachineInstr &Term) {
3089 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3090 Term.getOpcode() == ARM::t2LDMIA_RET ||
3091 Term.getOpcode() == ARM::tPOP_RET;
3092 });
3093 })) {
3094 Info.setRestored(false);
3095 break;
3096 }
3097 }
3098}
3099
void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // Run the generic target-independent processing first, then recompute
  // whether LR needs to stay marked as restored now that the callee-saved
  // info is final.
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  updateLRRestored(MF);
}
3105
3106void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
3107 BitVector &SavedRegs) const {
3108 TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
3109
3110 // If we have the "returned" parameter attribute which guarantees that we
3111 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3112 // record that fact for IPRA.
3113 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3114 if (AFI->getPreservesR0())
3115 SavedRegs.set(ARM::R0);
3116}
3117
bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register. It is not marked Restored because it is restored
  // separately, not through the normal CSR restore path.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(args: ARM::FPCXTNS);
    CSI.back().setRestored(false);
  }

  // For functions, which sign their return address, upon function entry, the
  // return address PAC is computed in R12. Treat R12 as a callee-saved register
  // in this case.
  const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
  if (AFI.shouldSignReturnAddress()) {
    // The order of register must match the order we push them, because the
    // PEI assigns frame indices in that order. That order depends on the
    // PushPopSplitVariation, there are only two cases which we use with return
    // address signing:
    switch (STI.getPushPopSplitVariation(MF)) {
    case ARMSubtarget::SplitR7:
      // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
      // Insert R12 just before the first CS2/DPR entry so it lands between
      // the two push areas.
      CSI.insert(position: find_if(Range&: CSI,
                         P: [=](const auto &CS) {
                           MCRegister Reg = CS.getReg();
                           return Reg == ARM::R10 || Reg == ARM::R11 ||
                                  Reg == ARM::R8 || Reg == ARM::R9 ||
                                  ARM::DPRRegClass.contains(Reg);
                         }),
                 x: CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::SplitR11AAPCSSignRA:
      // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
      // on the stack.
      CSI.insert(position: CSI.begin(), x: CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::NoSplit:
      assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
             "ABI-required frame pointers need a CSR split when signing return "
             "address.");
      // Insert R12 immediately after LR (the first non-LR position).
      CSI.insert(position: find_if(Range&: CSI,
                         P: [=](const auto &CS) {
                           MCRegister Reg = CS.getReg();
                           return Reg != ARM::LR;
                         }),
                 x: CalleeSavedInfo(ARM::R12));
      break;
    default:
      llvm_unreachable("Unexpected CSR split with return address signing");
    }
  }

  // Return false so PEI performs the default spill-slot assignment for the
  // (possibly augmented) CSI list.
  return false;
}
3173
3174const TargetFrameLowering::SpillSlot *
3175ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
3176 static const SpillSlot FixedSpillOffsets[] = {{.Reg: ARM::FPCXTNS, .Offset: -4}};
3177 NumEntries = std::size(FixedSpillOffsets);
3178 return FixedSpillOffsets;
3179}
3180
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  // Operand 1 of a call-frame-destroy pseudo is the number of bytes the
  // callee pops; -1U below acts as "no callee pop" sentinel.
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(i: 1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  // Preserve any predication on the pseudo so the emitted SP updates are
  // predicated identically.
  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(i: PIdx).getImm();
  unsigned PredReg = TII.getFramePred(MI: *I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(I: *I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(SPAdj: Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -Amount, MIFlags: MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: Amount, MIFlags: MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -CalleePopAmount,
                 MIFlags: MachineInstr::NoFlags, Pred, PredReg);
  }
  // The pseudo itself is always removed.
  return MBB.erase(I);
}
3234
/// Round \p Value up to the smallest constant that is greater than or equal
/// to it and encodable as an ARM modified-immediate: an 8-bit value rotated
/// right by an even number of bits within a 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time until the top two bits of the
  // word are occupied, remembering how far we shifted.
  unsigned Shifted = 0;
  while ((Value & 0xC0000000) == 0) {
    Value <<= 2;
    Shifted += 2;
  }

  // Keep only the top byte, rounding it up if any of the discarded low bits
  // were set (so the result is >= the requested value).
  bool Carry = (Value & 0x00FFFFFF) != 0;
  Value = ((Value & 0xFF000000) >> 24) + (Carry ? 1 : 0);

  // If rounding overflowed the 8-bit payload, clear the low bits that the
  // even-rotation encoding cannot represent.
  if (Value & 0x0000100)
    Value &= 0x000001FC;

  // Undo the normalization shift to move the byte back into position.
  return Shifted > 24 ? Value >> (Shifted - 24) : Value << (24 - Shifted);
}
3263
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit. Frames smaller than this margin can therefore compare the
// limit directly against SP instead of materializing SP - StackSize first.
static constexpr uint64_t kSplitStackAvailable = 256;
3267
// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
// must be well defined in order to allow for consistent implementations of the
// __morestack helper function. The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like (when calling __morestack)
//
// * r4 holds the minimum stack size requested for this function call
// * r5 holds the stack size of the arguments to the function
// * the beginning of the function is 3 instructions after the call to
// __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments on to the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, nor platforms other than
  // android/linux. Note that Thumb1/Thumb2 are supported for android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error(reason: "Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error(reason: "Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  // Nothing to do if this function does not require the split-stack prologue.
  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  // Blocks that will be stitched in before PrologueMBB:
  //   PrevStackMBB - pushes the scratch registers
  //   McrMBB       - computes the requested stack bottom into SR1
  //   GetMBB       - loads the stack limit into SR0 and compares with SR1
  //   AllocMBB     - calls __morestack, then returns
  //   PostStackMBB - restores the scratch registers, falls into the prologue
  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(Elt: &PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(Ptr: PredBB).second)
        WalkList.push_back(Elt: PredBB);
    }
  } while (!WalkList.empty());

  // The order in that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  BeforePrologueRegion.insert_range(R&: AddedBlocks);

  // Propagate PrologueMBB's live-ins to every block in the region so the
  // registers stay live across the new sub-CFG.
  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(RegMaskPair: LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(Ptr: B);
    MF.insert(MBBI: PrologueMBB.getIterator(), MBB: B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(MBB: &PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(Old: &PrologueMBB, New: AddedBlocks[0]);
  }

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(Value: StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 8);
    CFIBuilder.buildOffset(Reg: ScratchReg1, Offset: -4);
    CFIBuilder.buildOffset(Reg: ScratchReg0, Offset: -8);
  }

  // mov SR1, sp
  if (Thumb) {
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ScratchReg1)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ScratchReg1)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .add(MO: condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBi8), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addReg(RegNo: ScratchReg1)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      // The immediate is too large for tSUBi8; materialize it into SR0 first.
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
            .addImm(Val: AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                   Val: AlignedStackSize);
      }
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBrr), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addReg(RegNo: ScratchReg1)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBri), DestReg: ScratchReg1)
          .addReg(RegNo: ARM::SP)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                 Val: AlignedStackSize);
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBrr), DestReg: ScratchReg1)
          .addReg(RegNo: ARM::SP)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    }
  }

  // Load the stack limit into SR0. Thumb1 cannot use the coprocessor read
  // below, so it goes through the __STACK_LIMIT symbol instead.
  if (Thumb && ST->isThumb1Only()) {
    if (ST->genExecuteOnly()) {
      BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
          .addExternalSymbol(FnName: "__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
      ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
          C&: MF.getFunction().getContext(), s: "__STACK_LIMIT", ID: PCLabelId, PCAdj: 0);
      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRpci), DestReg: ScratchReg0)
          .addConstantPoolIndex(Idx: CPI)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    // ldr SR0, [SR0]
    BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRi), DestReg: ScratchReg0)
        .addReg(RegNo: ScratchReg0)
        .addImm(Val: 0)
        .add(MOs: predOps(Pred: ARMCC::AL));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2MRC : ARM::MRC),
            DestReg: ScratchReg0)
        .addImm(Val: 15)
        .addImm(Val: 0)
        .addImm(Val: 13)
        .addImm(Val: 0)
        .addImm(Val: 3)
        .add(MOs: predOps(Pred: ARMCC::AL));

    // Use the last tls slot on android and a private field of the TCB on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            DestReg: ScratchReg0)
        .addReg(RegNo: ScratchReg0)
        .addImm(Val: 4 * TlsOffset)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
      .addReg(RegNo: ScratchReg0)
      .addReg(RegNo: ScratchReg1)
      .add(MOs: predOps(Pred: ARMCC::AL));

  // This jump is taken if StackLimit <= SP - stack required, i.e. there is
  // still enough stack and __morestack() does not need to be called.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
      .addMBB(MBB: PostStackMBB)
      .addImm(Val: ARMCC::LS)
      .addReg(RegNo: ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  // The amount size of stack required
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg0)
          .add(MO: condCodeOp())
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
            .addImm(Val: AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                   Val: AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg0)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                 Val: AlignedStackSize);
    }
  }

  // Pass second argument for the __morestack by Scratch Register #1.
  // The amount size of stack consumed to save function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg1)
            .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
            Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(Value: ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg1)
          .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
          Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 12);
    CFIBuilder.buildOffset(Reg: ARM::LR, Offset: -12);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tBL))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addExternalSymbol(FnName: "__morestack");
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::BL))
        .addExternalSymbol(FnName: "__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 cannot pop directly into LR; bounce through SR0.
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addReg(RegNo: ScratchReg0);
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::LR)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::t2LDR_POST))
          .addReg(RegNo: ARM::LR, Flags: RegState::Define)
          .addReg(RegNo: ARM::SP, Flags: RegState::Define)
          .addReg(RegNo: ARM::SP)
          .addImm(Val: 4)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  }

  // Restore SR0 and SR1 in case of __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
    CFIInstBuilder(AllocMBB, MachineInstr::NoFlags).buildDefCFAOffset(Offset: 0);

  // Return from this function.
  BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ST->getReturnOpcode())).add(MOs: predOps(Pred: ARMCC::AL));

  // Restore SR0 and SR1 in case of __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, Flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 0);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
    CFIBuilder.buildSameValue(Reg: ScratchReg0);
    CFIBuilder.buildSameValue(Reg: ScratchReg1);
  }

  // Organizing MBB lists
  PostStackMBB->addSuccessor(Succ: &PrologueMBB);

  AllocMBB->addSuccessor(Succ: PostStackMBB);

  GetMBB->addSuccessor(Succ: PostStackMBB);
  GetMBB->addSuccessor(Succ: AllocMBB);

  McrMBB->addSuccessor(Succ: GetMBB);

  PrevStackMBB->addSuccessor(Succ: McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
3724