1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, at-compile time, a constant offset must be
79// computable from one of the pointers (fp, bp, sp) to access it. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
114#include "ARMMachineFunctionInfo.h"
115#include "ARMSubtarget.h"
116#include "MCTargetDesc/ARMAddressingModes.h"
117#include "MCTargetDesc/ARMBaseInfo.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
123#include "llvm/CodeGen/CFIInstBuilder.h"
124#include "llvm/CodeGen/MachineBasicBlock.h"
125#include "llvm/CodeGen/MachineConstantPool.h"
126#include "llvm/CodeGen/MachineFrameInfo.h"
127#include "llvm/CodeGen/MachineFunction.h"
128#include "llvm/CodeGen/MachineInstr.h"
129#include "llvm/CodeGen/MachineInstrBuilder.h"
130#include "llvm/CodeGen/MachineJumpTableInfo.h"
131#include "llvm/CodeGen/MachineModuleInfo.h"
132#include "llvm/CodeGen/MachineOperand.h"
133#include "llvm/CodeGen/MachineRegisterInfo.h"
134#include "llvm/CodeGen/RegisterScavenging.h"
135#include "llvm/CodeGen/TargetInstrInfo.h"
136#include "llvm/CodeGen/TargetRegisterInfo.h"
137#include "llvm/CodeGen/TargetSubtargetInfo.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
145#include "llvm/Support/CommandLine.h"
146#include "llvm/Support/Compiler.h"
147#include "llvm/Support/Debug.h"
148#include "llvm/Support/ErrorHandling.h"
149#include "llvm/Support/raw_ostream.h"
150#include "llvm/Target/TargetMachine.h"
151#include "llvm/Target/TargetOptions.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
160#define DEBUG_TYPE "arm-frame-lowering"
161
162using namespace llvm;
163
164static cl::opt<bool>
165SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(Val: true),
166 cl::desc("Align ARM NEON spills in prolog and epilog"));
167
168static MachineBasicBlock::iterator
169skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
170 unsigned NumAlignedDPRCS2Regs);
171
/// Named regions of the stack frame into which callee-saved registers are
/// spilled. Which register goes into which area (and the push order of the
/// areas) depends on the PushPopSplitVariation; see getSpillArea() below.
enum class SpillArea {
  GPRCS1,   // First GPR push area (e.g. push {r0-r7, lr} for SplitR7).
  GPRCS2,   // Second GPR push area (e.g. push {r8-r12} for SplitR7).
  FPStatus, // FP status registers (FPSCR, FPEXC).
  DPRCS1,   // D-register (VFP/NEON) save area (vpush).
  DPRCS2,   // Extra-aligned D-register area, placed after stack re-alignment.
  GPRCS3,   // {r11, lr} push used by SplitR11WindowsSEH.
  FPCXT,    // FPCXTNS save for CMSE secure entry functions; top of the frame.
};
181
182/// Get the spill area that Reg should be saved into in the prologue.
183SpillArea getSpillArea(Register Reg,
184 ARMSubtarget::PushPopSplitVariation Variation,
185 unsigned NumAlignedDPRCS2Regs,
186 const ARMBaseRegisterInfo *RegInfo) {
187 // NoSplit:
188 // push {r0-r12, lr} GPRCS1
189 // vpush {r8-d15} DPRCS1
190 //
191 // SplitR7:
192 // push {r0-r7, lr} GPRCS1
193 // push {r8-r12} GPRCS2
194 // vpush {r8-d15} DPRCS1
195 //
196 // SplitR11WindowsSEH:
197 // push {r0-r10, r12} GPRCS1
198 // vpush {r8-d15} DPRCS1
199 // push {r11, lr} GPRCS3
200 //
201 // SplitR11AAPCSSignRA:
202 // push {r0-r10, r12} GPRSC1
203 // push {r11, lr} GPRCS2
204 // vpush {r8-d15} DPRCS1
205
206 // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
207 // the top of the stack frame.
208 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
209 // of SP. If used, it will be below the other save areas, after the stack has
210 // been re-aligned.
211
212 switch (Reg) {
213 default:
214 dbgs() << "Don't know where to spill " << printReg(Reg, TRI: RegInfo) << "\n";
215 llvm_unreachable("Don't know where to spill this register");
216 break;
217
218 case ARM::FPCXTNS:
219 return SpillArea::FPCXT;
220
221 case ARM::FPSCR:
222 case ARM::FPEXC:
223 return SpillArea::FPStatus;
224
225 case ARM::R0:
226 case ARM::R1:
227 case ARM::R2:
228 case ARM::R3:
229 case ARM::R4:
230 case ARM::R5:
231 case ARM::R6:
232 case ARM::R7:
233 return SpillArea::GPRCS1;
234
235 case ARM::R8:
236 case ARM::R9:
237 case ARM::R10:
238 if (Variation == ARMSubtarget::SplitR7)
239 return SpillArea::GPRCS2;
240 else
241 return SpillArea::GPRCS1;
242
243 case ARM::R11:
244 if (Variation == ARMSubtarget::SplitR7 ||
245 Variation == ARMSubtarget::SplitR11AAPCSSignRA)
246 return SpillArea::GPRCS2;
247 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
248 return SpillArea::GPRCS3;
249
250 return SpillArea::GPRCS1;
251
252 case ARM::R12:
253 if (Variation == ARMSubtarget::SplitR7)
254 return SpillArea::GPRCS2;
255 else
256 return SpillArea::GPRCS1;
257
258 case ARM::LR:
259 if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
260 return SpillArea::GPRCS2;
261 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
262 return SpillArea::GPRCS3;
263
264 return SpillArea::GPRCS1;
265
266 case ARM::D0:
267 case ARM::D1:
268 case ARM::D2:
269 case ARM::D3:
270 case ARM::D4:
271 case ARM::D5:
272 case ARM::D6:
273 case ARM::D7:
274 return SpillArea::DPRCS1;
275
276 case ARM::D8:
277 case ARM::D9:
278 case ARM::D10:
279 case ARM::D11:
280 case ARM::D12:
281 case ARM::D13:
282 case ARM::D14:
283 case ARM::D15:
284 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
285 return SpillArea::DPRCS2;
286 else
287 return SpillArea::DPRCS1;
288
289 case ARM::D16:
290 case ARM::D17:
291 case ARM::D18:
292 case ARM::D19:
293 case ARM::D20:
294 case ARM::D21:
295 case ARM::D22:
296 case ARM::D23:
297 case ARM::D24:
298 case ARM::D25:
299 case ARM::D26:
300 case ARM::D27:
301 case ARM::D28:
302 case ARM::D29:
303 case ARM::D30:
304 case ARM::D31:
305 return SpillArea::DPRCS1;
306 }
307}
308
// The ARM stack grows downwards, uses the subtarget's preferred stack
// alignment, and has a local-area offset of 0. Align(4) is passed as the
// transient stack alignment (minimum SP alignment between stack operations).
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}
312
313bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
314 // iOS always has a FP for backtracking, force other targets to keep their FP
315 // when doing FastISel. The emitted code is currently superior, and in cases
316 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
317 return MF.getSubtarget<ARMSubtarget>().useFastISel();
318}
319
320/// Returns true if the target can safely skip saving callee-saved registers
321/// for noreturn nounwind functions.
322bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
323 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
324 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
325 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
326
327 // Frame pointer and link register are not treated as normal CSR, thus we
328 // can always skip CSR saves for nonreturning functions.
329 return true;
330}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
335bool ARMFrameLowering::hasFPImpl(const MachineFunction &MF) const {
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target want to forcibly keep frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
344 if (MF.getTarget().Options.DisableFramePointerElim(MF))
345 return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
352/// isFPReserved - Return true if the frame pointer register should be
353/// considered a reserved register on the scope of the specified function.
354bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
355 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
356}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
363bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has small immediate offset to
368 // address the stack frame. So a large call frame can cause poor codegen
369 // and may even makes it impossible to scavenge a register.
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
375
376/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
377/// call frame pseudos can be simplified. Unlike most targets, having a FP
378/// is not sufficient here since we still may reference some objects via SP
379/// even when FP is available in Thumb2 mode.
380bool
381ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
382 return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
383}
384
385// Returns how much of the incoming argument stack area we should clean up in an
386// epilogue. For the C calling convention this will be 0, for guaranteed tail
387// call conventions it can be positive (a normal return or a tail call to a
388// function that uses less stack space for arguments) or negative (for a tail
389// call to a function that needs more stack space than us for arguments).
390static int getArgumentStackToRestore(MachineFunction &MF,
391 MachineBasicBlock &MBB) {
392 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
400 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(i: 1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments, this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
427// Given a load or a store instruction, generate an appropriate unwinding SEH
428// code on Windows.
429static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
430 const TargetInstrInfo &TII,
431 unsigned Flags) {
432 unsigned Opc = MBBI->getOpcode();
433 MachineBasicBlock *MBB = MBBI->getParent();
434 MachineFunction &MF = *MBB->getParent();
435 DebugLoc DL = MBBI->getDebugLoc();
436 MachineInstrBuilder MIB;
437 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
438 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
439
440 Flags |= MachineInstr::NoMerge;
441
442 switch (Opc) {
443 default:
444 report_fatal_error(reason: "No SEH Opcode for instruction " + TII.getName(Opcode: Opc));
445 break;
446 case ARM::t2ADDri: // add.w r11, sp, #xx
447 case ARM::t2ADDri12: // add.w r11, sp, #xx
448 case ARM::t2MOVTi16: // movt r4, #xx
449 case ARM::tBL: // bl __chkstk
450 // These are harmless if used for just setting up a frame pointer,
451 // but that frame pointer can't be relied upon for unwinding, unless
452 // set up with SEH_SaveSP.
453 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
454 .addImm(/*Wide=*/Val: 1)
455 .setMIFlags(Flags);
456 break;
457
458 case ARM::t2MOVi16: { // mov(w) r4, #xx
459 bool Wide = MBBI->getOperand(i: 1).getImm() >= 256;
460 if (!Wide) {
461 MachineInstrBuilder NewInstr =
462 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
463 NewInstr.add(MO: MBBI->getOperand(i: 0));
464 NewInstr.add(MO: t1CondCodeOp(/*isDead=*/true));
465 for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands()))
466 NewInstr.add(MO);
467 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
468 MBB->erase(I: MBBI);
469 MBBI = NewMBBI;
470 }
471 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop)).addImm(Val: Wide).setMIFlags(Flags);
472 break;
473 }
474
475 case ARM::tBLXr: // blx r12 (__chkstk)
476 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
477 .addImm(/*Wide=*/Val: 0)
478 .setMIFlags(Flags);
479 break;
480
481 case ARM::t2MOVi32imm: // movw+movt
482 // This pseudo instruction expands into two mov instructions. If the
483 // second operand is a symbol reference, this will stay as two wide
484 // instructions, movw+movt. If they're immediates, the first one can
485 // end up as a narrow mov though.
486 // As two SEH instructions are appended here, they won't get interleaved
487 // between the two final movw/movt instructions, but it doesn't make any
488 // practical difference.
489 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
490 .addImm(/*Wide=*/Val: 1)
491 .setMIFlags(Flags);
492 MBB->insertAfter(I: MBBI, MI: MIB);
493 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
494 .addImm(/*Wide=*/Val: 1)
495 .setMIFlags(Flags);
496 break;
497
498 case ARM::t2STR_PRE:
499 if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
500 MBBI->getOperand(i: 2).getReg() == ARM::SP &&
501 MBBI->getOperand(i: 3).getImm() == -4) {
502 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
503 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
504 .addImm(Val: 1ULL << Reg)
505 .addImm(/*Wide=*/Val: 1)
506 .setMIFlags(Flags);
507 } else {
508 report_fatal_error(reason: "No matching SEH Opcode for t2STR_PRE");
509 }
510 break;
511
512 case ARM::t2LDR_POST:
513 if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
514 MBBI->getOperand(i: 2).getReg() == ARM::SP &&
515 MBBI->getOperand(i: 3).getImm() == 4) {
516 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
517 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
518 .addImm(Val: 1ULL << Reg)
519 .addImm(/*Wide=*/Val: 1)
520 .setMIFlags(Flags);
521 } else {
522 report_fatal_error(reason: "No matching SEH Opcode for t2LDR_POST");
523 }
524 break;
525
526 case ARM::t2LDMIA_RET:
527 case ARM::t2LDMIA_UPD:
528 case ARM::t2STMDB_UPD: {
529 unsigned Mask = 0;
530 bool Wide = false;
531 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
532 const MachineOperand &MO = MBBI->getOperand(i);
533 if (!MO.isReg() || MO.isImplicit())
534 continue;
535 unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
536 if (Reg == 15)
537 Reg = 14;
538 if (Reg >= 8 && Reg <= 13)
539 Wide = true;
540 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
541 Wide = true;
542 Mask |= 1 << Reg;
543 }
544 if (!Wide) {
545 unsigned NewOpc;
546 switch (Opc) {
547 case ARM::t2LDMIA_RET:
548 NewOpc = ARM::tPOP_RET;
549 break;
550 case ARM::t2LDMIA_UPD:
551 NewOpc = ARM::tPOP;
552 break;
553 case ARM::t2STMDB_UPD:
554 NewOpc = ARM::tPUSH;
555 break;
556 default:
557 llvm_unreachable("");
558 }
559 MachineInstrBuilder NewInstr =
560 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: NewOpc)).setMIFlags(MBBI->getFlags());
561 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
562 NewInstr.add(MO: MBBI->getOperand(i));
563 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
564 MBB->erase(I: MBBI);
565 MBBI = NewMBBI;
566 }
567 unsigned SEHOpc =
568 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
569 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: SEHOpc))
570 .addImm(Val: Mask)
571 .addImm(Val: Wide ? 1 : 0)
572 .setMIFlags(Flags);
573 break;
574 }
575 case ARM::VSTMDDB_UPD:
576 case ARM::VLDMDIA_UPD: {
577 int First = -1, Last = 0;
578 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: 4)) {
579 unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
580 if (First == -1)
581 First = Reg;
582 Last = Reg;
583 }
584 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveFRegs))
585 .addImm(Val: First)
586 .addImm(Val: Last)
587 .setMIFlags(Flags);
588 break;
589 }
590 case ARM::tSUBspi:
591 case ARM::tADDspi:
592 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
593 .addImm(Val: MBBI->getOperand(i: 2).getImm() * 4)
594 .addImm(/*Wide=*/Val: 0)
595 .setMIFlags(Flags);
596 break;
597 case ARM::t2SUBspImm:
598 case ARM::t2SUBspImm12:
599 case ARM::t2ADDspImm:
600 case ARM::t2ADDspImm12:
601 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
602 .addImm(Val: MBBI->getOperand(i: 2).getImm())
603 .addImm(/*Wide=*/Val: 1)
604 .setMIFlags(Flags);
605 break;
606
607 case ARM::tMOVr:
608 if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
609 (Flags & MachineInstr::FrameSetup)) {
610 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
611 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
612 .addImm(Val: Reg)
613 .setMIFlags(Flags);
614 } else if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
615 (Flags & MachineInstr::FrameDestroy)) {
616 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
617 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
618 .addImm(Val: Reg)
619 .setMIFlags(Flags);
620 } else {
621 report_fatal_error(reason: "No SEH Opcode for MOV");
622 }
623 break;
624
625 case ARM::tBX_RET:
626 case ARM::TCRETURNri:
627 case ARM::TCRETURNrinotr12:
628 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
629 .addImm(/*Wide=*/Val: 0)
630 .setMIFlags(Flags);
631 break;
632
633 case ARM::TCRETURNdi:
634 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
635 .addImm(/*Wide=*/Val: 1)
636 .setMIFlags(Flags);
637 break;
638 }
639 return MBB->insertAfter(I: MBBI, MI: MIB);
640}
641
642static MachineBasicBlock::iterator
643initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
644 if (MBBI == MBB.begin())
645 return MachineBasicBlock::iterator();
646 return std::prev(x: MBBI);
647}
648
649static void insertSEHRange(MachineBasicBlock &MBB,
650 MachineBasicBlock::iterator Start,
651 const MachineBasicBlock::iterator &End,
652 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
653 if (Start.isValid())
654 Start = std::next(x: Start);
655 else
656 Start = MBB.begin();
657
658 for (auto MI = Start; MI != End;) {
659 auto Next = std::next(x: MI);
660 // Check if this instruction already has got a SEH opcode added. In that
661 // case, don't do this generic mapping.
662 if (Next != End && isSEHInstruction(MI: *Next)) {
663 MI = std::next(x: Next);
664 while (MI != End && isSEHInstruction(MI: *MI))
665 ++MI;
666 continue;
667 }
668 insertSEH(MBBI: MI, TII, Flags: MIFlags);
669 MI = Next;
670 }
671}
672
673static void emitRegPlusImmediate(
674 bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
675 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
676 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
677 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
678 if (isARM)
679 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
680 Pred, PredReg, TII, MIFlags);
681 else
682 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
683 Pred, PredReg, TII, MIFlags);
684}
685
686static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
687 MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
688 const ARMBaseInstrInfo &TII, int NumBytes,
689 unsigned MIFlags = MachineInstr::NoFlags,
690 ARMCC::CondCodes Pred = ARMCC::AL,
691 unsigned PredReg = 0) {
692 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, DestReg: ARM::SP, SrcReg: ARM::SP, NumBytes,
693 MIFlags, Pred, PredReg);
694}
695
696static int sizeOfSPAdjustment(const MachineInstr &MI) {
697 int RegSize;
698 switch (MI.getOpcode()) {
699 case ARM::VSTMDDB_UPD:
700 RegSize = 8;
701 break;
702 case ARM::STMDB_UPD:
703 case ARM::t2STMDB_UPD:
704 RegSize = 4;
705 break;
706 case ARM::t2STR_PRE:
707 case ARM::STR_PRE_IMM:
708 return 4;
709 default:
710 llvm_unreachable("Unknown push or pop like instruction");
711 }
712
713 int count = 0;
714 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
715 // pred) so the list starts at 4.
716 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
717 count += RegSize;
718 return count;
719}
720
721static bool WindowsRequiresStackProbe(const MachineFunction &MF,
722 size_t StackSizeInBytes) {
723 const MachineFrameInfo &MFI = MF.getFrameInfo();
724 const Function &F = MF.getFunction();
725 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
726
727 StackProbeSize =
728 F.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: StackProbeSize);
729 return (StackSizeInBytes >= StackProbeSize) &&
730 !F.hasFnAttribute(Kind: "no-stack-arg-probe");
731}
732
733namespace {
734
735struct StackAdjustingInsts {
736 struct InstInfo {
737 MachineBasicBlock::iterator I;
738 unsigned SPAdjust;
739 bool BeforeFPSet;
740
741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
742 void dump() {
743 dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
744 << "sp-adjust=" << SPAdjust;
745 I->dump();
746 }
747#endif
748 };
749
750 SmallVector<InstInfo, 4> Insts;
751
752 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
753 bool BeforeFPSet = false) {
754 InstInfo Info = {.I: I, .SPAdjust: SPAdjust, .BeforeFPSet: BeforeFPSet};
755 Insts.push_back(Elt: Info);
756 }
757
758 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
759 auto Info =
760 llvm::find_if(Range&: Insts, P: [&](InstInfo &Info) { return Info.I == I; });
761 assert(Info != Insts.end() && "invalid sp adjusting instruction");
762 Info->SPAdjust += ExtraBytes;
763 }
764
765 void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
766 CFIInstBuilder CFIBuilder(MBB, MBB.end(), MachineInstr::FrameSetup);
767 unsigned CFAOffset = 0;
768 for (auto &Info : Insts) {
769 if (HasFP && !Info.BeforeFPSet)
770 return;
771
772 CFAOffset += Info.SPAdjust;
773 CFIBuilder.setInsertPoint(std::next(x: Info.I));
774 CFIBuilder.buildDefCFAOffset(Offset: CFAOffset);
775 }
776 }
777
778#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
779 void dump() {
780 dbgs() << "StackAdjustingInsts:\n";
781 for (auto &Info : Insts)
782 Info.dump();
783 }
784#endif
785};
786
787} // end anonymous namespace
788
789/// Emit an instruction sequence that will align the address in
790/// register Reg by zero-ing out the lower bits. For versions of the
791/// architecture that support Neon, this must be done in a single
792/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
793/// single instruction. That function only gets called when optimizing
794/// spilling of D registers on a core with the Neon instruction set
795/// present.
796static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
797 const TargetInstrInfo &TII,
798 MachineBasicBlock &MBB,
799 MachineBasicBlock::iterator MBBI,
800 const DebugLoc &DL, const unsigned Reg,
801 const Align Alignment,
802 const bool MustBeSingleInstruction) {
803 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
804 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
805 const unsigned AlignMask = Alignment.value() - 1U;
806 const unsigned NrBitsToZero = Log2(A: Alignment);
807 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
808 if (!AFI->isThumbFunction()) {
809 // if the BFC instruction is available, use that to zero the lower
810 // bits:
811 // bfc Reg, #0, log2(Alignment)
812 // otherwise use BIC, if the mask to zero the required number of bits
813 // can be encoded in the bic immediate field
814 // bic Reg, Reg, Alignment-1
815 // otherwise, emit
816 // lsr Reg, Reg, log2(Alignment)
817 // lsl Reg, Reg, log2(Alignment)
818 if (CanUseBFC) {
819 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BFC), DestReg: Reg)
820 .addReg(RegNo: Reg, flags: RegState::Kill)
821 .addImm(Val: ~AlignMask)
822 .add(MOs: predOps(Pred: ARMCC::AL));
823 } else if (AlignMask <= 255) {
824 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BICri), DestReg: Reg)
825 .addReg(RegNo: Reg, flags: RegState::Kill)
826 .addImm(Val: AlignMask)
827 .add(MOs: predOps(Pred: ARMCC::AL))
828 .add(MO: condCodeOp());
829 } else {
830 assert(!MustBeSingleInstruction &&
831 "Shouldn't call emitAligningInstructions demanding a single "
832 "instruction to be emitted for large stack alignment for a target "
833 "without BFC.");
834 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
835 .addReg(RegNo: Reg, flags: RegState::Kill)
836 .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsr, Imm: NrBitsToZero))
837 .add(MOs: predOps(Pred: ARMCC::AL))
838 .add(MO: condCodeOp());
839 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
840 .addReg(RegNo: Reg, flags: RegState::Kill)
841 .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: NrBitsToZero))
842 .add(MOs: predOps(Pred: ARMCC::AL))
843 .add(MO: condCodeOp());
844 }
845 } else {
846 // Since this is only reached for Thumb-2 targets, the BFC instruction
847 // should always be available.
848 assert(CanUseBFC);
849 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::t2BFC), DestReg: Reg)
850 .addReg(RegNo: Reg, flags: RegState::Kill)
851 .addImm(Val: ~AlignMask)
852 .add(MOs: predOps(Pred: ARMCC::AL));
853 }
854}
855
856/// We need the offset of the frame pointer relative to other MachineFrameInfo
857/// offsets which are encoded relative to SP at function begin.
858/// See also emitPrologue() for how the FP is set up.
859/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
860/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
861/// this to produce a conservative estimate that we check in an assert() later.
862static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
863 const MachineFunction &MF) {
864 ARMSubtarget::PushPopSplitVariation PushPopSplit =
865 STI.getPushPopSplitVariation(MF);
866 // For Thumb1, push.w isn't available, so the first push will always push
867 // r7 and lr onto the stack first.
868 if (AFI.isThumb1OnlyFunction())
869 return -AFI.getArgRegsSaveSize() - (2 * 4);
870 // This is a conservative estimation: Assume the frame pointer being r7 and
871 // pc("r15") up to r8 getting spilled before (= 8 registers).
872 int MaxRegBytes = 8 * 4;
873 if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
874 // Here, r11 can be stored below all of r4-r15.
875 MaxRegBytes = 11 * 4;
876 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
877 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
878 MaxRegBytes = 11 * 4 + 8 * 8;
879 }
880 int FPCXTSaveSize =
881 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
882 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
883}
884
/// Emit the prologue for the current function into \p MBB: push the
/// callee-saved registers area by area, allocate the local stack frame, set up
/// the frame pointer if one is needed, and record the matching unwind
/// information (DWARF CFI or Windows SEH). See the frame layout diagram at the
/// top of this file for the ordering of the individual areas.
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetMachine &TM = MF.getTarget();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;
  bool NeedsWinCFI = needsWinCFI(MF);
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
           DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  // Fast path: no callee-saved registers to spill, so at most a plain SP
  // adjustment (plus its unwind info) is needed.
  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes))) {
    if (NumBytes != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes, BeforeFPSet: true);
    }
    if (!NeedsWinCFI)
      DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
    if (NeedsWinCFI && MBBI != MBB.begin()) {
      insertSEHRange(MBB, Start: {}, End: MBBI, TII, MIFlags: MachineInstr::FrameSetup);
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      MF.setHasWinCFI(true);
    }
    return;
  }

  // Determine spill area sizes, and some important frame indices.
  SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
  bool BeforeFPPush = true;
  for (const CalleeSavedInfo &I : CSI) {
    MCRegister Reg = I.getReg();
    int FI = I.getFrameIdx();

    SpillArea Area = getSpillArea(Reg, Variation: PushPopSplit,
                                  NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(), RegInfo);

    if (Reg == FramePtr.asMCReg()) {
      FramePtrSpillFI = FI;
      FramePtrSpillArea = Area;
    }
    if (Reg == ARM::D8)
      D8SpillFI = FI;

    switch (Area) {
    case SpillArea::FPCXT:
      FPCXTSaveSize += 4;
      break;
    case SpillArea::GPRCS1:
      GPRCS1Size += 4;
      break;
    case SpillArea::GPRCS2:
      GPRCS2Size += 4;
      break;
    case SpillArea::FPStatus:
      FPStatusSize += 4;
      break;
    case SpillArea::DPRCS1:
      DPRCS1Size += 8;
      break;
    case SpillArea::GPRCS3:
      GPRCS3Size += 4;
      break;
    case SpillArea::DPRCS2:
      DPRCS2Size += 8;
      break;
    }
  }

  // Iterators pointing at the push instruction of each area. These are used
  // later to place CFI instructions and to compute FP-relative offsets.
  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
                              DPRCS1Push, GPRCS3Push;

  // Move past the PAC computation.
  if (AFI->shouldSignReturnAddress())
    LastPush = MBBI++;

  // Move past FPCXT area.
  if (FPCXTSaveSize > 0) {
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPCXTSaveSize, BeforeFPSet: BeforeFPPush);
  }

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -ArgRegsSaveSize,
                 MIFlags: MachineInstr::FrameSetup);
    LastPush = std::prev(x: MBBI);
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: ArgRegsSaveSize, BeforeFPSet: BeforeFPPush);
  }

  // Move past area 1.
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS1Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS1)
      BeforeFPPush = false;
  }

  // Determine starting offsets of spill areas. These offsets are all positive
  // offsets from the bottom of the lowest-addressed callee-save area
  // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
  // of the spill area in question.
  unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
  unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;

  Align DPRAlign = DPRCS1Size ? std::min(a: Align(8), b: Alignment) : Align(4);
  // Bytes of padding needed so the DPR saves end up 8-byte aligned.
  unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                         GPRCS2Size + FPStatusSize) %
                        DPRAlign.value();

  unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;

  if (HasFP) {
    // Offset from the CFA to the saved frame pointer, will be negative.
    [[maybe_unused]] int FPOffset = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI);
    LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
                      << ", FPOffset: " << FPOffset << "\n");
    assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
           "Max FP estimation is wrong");
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);

  // Move past area 2.
  if (GPRCS2Size > 0) {
    assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS2Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS2)
      BeforeFPPush = false;
  }

  // Move past FP status save area.
  if (FPStatusSize > 0) {
    // Skip the VMRS/VMRS_FPEXC instructions that read the status values being
    // saved; the push that stores them follows.
    while (MBBI != MBB.end()) {
      unsigned Opc = MBBI->getOpcode();
      if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
        MBBI++;
      else
        break;
    }
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPStatusSize);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
  // .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes: DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -DPRGapSize,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: DPRGapSize, BeforeFPSet: BeforeFPPush);
    }
  }

  // Move past DPRCS1Size.
  if (DPRCS1Size > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(I: MBBI, SPAdjust: sizeOfSPAdjustment(MI: *MBBI),
                                     BeforeFPSet: BeforeFPPush);
      DPRCS1Push = LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (DPRCS2Size > 0) {
    MBBI = skipAlignedDPRCS2Spills(MI: MBBI, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(ObjectIdx: D8SpillFI);
  } else
    NumBytes = DPRCS1Offset;

  // Move GPRCS3, if using SplitR11WindowsSEH.
  if (GPRCS3Size > 0) {
    assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
    GPRCS3Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS3Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS3)
      BeforeFPPush = false;
  }

  // With SplitR11WindowsSEH and a frame pointer, no explicit SEH StackAlloc
  // opcode is emitted for the stack allocation below.
  bool NeedsWinCFIStackAlloc = NeedsWinCFI;
  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
    NeedsWinCFIStackAlloc = false;

  // Large Windows allocations must probe the stack via __chkstk, which takes
  // the allocation size in words in r4 and is followed by the actual SP
  // subtraction.
  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
          .addImm(Val: NumWords)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      // Split into two instructions here, instead of using t2MOVi32imm,
      // to allow inserting accurate SEH instructions (including accurate
      // instruction size for each of them).
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
          .addImm(Val: NumWords & 0xffff)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVTi16), DestReg: ARM::R4)
          .addReg(RegNo: ARM::R4)
          .addImm(Val: NumWords >> 16)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    switch (TM.getCodeModel()) {
    case CodeModel::Tiny:
      llvm_unreachable("Tiny code model not available on ARM.");
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBL))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addExternalSymbol(FnName: "__chkstk")
          .addReg(RegNo: ARM::R4, flags: RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      // The large code model cannot assume __chkstk is in BL range, so load
      // its address into r12 and call through the register.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi32imm), DestReg: ARM::R12)
          .addExternalSymbol(FnName: "__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBLXr))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addReg(RegNo: ARM::R12, flags: RegState::Kill)
          .addReg(RegNo: ARM::R4, flags: RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    MachineInstrBuilder Instr, SEH;
    Instr = BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2SUBrr), DestReg: ARM::SP)
                .addReg(RegNo: ARM::SP, flags: RegState::Kill)
                .addReg(RegNo: ARM::R4, flags: RegState::Kill)
                .setMIFlags(MachineInstr::FrameSetup)
                .add(MOs: predOps(Pred: ARMCC::AL))
                .add(MO: condCodeOp());
    if (NeedsWinCFIStackAlloc) {
      SEH = BuildMI(MF, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
                .addImm(Val: NumBytes)
                .addImm(/*Wide=*/Val: 1)
                .setMIFlags(MachineInstr::FrameSetup);
      MBB.insertAfter(I: Instr, MI: SEH);
    }
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      // mov sp, r7
      // sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  MachineBasicBlock::iterator AfterPush;
  if (HasFP) {
    MachineBasicBlock::iterator FPPushInst;
    // Offset from SP immediately after the push which saved the FP to the FP
    // save slot.
    int64_t FPOffsetAfterPush;
    switch (FramePtrSpillArea) {
    case SpillArea::GPRCS1:
      FPPushInst = GPRCS1Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    case SpillArea::GPRCS2:
      FPPushInst = GPRCS2Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    case SpillArea::GPRCS3:
      FPPushInst = GPRCS3Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                          FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    default:
      llvm_unreachable("frame pointer in unknown spill area");
      break;
    }
    AfterPush = std::next(x: FPPushInst);
    if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
      assert(FPOffsetAfterPush == 0);

    // Emit the MOV or ADD to set up the frame pointer register.
    emitRegPlusImmediate(isARM: !AFI->isThumbFunction(), MBB, MBBI&: AfterPush, dl, TII,
                         DestReg: FramePtr, SrcReg: ARM::SP, NumBytes: FPOffsetAfterPush,
                         MIFlags: MachineInstr::FrameSetup);

    if (!NeedsWinCFI) {
      // Emit DWARF info to find the CFA using the frame pointer from this
      // point onward.
      CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
      if (FPOffsetAfterPush != 0)
        CFIBuilder.buildDefCFA(Reg: FramePtr, Offset: -MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI));
      else
        CFIBuilder.buildDefCFARegister(Reg: FramePtr);
    }
  }

  // Emit a SEH opcode indicating the prologue end. The rest of the prologue
  // instructions below don't need to be replayed to unwind the stack.
  if (NeedsWinCFI && MBBI != MBB.begin()) {
    MachineBasicBlock::iterator End = MBBI;
    if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
      End = AfterPush;
    insertSEHRange(MBB, Start: {}, End, TII, MIFlags: MachineInstr::FrameSetup);
    BuildMI(BB&: MBB, I: End, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
    MF.setHasWinCFI(true);
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF cf instructions to describe the situation. Start by
  // recording where each register ended up:
  if (!NeedsWinCFI) {
    for (const auto &Entry : reverse(C: CSI)) {
      MCRegister Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      MachineBasicBlock::iterator CFIPos;
      switch (getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(),
                           RegInfo)) {
      case SpillArea::GPRCS1:
        CFIPos = std::next(x: GPRCS1Push);
        break;
      case SpillArea::GPRCS2:
        CFIPos = std::next(x: GPRCS2Push);
        break;
      case SpillArea::DPRCS1:
        CFIPos = std::next(x: DPRCS1Push);
        break;
      case SpillArea::GPRCS3:
        CFIPos = std::next(x: GPRCS3Push);
        break;
      case SpillArea::FPStatus:
      case SpillArea::FPCXT:
      case SpillArea::DPRCS2:
        // FPCXT, FPStatus and DPRCS2 are not represented in the DWARF info.
        break;
      }

      if (CFIPos.isValid()) {
        CFIInstBuilder(MBB, CFIPos, MachineInstr::FrameSetup)
            .buildOffset(Reg: Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
                         Offset: MFI.getObjectOffset(ObjectIdx: FI));
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  if (!NeedsWinCFI) {
    LLVM_DEBUG(DefCFAOffsetCandidates.dump());
    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
  }

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  // Record the final area sizes for use by the epilogue and by frame-index
  // resolution.
  AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setFPStatusSavesSize(FPStatusSize);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
  AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
    Align MaxAlign = MFI.getMaxAlign();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::SP, Alignment: MaxAlign,
                               MustBeSingleInstruction: false);
    } else {
      // We cannot use sp as source/dest register here, thus we're using r4 to
      // perform the calculations. We're emitting the following sequence:
      // mov r4, sp
      // -- use emitAligningInstructions to produce best sequence to zero
      // -- out lower bits in r4
      // mov sp, r4
      // FIXME: It will be better just to find spare register here.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::R4)
          .addReg(RegNo: ARM::SP, flags: RegState::Kill)
          .add(MOs: predOps(Pred: ARMCC::AL));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::R4, Alignment: MaxAlign,
                               MustBeSingleInstruction: false);
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
          .addReg(RegNo: ARM::R4, flags: RegState::Kill)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: RegInfo->getBaseRegister())
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    else
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: RegInfo->getBaseRegister())
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}
1392
/// Emit the epilogue for the current function into \p MBB: undo the stack
/// allocation (restoring SP from FP when required), step past the
/// callee-saved restore instructions already emitted by the CSR restorer, and
/// emit the matching Windows SEH unwind opcodes when needed.
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");

  // Amount of stack space we reserved next to incoming args for either
  // varargs registers or stack arguments in tail calls made by this function.
  unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves on the first (from top) terminator instructions.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineBasicBlock::iterator RangeStart;
  if (!AFI->hasStackFrame()) {
    // Fast path: no callee-saved restores, at most one SP adjustment.
    if (MF.hasWinCFI()) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    if (NumBytes + IncomingArgStackToRestore != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes: NumBytes + IncomingArgStackToRestore,
                   MIFlags: MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(Flag: MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(Flag: MachineInstr::FrameDestroy))
        ++MBBI;
    }

    if (MF.hasWinCFI()) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -=
        (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
         AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() +
         AFI->getFPStatusSavesSize() + AFI->getDPRCalleeSavedGapSize() +
         AFI->getDPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea3Size());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: FramePtr, NumBytes: -NumBytes,
                                  Pred: ARMCC::AL, PredReg: 0, TII,
                                  MIFlags: MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          // mov sp, r7
          // sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::R4, BaseReg: FramePtr, NumBytes: -NumBytes,
                                 Pred: ARMCC::AL, PredReg: 0, TII, MIFlags: MachineInstr::FrameDestroy);
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
              .addReg(RegNo: ARM::R4)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ARM::SP)
              .addReg(RegNo: FramePtr)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .add(MO: condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
              .addReg(RegNo: FramePtr)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MIFlags: MachineInstr::FrameDestroy);

    // Increment past our save areas.
    if (AFI->getGPRCalleeSavedArea3Size()) {
      assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
      (void)PushPopSplit;
      MBBI++;
    }

    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    // Undo the alignment padding inserted between the GPR and DPR saves.
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: AFI->getDPRCalleeSavedGapSize(),
                   MIFlags: MachineInstr::FrameDestroy);
    }

    if (AFI->getGPRCalleeSavedArea2Size()) {
      assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
      (void)PushPopSplit;
      MBBI++;
    }
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

    // Pop the varargs save area and any incoming-argument stack this epilogue
    // is responsible for (e.g. after a tail call).
    if (ReservedArgStack || IncomingArgStackToRestore) {
      assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
             "attempting to restore negative stack amount");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes: ReservedArgStack + IncomingArgStackToRestore,
                   MIFlags: MachineInstr::FrameDestroy);
    }

    // Validate PAC, It should have been already popped into R12. For CMSE entry
    // function, the validation instruction is emitted during expansion of the
    // tBXNS_RET, since the validation must use the value of SP at function
    // entry, before saving, resp. after restoring, FPCXTNS.
    if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
      BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2AUT));
  }

  if (MF.hasWinCFI()) {
    insertSEHRange(MBB, Start: RangeStart, End: MBB.end(), TII, MIFlags: MachineInstr::FrameDestroy);
    BuildMI(BB&: MBB, I: MBB.end(), MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogEnd))
        .setMIFlag(MachineInstr::FrameDestroy);
  }
}
1558
1559/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1560/// debug info. It's the same as what we use for resolving the code-gen
1561/// references for now. FIXME: This can go wrong when references are
1562/// SP-relative and simple call frames aren't used.
1563StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1564 int FI,
1565 Register &FrameReg) const {
1566 return StackOffset::getFixed(Fixed: ResolveFrameIndexReference(MF, FI, FrameReg, SPAdj: 0));
1567}
1568
/// Resolve frame index \p FI to a base register (written to \p FrameReg) and
/// a byte offset from that register, choosing between SP, the frame pointer
/// and the base pointer depending on stack realignment, VLAs and offset
/// encoding ranges. \p SPAdj is an extra adjustment applied to SP-relative
/// offsets (e.g. inside a non-reserved call frame).
int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 int SPAdj) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Offset of the slot relative to SP after the prologue has run.
  int Offset = MFI.getObjectOffset(ObjectIdx: FI) + MFI.getStackSize();
  // The same slot expressed relative to the frame pointer.
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI.isFixedObjectIndex(ObjectIdx: FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
      // The base pointer does not move with the call frame, so undo SPAdj.
      Offset -= SPAdj;
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumbFunction()) {
      // Prefer SP to base pointer, if the offset is suitably aligned and in
      // range as the effective range of the immediate offset is bigger when
      // basing off SP.
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
  // That can happen if we forced a base pointer for a large call frame.
  if (RegInfo->hasBasePointer(MF)) {
    FrameReg = RegInfo->getBaseRegister();
    // The base pointer does not move with the call frame, so undo SPAdj.
    Offset -= SPAdj;
  }
  return Offset;
}
1650
/// Emit push instructions that spill the callee-saved registers in \p CSI
/// accepted by the predicate \p Func.
///
/// The selected registers are sorted by encoding value and stored with a
/// single multi-register store (\p StmOpc, e.g. STMDB_UPD / VSTMDDB_UPD), or
/// with the pre-indexed single-register store \p StrOpc when exactly one
/// register remains and StrOpc is non-zero. If \p NoGap is set, one
/// instruction may only cover consecutive registers (a VPUSH encoding
/// constraint), so the list is emitted as several contiguous chunks.
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  // Pair of (register, should-set-kill-flag).
  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  unsigned i = CSI.size();
  // Walk CSI from the end so that, with NoGap, the chunk holding the
  // highest-numbered registers is gathered (and emitted) first.
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      MCRegister Reg = CSI[i-1].getReg();
      if (!Func(Reg))
        continue;

      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(PhysReg: Reg))
        MBB.addLiveIn(PhysReg: Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in. This
      // happens with the @llvm-returnaddress intrinsic and with arguments
      // passed in callee saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(Elt: std::make_pair(x&: Reg, /*isKill=*/y: !isLiveIn));
    }

    if (Regs.empty())
      continue;

    // Store-multiple encodings require the register list in ascending
    // encoding order.
    llvm::sort(C&: Regs, Comp: [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(Reg: LHS.first) < TRI.getEncodingValue(Reg: RHS.first);
    });

    if (Regs.size() > 1 || StrOpc== 0) {
      // Multi-register store with SP writeback.
      MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StmOpc), DestReg: ARM::SP)
                                    .addReg(RegNo: ARM::SP)
                                    .setMIFlags(MachineInstr::FrameSetup)
                                    .add(MOs: predOps(Pred: ARMCC::AL));
      for (const auto &[Reg, Kill] : Regs)
        MIB.addReg(RegNo: Reg, flags: getKillRegState(B: Kill));
    } else if (Regs.size() == 1) {
      // Single register: pre-indexed store, sp := sp - 4.
      BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StrOpc), DestReg: ARM::SP)
          .addReg(RegNo: Regs[0].first, flags: getKillRegState(B: Regs[0].second))
          .addReg(RegNo: ARM::SP)
          .setMIFlags(MachineInstr::FrameSetup)
          .addImm(Val: -4)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer to
    // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
    if (MI != MBB.begin())
      --MI;
  }
}
1723
/// Emit pop instructions that reload the callee-saved registers in \p CSI
/// accepted by the predicate \p Func; the mirror image of emitPushInst.
///
/// \p LdmOpc is the multi-register load-with-writeback opcode, \p LdrOpc the
/// post-indexed single-register load (0 if only the LDM form is allowed),
/// and \p NoGap restricts each instruction to consecutive registers (a VPOP
/// encoding constraint). When LR is being restored and the block ends in a
/// plain return that can be subsumed, LR is popped straight into PC and the
/// return instruction is deleted.
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  // Classify the terminator (if any) so we know whether popping LR directly
  // into PC is a legal way to return from this block.
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall =
        (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
         RetOpcode == ARM::TCRETURNrinotr12);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap =
        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
        RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  // Walk CSI from the end so that, with NoGap, the chunk holding the
  // highest-numbered registers is handled first.
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      MCRegister Reg = Info.getReg();
      if (!Func(Reg))
        continue;

      // Restoring LR: if the block ends in a simple return (no tail call,
      // interrupt return, trap, CMSE exit, stack-argument restore, or PAC
      // authentication needed), fold it by popping into PC instead.
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
          (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
           PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Elt: Reg);
    }

    if (Regs.empty())
      continue;

    // Load-multiple encodings require the register list in ascending
    // encoding order.
    llvm::sort(C&: Regs, Comp: [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(Reg: LHS) < TRI.getEncodingValue(Reg: RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
                                    .addReg(RegNo: ARM::SP)
                                    .add(MOs: predOps(Pred: ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned Reg : Regs)
        MIB.addReg(RegNo: Reg, flags: getDefRegState(B: true));
      if (DeleteRet) {
        if (MI != MBB.end()) {
          // Keep the implicit operands (e.g. return-value uses) of the
          // return instruction we are folding away.
          MIB.copyImplicitOps(OtherMI: *MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdrOpc), DestReg: Regs[0])
              .addReg(RegNo: ARM::SP, flags: RegState::Define)
              .addReg(RegNo: ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(RegNo: 0);
        MIB.addImm(Val: ARM_AM::getAM2Opc(Opc: ARM_AM::add, Imm12: 4, SO: ARM_AM::no_shift));
      } else
        MIB.addImm(Val: 4);
      MIB.add(MOs: predOps(Pred: ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}
1836
/// Save the FP status registers (FPSCR / FPEXC) in the prologue.
///
/// These system registers cannot be pushed directly: FPSCR is first copied
/// into R4 (and FPEXC into R5) with VMRS / VMRS_FPEXC, then the GPR copies
/// are pushed with \p PushOpc. emitFPStatusRestores pops into the same
/// registers, so the R4/FPSCR and R5/FPEXC pairing is a fixed convention
/// between the two functions.
void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         ArrayRef<CalleeSavedInfo> CSI,
                                         unsigned PushOpc) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  SmallVector<MCRegister> Regs;
  // True if Reg appears in the callee-saved list.
  auto RegPresent = [&CSI](MCRegister Reg) {
    return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
      return C.getReg() == Reg;
    });
  };

  // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
  // instruction.
  if (RegPresent(ARM::FPSCR)) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS), DestReg: ARM::R4)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .setMIFlags(MachineInstr::FrameSetup);

    Regs.push_back(Elt: ARM::R4);
  }

  // If we need to save FPEXC, then we must move FPEXC into R5 with the
  // VMRS_FPEXC instruction.
  if (RegPresent(ARM::FPEXC)) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS_FPEXC), DestReg: ARM::R5)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .setMIFlags(MachineInstr::FrameSetup);

    Regs.push_back(Elt: ARM::R5);
  }

  // If neither FPSCR and FPEXC are present, then do nothing.
  if (Regs.size() == 0)
    return;

  // Push both R4 and R5 onto the stack, if present.
  MachineInstrBuilder MIB =
      BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: PushOpc), DestReg: ARM::SP)
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .setMIFlags(MachineInstr::FrameSetup);

  for (Register Reg : Regs) {
    MIB.addReg(RegNo: Reg);
  }
}
1886
/// Restore the FP status registers (FPSCR / FPEXC) in the epilogue; the
/// mirror image of emitFPStatusSaves.
///
/// The saved values are popped into the same scratch GPRs used on the save
/// path (FPSCR -> R4, FPEXC -> R5) with \p LdmOpc, then moved back into the
/// system registers with VMSR / VMSR_FPEXC.
void ARMFrameLowering::emitFPStatusRestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // True if Reg appears in the callee-saved list.
  auto RegPresent = [&CSI](MCRegister Reg) {
    return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
      return C.getReg() == Reg;
    });
  };

  // Do nothing if we don't need to restore any FP status registers.
  if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
    return;

  // Pop registers off of the stack.
  MachineInstrBuilder MIB =
      BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .setMIFlags(MachineInstr::FrameDestroy);

  // If FPSCR was saved, it will be popped into R4.
  if (RegPresent(ARM::FPSCR)) {
    MIB.addReg(RegNo: ARM::R4, flags: RegState::Define);
  }

  // If FPEXC was saved, it will be popped into R5.
  if (RegPresent(ARM::FPEXC)) {
    MIB.addReg(RegNo: ARM::R5, flags: RegState::Define);
  }

  // Move the FPSCR value back into the register with the VMSR instruction.
  if (RegPresent(ARM::FPSCR)) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR))
        .addReg(RegNo: ARM::R4)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .setMIFlags(MachineInstr::FrameDestroy);
  }

  // Move the FPEXC value back into the register with the VMSR_FPEXC
  // instruction.
  if (RegPresent(ARM::FPEXC)) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR_FPEXC))
        .addReg(RegNo: ARM::R5)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .setMIFlags(MachineInstr::FrameDestroy);
  }
}
1937
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// R4 is used as a scratch register to hold the (aligned) spill slot
/// address; the last spill instruction emitted kills it. The instruction
/// count of this sequence is relied upon by skipAlignedDPRCS2Spills.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    // Note: for registers below D8 this subtraction wraps around (DNum is
    // unsigned), so the range check below also filters them out.
    unsigned DNum = I.getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(ObjectIdx: FI, Alignment: DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer.  MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack.  Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(ObjectIdx: FI, Alignment: MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
      .addReg(RegNo: ARM::SP)
      .addImm(Val: 8 * NumAlignedDPRCS2Regs)
      .add(MOs: predOps(Pred: ARMCC::AL))
      .add(MO: condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MBBI: MI, DL, Reg: ARM::R4, Alignment: MaxAlign, MustBeSingleInstruction: true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::SP)
                                .addReg(RegNo: ARM::R4)
                                .add(MOs: predOps(Pred: ARMCC::AL));
  if (!isThumb)
    MIB.add(MO: condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Qwb_fixed), DestReg: ARM::R4)
        .addReg(RegNo: ARM::R4, flags: RegState::Kill)
        .addImm(Val: 16)
        .addReg(RegNo: NextReg)
        .addReg(RegNo: SupReg, flags: RegState::ImplicitKill)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Q))
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: NextReg)
        .addReg(RegNo: SupReg, flags: RegState::ImplicitKill)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1q64))
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(PhysReg: NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VSTRD))
        .addReg(RegNo: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: (NextReg - R4BaseReg) * 2)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
}
2080
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
///
/// The realignment prelude is always exactly three instructions, followed by
/// one, two, or three spill instructions depending on how many d-registers
/// were saved: 1, 2 or 4 registers take a single store; 3, 5 or 6 take two;
/// 7 takes three (see the emission logic in emitAlignedDPRCS2Spills).
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    // 7 registers need a third spill instruction.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  default:
    // 3, 5 and 6 registers need a second spill instruction.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  case 1:
  case 2:
  case 4:
    // The final spill instruction carries the kill of the scratch register.
    assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
    ++MI;
  }
  return MI;
}
2110
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// Mirrors emitAlignedDPRCS2Spills: R4 is used as a scratch register holding
/// the d8 spill slot address, and the last reload kills it.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
      .addFrameIndex(Idx: D8SpillFI)
      .addImm(Val: 0)
      .add(MOs: predOps(Pred: ARMCC::AL))
      .add(MO: condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  // The writeback is only needed when emitting two vld1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Qwb_fixed), DestReg: NextReg)
        .addReg(RegNo: ARM::R4, flags: RegState::Define)
        .addReg(RegNo: ARM::R4, flags: RegState::Kill)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg, flags: RegState::ImplicitDefine)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Q), DestReg: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg, flags: RegState::ImplicitDefine)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1q64), DestReg: SupReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // vldr.64 uses addrmode5 which has an offset scale of 4.
  if (NumAlignedDPRCS2Regs)
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLDRD), DestReg: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 2 * (NextReg - R4BaseReg))
        .add(MOs: predOps(Pred: ARMCC::AL));

  // The last reload inserted kills the scratch register r4.
  std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
}
2203
/// Insert prologue spill code for the callee-saved registers in \p CSI.
///
/// Emits (in order): the PAC computation into r12 when return-address
/// signing is enabled, the FPCXTNS save for CMSE, then one push sequence per
/// spill area (GPRCS1, GPRCS2, FP status regs, DPRCS1, GPRCS3), and finally
/// the aligned DPRCS2 spills with their stack-realignment code.
/// Returns true to indicate the target handled the spilling itself.
bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // Save the non-secure floating point context.
  if (llvm::any_of(Range&: CSI, P: [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VSTR_FPCXTNS_pre),
            DestReg: ARM::SP)
        .addReg(RegNo: ARM::SP)
        .addImm(Val: -4)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // Predicates selecting which spill area a given register belongs to, used
  // to route each CSI entry to the right push sequence below.
  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS1);
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS2);
  emitFPStatusSaves(MBB, MI, CSI, PushOpc);
  emitPushInst(MBB, MI, CSI, StmOpc: FltOpc, StrOpc: 0, NoGap: true, Func: IsDPRCS1);
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS3);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
2269
/// Insert epilogue reload code for the callee-saved registers in \p CSI.
///
/// Restores the spill areas in the reverse order of
/// spillCalleeSavedRegisters: first the aligned DPRCS2 registers (while the
/// stack is still aligned), then GPRCS3, DPRCS1, the FP status registers,
/// GPRCS2 and GPRCS1. Returns true to indicate the target handled the
/// restoring itself.
bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;

  // Predicates selecting which spill area a given register belongs to, used
  // to route each CSI entry to the right pop sequence below.
  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS3);
  emitPopInst(MBB, MI, CSI, LdmOpc: FltOpc, LdrOpc: 0, isVarArg, NoGap: true, Func: IsDPRCS1);
  emitFPStatusRestores(MBB, MI, CSI, LdmOpc: PopOpc);
  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS2);
  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS1);

  return true;
}
2321
2322// FIXME: Make generic?
2323static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2324 const ARMBaseInstrInfo &TII) {
2325 unsigned FnSize = 0;
2326 for (auto &MBB : MF) {
2327 for (auto &MI : MBB)
2328 FnSize += TII.getInstSizeInBytes(MI);
2329 }
2330 if (MF.getJumpTableInfo())
2331 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2332 FnSize += Table.MBBs.size() * 4;
2333 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2334 return FnSize;
2335}
2336
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
///
/// \p HasNonSPFrameIndex is set when some frame-index operand must live in a
/// register class that does not contain SP, so a scratch register may be
/// needed regardless of the returned limit.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // Start from the widest encodable offset (12-bit immediate) and shrink it
  // as more restrictive addressing modes are encountered.
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, OpNum: i, TRI, MF);
        if (RegClass && !RegClass->contains(Reg: ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          // 8-bit immediate scaled by 2.
          Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          // 8-bit immediate scaled by 4.
          Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
2421
// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
//
// Decides how many of the saved d-registers (from SavedRegs) should go in
// the aligned DPRCS2 area, records the count in ARMFunctionInfo, and
// reserves r4 as the scratch register the aligned spill/reload code needs.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  // Bail out when the feature is disabled by the command-line flag.
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
          MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // needs spilling.  The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range.  Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(Idx: ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}
2468
2469bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2470 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2471 // upon function entry (resp. restore it immmediately before return)
2472 if (STI.hasV8_1MMainlineOps() &&
2473 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2474 return false;
2475
2476 // We are disabling shrinkwrapping for now when PAC is enabled, as
2477 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2478 // generated. A follow-up patch will fix this in a more performant manner.
2479 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2480 SpillsLR: true /* SpillsLR */))
2481 return false;
2482
2483 return true;
2484}
2485
2486bool ARMFrameLowering::requiresAAPCSFrameRecord(
2487 const MachineFunction &MF) const {
2488 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2489 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2490}
2491
2492// Thumb1 may require a spill when storing to a frame index through FP (or any
2493// access with execute-only), for cases where FP is a high register (R11). This
2494// scans the function for cases where this may happen.
2495static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
2496 const TargetFrameLowering &TFI) {
2497 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2498 if (!AFI->isThumb1OnlyFunction())
2499 return false;
2500
2501 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2502 for (const auto &MBB : MF)
2503 for (const auto &MI : MBB)
2504 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2505 STI.genExecuteOnly())
2506 for (const auto &Op : MI.operands())
2507 if (Op.isFI()) {
2508 Register Reg;
2509 TFI.getFrameIndexReference(MF, FI: Op.getIndex(), FrameReg&: Reg);
2510 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2511 return true;
2512 }
2513 return false;
2514}
2515
// On top of the target-independent analysis, decide which extra callee-saved
// registers this function must spill (scratch registers, LR for far jumps,
// alignment padding spills) and whether an emergency spill slot is needed
// for register scavenging.
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
                           !MF.getTarget().Options.DisableFramePointerElim(MF);
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const Function &F = MF.getFunction();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = RegInfo->getFrameRegister(MF);
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // For a floating point interrupt, save these registers always, since LLVM
  // currently doesn't model reads/writes to these registers.
  if (F.hasFnAttribute(Kind: "interrupt") && F.hasFnAttribute(Kind: "save-fp")) {
    SavedRegs.set(ARM::FPSCR);
    SavedRegs.set(ARM::R4);

    // FPEXC is only present on non-M-Class cores.
    if (STI.isMClass()) {
      SavedRegs.reset(Idx: ARM::FPEXC);
    } else {
      SavedRegs.set(ARM::FPEXC);
      SavedRegs.set(ARM::R5);
    }
  }

  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
  // since it's not always possible to restore sp from fp in a single
  // instruction.
  // FIXME: It will be better just to find spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, StackSizeInBytes: MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
    // requires stack alignment. We don't know for sure what the stack size
    // will be, but for this, an estimate is good enough. If anything
    // changes it, it'll be a spill, which implies we've used all the registers
    // and so R4 is already used, so not marking it here will be OK.
    // FIXME: It will be better just to find spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // When return address signing is enabled R12 is treated as callee-saved.
  if (AFI->shouldSignReturnAddress())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  // While scanning, count GPR/FPR spills and remember which CS GPRs are
  // still unspilled (split by push/pop area when using the R7 split).
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF: &MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Idx: Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (PushPopSplit != ARMSubtarget::SplitR7) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        [[fallthrough]];
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (PushPopSplit != ARMSubtarget::SplitR7) {
        UnspilledCS1GPRs.push_back(Elt: Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Elt: Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Elt: Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This enables
    // use of BL to implement far jump.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);

  // Determine biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(ObjectIdx: I) + MFI.getObjectSize(ObjectIdx: I);
    MaxFixedOffset = std::max(a: MaxFixedOffset, b: MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
    //
    // A special case is the scenario where r11 is used as FP, where accesses
    // to a frame index will require its value to be moved into a low reg.
    // This is handled later on, once we are able to determine if we have any
    // fp-relative accesses.
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, TFI: this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, AFI: *AFI, MF);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets ||
      !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    if (HasFP) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if ((requiresAAPCSFrameRecord(MF) ||
           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
          !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(CI: LRPos);
      }
      auto FPPos = llvm::find(Range&: UnspilledCS1GPRs, Val: FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(CI: FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is the number of extra spills inserted for callee-save GPRs which
    // would not otherwise be used by the function. When greater than zero it
    // guarantees that it is possible to scavenge a register to hold the
    // address of a stack slot. On Thumb1, the register must be a valid operand
    // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
    // or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    unsigned NumExtraCSSpill = 0;

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions can not
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue for
      // free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(a: EntryRegDeficit, b: ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first push
      // instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Idx: Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: Reg);
          LLVM_DEBUG(
              dbgs()
              << printReg(Reg, TRI)
              << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP || FramePtr != ARM::R7) {
        if (SavedRegs.test(Idx: ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: ARM::R7);
          LLVM_DEBUG(
              dbgs()
              << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Idx: Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(Reg: ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(Idx: ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(PhysReg: Reg))
          NumExtraCSSpill++;
        UnspilledCS1GPRs.erase(CI: llvm::find(Range&: UnspilledCS1GPRs, Val: Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }

    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(CI: LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(PhysReg: ARM::LR) && !MRI.isPhysRegUsed(PhysReg: ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        NumExtraCSSpill++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned Reg : UnspilledCS1GPRs) {
          // Don't spill high register if the function is thumb. In the case of
          // Windows on ARM, accept R11 (frame pointer)
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              NumExtraCSSpill++;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg))
          NumExtraCSSpill++;
      }
    }

    // Estimate if we might need to scavenge registers at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments and for frame index accesses when FP is high register,
    // even when the frame itself is small.
    unsigned RegsNeeded = 0;
    if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, TFI: *this)) {
      RegsNeeded++;
      // With thumb1 execute-only we may need an additional register for saving
      // and restoring the CPSR.
      if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
        RegsNeeded++;
    }

    if (RegsNeeded > NumExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(PhysReg: Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Elt: Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(PhysReg: Reg)) {
            Extras.push_back(Elt: Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(PhysReg: Reg))
            NumExtraCSSpill++;
        }
      }
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            FI: MFI.CreateSpillStackObject(Size, Alignment));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(Idx: ARM::LR));
}
3056
3057void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
3058 MachineFrameInfo &MFI = MF.getFrameInfo();
3059 if (!MFI.isCalleeSavedInfoValid())
3060 return;
3061
3062 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3063 // into PC so it is not live out of the return block: Clear the Restored bit
3064 // in that case.
3065 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
3066 if (Info.getReg() != ARM::LR)
3067 continue;
3068 if (all_of(Range&: MF, P: [](const MachineBasicBlock &MBB) {
3069 return all_of(Range: MBB.terminators(), P: [](const MachineInstr &Term) {
3070 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3071 Term.getOpcode() == ARM::t2LDMIA_RET ||
3072 Term.getOpcode() == ARM::tPOP_RET;
3073 });
3074 })) {
3075 Info.setRestored(false);
3076 break;
3077 }
3078 }
3079}
3080
// Hook run after callee-saved info is finalized but before frame layout is
// frozen: performs the generic processing, then rechecks whether LR truly
// needs restoring (it may be popped directly into PC on all return paths).
void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  updateLRRestored(MF);
}
3086
3087void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
3088 BitVector &SavedRegs) const {
3089 TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
3090
3091 // If we have the "returned" parameter attribute which guarantees that we
3092 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3093 // record that fact for IPRA.
3094 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3095 if (AFI->getPreservesR0())
3096 SavedRegs.set(ARM::R0);
3097}
3098
// Augment the callee-saved list with pseudo-registers that need spill slots:
// the FP context (FPCXTNS) for CMSE entry functions, and R12 (holding the
// return-address PAC) when return address signing is enabled. Returns false
// so PEI still performs the default slot assignment for the final list.
bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(args: ARM::FPCXTNS);
    // FPCXTNS is consumed on return via VLDR into FPCXT_NS, not reloaded
    // into a GPR, so it is not "restored" in the usual sense.
    CSI.back().setRestored(false);
  }

  // For functions, which sign their return address, upon function entry, the
  // return address PAC is computed in R12. Treat R12 as a callee-saved register
  // in this case.
  const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
  if (AFI.shouldSignReturnAddress()) {
    // The order of register must match the order we push them, because the
    // PEI assigns frame indices in that order. That order depends on the
    // PushPopSplitVariation, there are only two cases which we use with return
    // address signing:
    switch (STI.getPushPopSplitVariation(MF)) {
    case ARMSubtarget::SplitR7:
      // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
      // Insert R12 just before the first high register / D-register entry.
      CSI.insert(position: find_if(Range&: CSI,
                         P: [=](const auto &CS) {
                           MCRegister Reg = CS.getReg();
                           return Reg == ARM::R10 || Reg == ARM::R11 ||
                                  Reg == ARM::R8 || Reg == ARM::R9 ||
                                  ARM::DPRRegClass.contains(Reg);
                         }),
                 x: CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::SplitR11AAPCSSignRA:
      // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
      // on the stack.
      CSI.insert(position: CSI.begin(), x: CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::NoSplit:
      assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
             "ABI-required frame pointers need a CSR split when signing return "
             "address.");
      // Insert R12 immediately after LR (the first non-LR position).
      CSI.insert(position: find_if(Range&: CSI,
                         P: [=](const auto &CS) {
                           MCRegister Reg = CS.getReg();
                           return Reg != ARM::LR;
                         }),
                 x: CalleeSavedInfo(ARM::R12));
      break;
    default:
      llvm_unreachable("Unexpected CSR split with return address signing");
    }
  }

  return false;
}
3154
3155const TargetFrameLowering::SpillSlot *
3156ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
3157 static const SpillSlot FixedSpillOffsets[] = {{.Reg: ARM::FPCXTNS, .Offset: -4}};
3158 NumEntries = std::size(FixedSpillOffsets);
3159 return FixedSpillOffsets;
3160}
3161
// Replace the ADJCALLSTACKDOWN / ADJCALLSTACKUP pseudo instructions with
// real SP adjustments (or nothing, when a reserved call frame makes the
// adjustment unnecessary). Returns the iterator following the erased pseudo.
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  // Operand 1 of the destroy pseudo is the number of bytes the callee pops.
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(i: 1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  // Carry any predication on the pseudo over to the emitted SP updates.
  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(i: PIdx).getImm();
  unsigned PredReg = TII.getFramePred(MI: *I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(I: *I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(SPAdj: Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -Amount, MIFlags: MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: Amount, MIFlags: MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -CalleePopAmount,
                 MIFlags: MachineInstr::NoFlags, Pred, PredReg);
  }
  return MBB.erase(I);
}
3215
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time until one of the two top bits
  // is set, remembering how far we shifted.
  unsigned ShiftAmt = 0;
  for (; !(Value & 0xC0000000); ShiftAmt += 2)
    Value <<= 2;

  // Keep the leading byte, rounding it up if any of the discarded low 24
  // bits were set.
  uint32_t Rounded = (Value >> 24) + ((Value & 0x00FFFFFF) ? 1 : 0);

  // If rounding overflowed past 8 bits, drop back to an 8-bit pattern that
  // can absorb the overflow via a coarser rotation.
  if (Rounded & 0x100)
    Rounded &= 0x1FC;

  // Undo the normalization shift to place the byte back at its magnitude.
  return ShiftAmt > 24 ? Rounded >> (ShiftAmt - 24)
                       : Rounded << (24 - ShiftAmt);
}
3244
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit, so a frame needing no more than this much space can skip
// the call to __morestack entirely.
static const uint64_t kSplitStackAvailable = 256;
3248
3249// Adjust the function prologue to enable split stacks. This currently only
3250// supports android and linux.
3251//
3252// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
3253// must be well defined in order to allow for consistent implementations of the
3254// __morestack helper function. The ABI is also not a normal ABI in that it
3255// doesn't follow the normal calling conventions because this allows the
3256// prologue of each function to be optimized further.
3257//
3258// Currently, the ABI looks like (when calling __morestack)
3259//
3260// * r4 holds the minimum stack size requested for this function call
3261// * r5 holds the stack size of the arguments to the function
3262// * the beginning of the function is 3 instructions after the call to
3263// __morestack
3264//
3265// Implementations of __morestack should use r4 to allocate a new stack, r5 to
3266// place the arguments on to the new stack, and the 3-instruction knowledge to
3267// jump directly to the body of the function when working on the new stack.
3268//
3269// An old (and possibly no longer compatible) implementation of __morestack for
3270// ARM can be found at [1].
3271//
3272// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, platforms other than
  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error(reason: "Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error(reason: "Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  // Opcode used to materialize a full 32-bit immediate when movt is
  // available; Thumb1 without movt falls back to constant pools below.
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  // The blocks making up the split-stack check, inserted in front of the
  // existing prologue:
  //   PrevStackMBB - push the scratch registers
  //   McrMBB       - compute SP - StackSize (and read the TLS base for the
  //                  non-Thumb1 path)
  //   GetMBB       - load the stack limit and compare against it
  //   AllocMBB     - call __morestack and return
  //   PostStackMBB - restore the scratch registers and fall into the prologue
  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(Elt: &PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(Ptr: PredBB).second)
        WalkList.push_back(Elt: PredBB);
    }
  } while (!WalkList.empty());

  // The order in that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  BeforePrologueRegion.insert_range(R&: AddedBlocks);

  // Propagate the prologue's live-ins to every block in the region,
  // including the newly created ones.
  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(RegMaskPair: LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(Ptr: B);
    MF.insert(MBBI: PrologueMBB.getIterator(), MBB: B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(MBB: &PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(Old: &PrologueMBB, New: AddedBlocks[0]);
  }

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(Value: StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 8);
    CFIBuilder.buildOffset(Reg: ScratchReg1, Offset: -4);
    CFIBuilder.buildOffset(Reg: ScratchReg0, Offset: -8);
  }

  // mov SR1, sp
  // In Thumb mode SR1 always starts as a copy of SP; in ARM mode this copy is
  // only needed when comparing SP directly (for large frames, SR1 is instead
  // computed as SP - StackSize below without the intermediate copy).
  if (Thumb) {
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ScratchReg1)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ScratchReg1)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .add(MO: condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBi8), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addReg(RegNo: ScratchReg1)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      // Stack size doesn't fit in an immediate: materialize it in SR0 first.
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
            .addImm(Val: AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                   Val: AlignedStackSize);
      }
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBrr), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addReg(RegNo: ScratchReg1)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBri), DestReg: ScratchReg1)
          .addReg(RegNo: ARM::SP)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                 Val: AlignedStackSize);
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBrr), DestReg: ScratchReg1)
          .addReg(RegNo: ARM::SP)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    }
  }

  // Load the stack limit into SR0. Thumb1 cannot read the TLS base via MRC,
  // so it goes through the external __STACK_LIMIT symbol instead.
  if (Thumb && ST->isThumb1Only()) {
    if (ST->genExecuteOnly()) {
      BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
          .addExternalSymbol(FnName: "__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
      ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
          C&: MF.getFunction().getContext(), s: "__STACK_LIMIT", ID: PCLabelId, PCAdj: 0);
      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRpci), DestReg: ScratchReg0)
          .addConstantPoolIndex(Idx: CPI)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    // ldr SR0, [SR0]
    BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRi), DestReg: ScratchReg0)
        .addReg(RegNo: ScratchReg0)
        .addImm(Val: 0)
        .add(MOs: predOps(Pred: ARMCC::AL));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2MRC : ARM::MRC),
            DestReg: ScratchReg0)
        .addImm(Val: 15)
        .addImm(Val: 0)
        .addImm(Val: 13)
        .addImm(Val: 0)
        .addImm(Val: 3)
        .add(MOs: predOps(Pred: ARMCC::AL));

    // Use the last tls slot on android and a private field of the TCB
    // (thread control block) on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            DestReg: ScratchReg0)
        .addReg(RegNo: ScratchReg0)
        .addImm(Val: 4 * TlsOffset)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
      .addReg(RegNo: ScratchReg0)
      .addReg(RegNo: ScratchReg1)
      .add(MOs: predOps(Pred: ARMCC::AL));

  // This jump is taken if StackLimit <= SP - stack required, i.e. there is
  // enough room and __morestack does not need to be called.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
      .addMBB(MBB: PostStackMBB)
      .addImm(Val: ARMCC::LS)
      .addReg(RegNo: ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  // The amount of stack required.
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg0)
          .add(MO: condCodeOp())
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
            .addImm(Val: AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                   Val: AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg0)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                 Val: AlignedStackSize);
    }
  }

  // Pass second argument for the __morestack by Scratch Register #1.
  // The amount of stack consumed to save function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg1)
            .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
            Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(Value: ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg1)
          .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
          Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register (r4, r5 and lr are now on the stack, so the CFA is
  // SP + 12).
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 12);
    CFIBuilder.buildOffset(Reg: ARM::LR, Offset: -12);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tBL))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addExternalSymbol(FnName: "__morestack");
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::BL))
        .addExternalSymbol(FnName: "__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 POP cannot write LR directly; pop into SR0 and copy it over.
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addReg(RegNo: ScratchReg0);
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::LR)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::t2LDR_POST))
          .addReg(RegNo: ARM::LR, flags: RegState::Define)
          .addReg(RegNo: ARM::SP, flags: RegState::Define)
          .addReg(RegNo: ARM::SP)
          .addImm(Val: 4)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  }

  // Restore SR0 and SR1 in case of __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
    CFIInstBuilder(AllocMBB, MachineInstr::NoFlags).buildDefCFAOffset(Offset: 0);

  // Return from this function.
  BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ST->getReturnOpcode())).add(MOs: predOps(Pred: ARMCC::AL));

  // Restore SR0 and SR1 in case of __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 0);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
    CFIBuilder.buildSameValue(Reg: ScratchReg0);
    CFIBuilder.buildSameValue(Reg: ScratchReg1);
  }

  // Organizing MBB lists
  PostStackMBB->addSuccessor(Succ: &PrologueMBB);

  AllocMBB->addSuccessor(Succ: PostStackMBB);

  GetMBB->addSuccessor(Succ: PostStackMBB);
  GetMBB->addSuccessor(Succ: AllocMBB);

  McrMBB->addSuccessor(Succ: GetMBB);

  PrevStackMBB->addSuccessor(Succ: McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
3705