//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM implementation of TargetFrameLowering class.
//
// On ARM, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Technically the last frame area (VLAs) doesn't get created until the main
// function body runs, after the prologue has finished. However, it's depicted
// here for completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- (sp at function entry)
// |                                   |
// | varargs from registers            |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_lr                           |
// | prev_fp                           |
// | (a.k.a. "frame record")           |
// |                                   |
// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
// |                                   |
// | callee-saved gpr registers        |
// |                                   |
// |-----------------------------------|
// |                                   |
// | callee-saved fp/simd regs         |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.8-byte.alignment.....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- base pointer (not defined by ABI,
// |.variable-sized.local.variables....|    LLVM chooses r6)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) must be computable at compile time. The size of the areas
// with a dotted background cannot be computed at compile time if they are
// present, so all three of fp, bp and sp must be set up in order to access
// all contents in the frame areas, assuming all of the frame areas are
// non-empty.
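//
// For example (an illustrative sketch; actual registers and offsets depend
// on the target, ABI, and frame layout), a function with VLAs and
// overaligned locals might address its frame as:
//
//   ldr r0, [r7, #8]   @ incoming stack argument: fixed offset from fp
//   ldr r1, [r6, #16]  @ fixed-size local: fixed offset from bp (r6)
//   str r2, [sp]       @ outgoing call argument: small offset from sp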
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// The frame pointer might be chosen to be r7 or r11, depending on the target
// architecture and operating system. See ARMSubtarget::getFramePointerReg for
// details.
//
// Outgoing function arguments must be at the bottom of the stack frame when
// calling another function. If we do not have variable-sized stack objects, we
// can allocate a "reserved call frame" area at the bottom of the local
// variable area, large enough for all outgoing calls. If we do have VLAs, then
// the stack pointer must be decremented and incremented around each call to
// make space for the arguments below the VLAs.
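//
// As an illustrative sketch (instruction sequences only; the real code sizes
// these areas from MachineFrameInfo), a call made through a reserved call
// frame needs no SP adjustment:
//
//   str r0, [sp]          @ outgoing argument goes into the reserved area
//   bl  callee
//
// whereas with VLAs present each call site brackets its arguments:
//
//   sub sp, sp, #8        @ make space for the arguments below the VLAs
//   str r0, [sp]
//   bl  callee
//   add sp, sp, #8        @ release the argument space again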
//
//===----------------------------------------------------------------------===//

#include "ARMFrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

#define DEBUG_TYPE "arm-frame-lowering"

using namespace llvm;

static cl::opt<bool>
SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
                     cl::desc("Align ARM NEON spills in prolog and epilog"));

static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs);

ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}

bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
  // iOS always has a FP for backtracking; force other targets to keep their
  // FP when doing FastISel. The emitted code is currently superior, and in
  // cases like test-suite's lencod, FastISel isn't quite correct when FP is
  // eliminated.
  return MF.getSubtarget<ARMSubtarget>().useFastISel();
}

/// Returns true if the target can safely skip saving callee-saved registers
/// for noreturn nounwind functions.
bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
         MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
         !MF.getFunction().hasFnAttribute(Attribute::UWTable));

  // The frame pointer and link register are not treated as normal CSRs, so
  // we can always skip CSR saves for non-returning functions.
  return true;
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // ABI-required frame pointer.
  if (MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;

  // Frame pointer required for use within this function.
  return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken());
}

/// isFPReserved - Return true if the frame pointer register should be
/// considered a reserved register on the scope of the specified function.
bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
  return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned CFSize = MFI.getMaxCallFrameSize();
  // It's not always a good idea to include the call frame as part of the
  // stack frame. ARM (especially Thumb) has small immediate offsets for
  // addressing the stack frame, so a large call frame can cause poor codegen
  // and may even make it impossible to scavenge a register.
  if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12: 2047 bytes
    return false;

  return !MFI.hasVarSizedObjects();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}

// Returns how much of the incoming argument stack area we should clean up in
// an epilogue. For the C calling convention this will be 0, for guaranteed
// tail call conventions it can be positive (a normal return or a tail call to
// a function that uses less stack space for arguments) or negative (for a tail
// call to a function that needs more stack space than us for arguments).
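//
// For instance (illustrative numbers, assuming a callee-pops convention): a
// function that received 16 bytes of stack arguments and tail-calls a
// function taking 24 bytes gets 16 - 24 = -8, i.e. the "restore" actually
// grows the argument area by 8 bytes before the jump.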
static int getArgumentStackToRestore(MachineFunction &MF,
                                     MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
                       RetOpcode == ARM::TCRETURNri ||
                       RetOpcode == ARM::TCRETURNrinotr12;
  }
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  int ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  return ArgumentPopSize;
}

static bool needsWinCFI(const MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         F.needsUnwindTableEntry();
}

// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             unsigned Flags) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  MachineInstrBuilder MIB;
  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
  const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  Flags |= MachineInstr::NoMerge;

  switch (Opc) {
  default:
    report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
    break;
  case ARM::t2ADDri:   // add.w r11, sp, #xx
  case ARM::t2ADDri12: // add.w r11, sp, #xx
  case ARM::t2MOVTi16: // movt  r4, #xx
  case ARM::tBL:       // bl __chkstk
    // These are harmless if used for just setting up a frame pointer,
    // but that frame pointer can't be relied upon for unwinding, unless
    // set up with SEH_SaveSP.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi16: { // mov(w) r4, #xx
    bool Wide = MBBI->getOperand(1).getImm() >= 256;
    if (!Wide) {
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
      NewInstr.add(MBBI->getOperand(0));
      NewInstr.add(t1CondCodeOp(/*isDead=*/true));
      for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
        NewInstr.add(MO);
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
    break;
  }

  case ARM::tBLXr: // blx r12 (__chkstk)
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi32imm: // movw+movt
    // This pseudo instruction expands into two mov instructions. If the
    // second operand is a symbol reference, this will stay as two wide
    // instructions, movw+movt. If they're immediates, the first one can
    // end up as a narrow mov though.
    // As two SEH instructions are appended here, they won't get interleaved
    // between the two final movw/movt instructions, but it doesn't make any
    // practical difference.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    MBB->insertAfter(MBBI, MIB);
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::t2STR_PRE:
    if (MBBI->getOperand(0).getReg() == ARM::SP &&
        MBBI->getOperand(2).getReg() == ARM::SP &&
        MBBI->getOperand(3).getImm() == -4) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
                .addImm(1ULL << Reg)
                .addImm(/*Wide=*/1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No matching SEH Opcode for t2STR_PRE");
    }
    break;

  case ARM::t2LDR_POST:
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        MBBI->getOperand(2).getReg() == ARM::SP &&
        MBBI->getOperand(3).getImm() == 4) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
                .addImm(1ULL << Reg)
                .addImm(/*Wide=*/1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No matching SEH Opcode for t2LDR_POST");
    }
    break;

  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMDB_UPD: {
    unsigned Mask = 0;
    bool Wide = false;
    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
      const MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (Reg == 15)
        Reg = 14;
      if (Reg >= 8 && Reg <= 13)
        Wide = true;
      else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
        Wide = true;
      Mask |= 1 << Reg;
    }
    if (!Wide) {
      unsigned NewOpc;
      switch (Opc) {
      case ARM::t2LDMIA_RET:
        NewOpc = ARM::tPOP_RET;
        break;
      case ARM::t2LDMIA_UPD:
        NewOpc = ARM::tPOP;
        break;
      case ARM::t2STMDB_UPD:
        NewOpc = ARM::tPUSH;
        break;
      default:
        llvm_unreachable("Unexpected LDM/STM opcode");
      }
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
      for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
        NewInstr.add(MBBI->getOperand(i));
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    unsigned SEHOpc =
        (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
    MIB = BuildMI(MF, DL, TII.get(SEHOpc))
              .addImm(Mask)
              .addImm(Wide ? 1 : 0)
              .setMIFlags(Flags);
    break;
  }
  case ARM::VSTMDDB_UPD:
  case ARM::VLDMDIA_UPD: {
    int First = -1, Last = 0;
    for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (First == -1)
        First = Reg;
      Last = Reg;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
              .addImm(First)
              .addImm(Last)
              .setMIFlags(Flags);
    break;
  }
  case ARM::tSUBspi:
  case ARM::tADDspi:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm() * 4)
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;
  case ARM::t2SUBspImm:
  case ARM::t2SUBspImm12:
  case ARM::t2ADDspImm:
  case ARM::t2ADDspImm12:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm())
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::tMOVr:
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        (Flags & MachineInstr::FrameSetup)) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
                .setMIFlags(Flags);
    } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
               (Flags & MachineInstr::FrameDestroy)) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No SEH Opcode for MOV");
    }
    break;

  case ARM::tBX_RET:
  case ARM::TCRETURNri:
  case ARM::TCRETURNrinotr12:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;

  case ARM::TCRETURNdi:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;
  }
  return MBB->insertAfter(MBBI, MIB);
}

static MachineBasicBlock::iterator
initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
  if (MBBI == MBB.begin())
    return MachineBasicBlock::iterator();
  return std::prev(MBBI);
}

static void insertSEHRange(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator Start,
                           const MachineBasicBlock::iterator &End,
                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  if (Start.isValid())
    Start = std::next(Start);
  else
    Start = MBB.begin();

  for (auto MI = Start; MI != End;) {
    auto Next = std::next(MI);
    // Check if this instruction already has got a SEH opcode added. In that
    // case, don't do this generic mapping.
    if (Next != End && isSEHInstruction(*Next)) {
      MI = std::next(Next);
      while (MI != End && isSEHInstruction(*MI))
        ++MI;
      continue;
    }
    insertSEH(MI, TII, MIFlags);
    MI = Next;
  }
}

static void emitRegPlusImmediate(
    bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
    unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}

static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}

static int sizeOfSPAdjustment(const MachineInstr &MI) {
  int RegSize;
  switch (MI.getOpcode()) {
  case ARM::VSTMDDB_UPD:
    RegSize = 8;
    break;
  case ARM::STMDB_UPD:
  case ARM::t2STMDB_UPD:
    RegSize = 4;
    break;
  case ARM::t2STR_PRE:
  case ARM::STR_PRE_IMM:
    return 4;
  default:
    llvm_unreachable("Unknown push or pop like instruction");
  }

  int count = 0;
  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
  // pred) so the list starts at 4.
  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
    count += RegSize;
  return count;
}

static bool WindowsRequiresStackProbe(const MachineFunction &MF,
                                      size_t StackSizeInBytes) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;

  StackProbeSize =
      F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
}
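
// For reference (an illustrative LLVM IR snippet, not taken from this file),
// the probe threshold can be tuned, or the probe suppressed, per function via
// the attributes queried above:
//
//   define void @f() "stack-probe-size"="8192" { ... }
//   define void @g() "no-stack-arg-probe" { ... }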

namespace {

struct StackAdjustingInsts {
  struct InstInfo {
    MachineBasicBlock::iterator I;
    unsigned SPAdjust;
    bool BeforeFPSet;
  };

  SmallVector<InstInfo, 4> Insts;

  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {I, SPAdjust, BeforeFPSet};
    Insts.push_back(Info);
  }

  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info =
        llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, bool HasFP) {
    MachineFunction &MF = *MBB.getParent();
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset += Info.SPAdjust;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, std::next(Info.I), dl,
              TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
  }
};

} // end anonymous namespace
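
// A sketch of the intended usage, mirroring emitPrologue below: record each
// SP-adjusting instruction as it is emitted,
//
//   DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
//
// and, once the prologue layout is final, emit the matching
// .cfi_def_cfa_offset directives in a single pass:
//
//   DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);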

/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const Align Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  const unsigned AlignMask = Alignment.value() - 1U;
  const unsigned NrBitsToZero = Log2(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // If the BFC instruction is available, use that to zero the lower
    // bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field:
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(~AlignMask)
          .add(predOps(ARMCC::AL));
    } else if (AlignMask <= 255) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(AlignMask)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(~AlignMask)
        .add(predOps(ARMCC::AL));
  }
}

/// We need the offset of the frame pointer relative to other MachineFrameInfo
/// offsets which are encoded relative to SP at function begin.
/// See also emitPrologue() for how the FP is set up.
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                          const MachineFunction &MF) {
  // For Thumb1, push.w isn't available, so the first push will always push
  // r7 and lr onto the stack.
  if (AFI.isThumb1OnlyFunction())
    return -AFI.getArgRegsSaveSize() - (2 * 4);
  // This is a conservative estimate: assume the frame pointer is r7 and that
  // registers up to r8, plus pc ("r15"), get spilled before it
  // (= 8 registers).
  int MaxRegBytes = 8 * 4;
  if (STI.splitFramePointerPush(MF)) {
    // Here, r11 can be stored below all of r4-r15 (3 registers more than
    // above), plus d8-d15.
    MaxRegBytes = 11 * 4 + 8 * 8;
  }
  int FPCXTSaveSize =
      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}
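
// As a worked example (illustrative numbers only): a non-Thumb1 function with
// no split FP push, no FPCXT save, and 8 bytes of vararg register saves gets
// -0 - 8 - 8*4 = -40, i.e. the FP spill slot is assumed to lie no more than
// 40 bytes below the incoming SP.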

void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MCContext &Context = MF.getContext();
  const TargetMachine &TM = MF.getTarget();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;
  bool NeedsWinCFI = needsWinCFI(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the size of each callee-saved spill area and record which
  // frame index belongs to which area.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
    if (NumBytes != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
    }
    if (!NeedsWinCFI)
      DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
    if (NeedsWinCFI && MBBI != MBB.begin()) {
      insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      MF.setHasWinCFI(true);
    }
    return;
  }

  // Determine spill area sizes.
  if (STI.splitFramePointerPush(MF)) {
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      int FI = I.getFrameIdx();
      switch (Reg) {
      case ARM::R11:
      case ARM::LR:
        if (Reg == FramePtr)
          FramePtrSpillFI = FI;
        GPRCS2Size += 4;
        break;
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R12:
        GPRCS1Size += 4;
        break;
      case ARM::FPCXTNS:
        FPCXTSaveSize = 4;
        break;
      default:
        // This is a DPR. Exclude the aligned DPRCS2 spills.
        if (Reg == ARM::D8)
          D8SpillFI = FI;
        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
          DPRCSSize += 8;
      }
    }
  } else {
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      int FI = I.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF)) {
          GPRCS2Size += 4;
          break;
        }
        [[fallthrough]];
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        if (Reg == FramePtr)
          FramePtrSpillFI = FI;
        GPRCS1Size += 4;
        break;
      case ARM::FPCXTNS:
        FPCXTSaveSize = 4;
        break;
      default:
        // This is a DPR. Exclude the aligned DPRCS2 spills.
        if (Reg == ARM::D8)
          D8SpillFI = FI;
        if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
          DPRCSSize += 8;
      }
    }
  }

  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;

  // Move past the PAC computation.
  if (AFI->shouldSignReturnAddress())
    LastPush = MBBI++;

  // Move past FPCXT area.
  if (FPCXTSaveSize > 0) {
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
  }

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                 MachineInstr::FrameSetup);
    LastPush = std::prev(MBBI);
    DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
  }

  // Move past area 1.
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
  }

  // Determine starting offsets of spill areas.
  unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
  unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
  unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
  if (!STI.splitFramePointerPush(MF)) {
    DPRGapSize += GPRCS2Size;
  }
  DPRGapSize %= DPRAlign.value();

  unsigned DPRCSOffset;
  if (STI.splitFramePointerPush(MF)) {
    DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
    GPRCS2Offset = DPRCSOffset - GPRCS2Size;
  } else {
    DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
  }
  int FramePtrOffsetInPush = 0;
  if (HasFP) {
    int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
    assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
           "Max FP estimation is wrong");
    FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);

  // Move past area 2.
  if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so
  // our .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
    }
  }

  // Move past area 3.
  if (DPRCSSize > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
      LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(D8SpillFI);
  } else
    NumBytes = DPRCSOffset;

  if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
  }

  bool NeedsWinCFIStackAlloc = NeedsWinCFI;
  if (STI.splitFramePointerPush(MF) && HasFP)
    NeedsWinCFIStackAlloc = false;

  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    } else {
      // Split into two instructions here, instead of using t2MOVi32imm,
      // to allow inserting accurate SEH instructions (including accurate
      // instruction size for each of them).
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
          .addImm(NumWords & 0xffff)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
          .addReg(ARM::R4)
          .addImm(NumWords >> 16)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(predOps(ARMCC::AL));
    }

    switch (TM.getCodeModel()) {
    case CodeModel::Tiny:
      llvm_unreachable("Tiny code model not available on ARM.");
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
          .add(predOps(ARMCC::AL))
          .addExternalSymbol("__chkstk")
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
          .add(predOps(ARMCC::AL))
          .addReg(ARM::R12, RegState::Kill)
          .addReg(ARM::R4, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    MachineInstrBuilder Instr, SEH;
    Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
                .addReg(ARM::SP, RegState::Kill)
                .addReg(ARM::R4, RegState::Kill)
                .setMIFlags(MachineInstr::FrameSetup)
                .add(predOps(ARMCC::AL))
                .add(condCodeOp());
    if (NeedsWinCFIStackAlloc) {
      SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
                .addImm(NumBytes)
                .addImm(/*Wide=*/1)
                .setMIFlags(MachineInstr::FrameSetup);
      MBB.insertAfter(Instr, SEH);
    }
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      //   mov sp, r7
      //   sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise (when not targeting iOS), all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, FP is in
  // area one and the adjustment needs to take place just after that push.
  // FIXME: The above is not necessarily true when PACBTI is enabled.
  // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
  // so FP ends up in area two.
  MachineBasicBlock::iterator AfterPush;
  if (HasFP) {
    AfterPush = std::next(GPRCS1Push);
    unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
    int FPOffset = PushSize + FramePtrOffsetInPush;
    if (STI.splitFramePointerPush(MF)) {
      AfterPush = std::next(GPRCS2Push);
      emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                           FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
    } else {
      emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
                           FramePtr, ARM::SP, FPOffset,
                           MachineInstr::FrameSetup);
    }
    if (!NeedsWinCFI) {
      if (FramePtrOffsetInPush + PushSize != 0) {
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
            nullptr, MRI->getDwarfRegNum(FramePtr, true),
            FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
        BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      } else {
        unsigned CFIIndex =
            MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
                nullptr, MRI->getDwarfRegNum(FramePtr, true)));
        BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Emit a SEH opcode indicating the prologue end. The rest of the prologue
  // instructions below don't need to be replayed to unwind the stack.
  if (NeedsWinCFI && MBBI != MBB.begin()) {
    MachineBasicBlock::iterator End = MBBI;
    if (HasFP && STI.splitFramePointerPush(MF))
      End = AfterPush;
    insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
    BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
    MF.setHasWinCFI(true);
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF CFI instructions to describe the situation. Start by
  // recording where each register ended up:
  if (GPRCS1Size > 0 && !NeedsWinCFI) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
    int CFIIndex;
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF))
          break;
        [[fallthrough]];
      case ARM::R0:
      case ARM::R1:
      case ARM::R2:
      case ARM::R3:
      case ARM::R4:
      case ARM::R5:
      case ARM::R6:
      case ARM::R7:
      case ARM::LR:
        CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
        break;
      }
    }
  }

  if (GPRCS2Size > 0 && !NeedsWinCFI) {
    MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      switch (Reg) {
      case ARM::R8:
      case ARM::R9:
      case ARM::R10:
      case ARM::R11:
      case ARM::R12:
        if (STI.splitFramePushPop(MF)) {
          unsigned DwarfReg = MRI->getDwarfRegNum(
              Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
          int64_t Offset = MFI.getObjectOffset(FI);
          unsigned CFIIndex = MF.addFrameInst(
              MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
          BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
              .addCFIIndex(CFIIndex)
              .setMIFlags(MachineInstr::FrameSetup);
        }
        break;
      }
    }
  }

  if (DPRCSSize > 0 && !NeedsWinCFI) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    MachineBasicBlock::iterator Pos = std::next(LastPush);
    for (const auto &Entry : CSI) {
      Register Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
          (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
        unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
        int64_t Offset = MFI.getObjectOffset(FI);
        unsigned CFIIndex = MF.addFrameInst(
            MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
        BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  if (!NeedsWinCFI)
    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
    Align MaxAlign = MFI.getMaxAlign();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
                               false);
    } else {
      // We cannot use sp as source/dest register here, thus we're using r4 to
      // perform the calculations. We're emitting the following sequence:
      //   mov r4, sp
      //   -- use emitAligningInstructions to produce best sequence to zero
      //   -- out lower bits in r4
      //   mov sp, r4
      // FIXME: It would be better to find a spare register here.
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
          .addReg(ARM::SP, RegState::Kill)
          .add(predOps(ARMCC::AL));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
                               false);
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
          .addReg(ARM::R4, RegState::Kill)
          .add(predOps(ARMCC::AL));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    else
      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}

void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();

  // Amount of stack space we reserved next to incoming args for either
  // varargs registers or stack arguments in tail calls made by this function.
  unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves on the first (from top) terminator instruction.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineBasicBlock::iterator RangeStart;
  if (!AFI->hasStackFrame()) {
    if (MF.hasWinCFI()) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    if (NumBytes + IncomingArgStackToRestore != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes + IncomingArgStackToRestore,
                   MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(MachineInstr::FrameDestroy))
        ++MBBI;
    }

    if (MF.hasWinCFI()) {
      BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -= (ReservedArgStack +
                 AFI->getFPCXTSaveAreaSize() +
                 AFI->getGPRCalleeSavedArea1Size() +
                 AFI->getGPRCalleeSavedArea2Size() +
                 AFI->getDPRCalleeSavedGapSize() +
                 AFI->getDPRCalleeSavedAreaSize());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // the frame pointer stack slot, or the target is ELF and the function has
    // an FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII,
                                  MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          //   mov sp, r7
          //   sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                 ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(ARM::R4)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .add(condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
              .addReg(FramePtr)
              .add(predOps(ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MachineInstr::FrameDestroy);

    // Increment past our save areas.
    if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))
      MBBI++;

    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
                   MachineInstr::FrameDestroy);
    }

    if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))
      MBBI++;
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

    if (ReservedArgStack || IncomingArgStackToRestore) {
      assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
             "attempting to restore negative stack amount");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   ReservedArgStack + IncomingArgStackToRestore,
                   MachineInstr::FrameDestroy);
    }

    // Validate PAC: it should already have been popped into R12. For a CMSE
    // entry function, the validation instruction is emitted during expansion
    // of the tBXNS_RET, since the validation must use the value of SP at
    // function entry, before saving, resp. after restoring, FPCXTNS.
1419 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1420 BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2AUT));
1421 }
1422
1423 if (MF.hasWinCFI()) {
1424 insertSEHRange(MBB, Start: RangeStart, End: MBB.end(), TII, MIFlags: MachineInstr::FrameDestroy);
1425 BuildMI(BB&: MBB, I: MBB.end(), MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogEnd))
1426 .setMIFlag(MachineInstr::FrameDestroy);
1427 }
1428}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                     int FI,
                                                     Register &FrameReg) const {
  return StackOffset::getFixed(
      ResolveFrameIndexReference(MF, FI, FrameReg, 0));
}

int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 int SPAdj) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
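  // (Rationale: fixed/argument objects sit above the realignment padding at a
  // compile-time offset from fp, while locals below the padding are only at a
  // known offset from the realigned sp or from the base pointer.)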
  if (RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
      Offset -= SPAdj;
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumbFunction()) {
      // Prefer SP to base pointer, if the offset is suitably aligned and in
      // range as the effective range of the immediate offset is bigger when
      // basing off SP.
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
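      // E.g. an object at sp+512 whose fp-relative offset is only +8 is
      // cheaper to materialize as fp+8.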
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
  // That can happen if we forced a base pointer for a large call frame.
  if (RegInfo->hasBasePointer(MF)) {
    FrameReg = RegInfo->getBaseRegister();
    Offset -= SPAdj;
  }
  return Offset;
}

void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap, bool (*Func)(unsigned, bool),
                                    unsigned NumAlignedDPRCS2Regs,
                                    unsigned MIFlags) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      Register Reg = CSI[i-1].getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // D-registers in the aligned area DPRCS2 are NOT spilled here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;

      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(Reg))
        MBB.addLiveIn(Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      //             vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in.
      // This happens with the @llvm.returnaddress intrinsic and with arguments
      // passed in callee-saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
    });

    if (Regs.size() > 1 || StrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .setMIFlags(MIFlags)
                                    .add(predOps(ARMCC::AL));
      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
    } else if (Regs.size() == 1) {
      BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
          .addReg(Regs[0].first, getKillRegState(Regs[0].second))
          .addReg(ARM::SP)
          .setMIFlags(MIFlags)
          .addImm(-4)
          .add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer
    // to higher register numbers so need to be pushed first in order to
    // preserve monotonicity.
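    // E.g. when spilling {d8, d10, d11}, the group {d8} is emitted first, but
    // the final program order becomes: vpush {d10, d11}; vpush {d8}.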
    if (MI != MBB.begin())
      --MI;
  }
}

void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   bool (*Func)(unsigned, bool),
                                   unsigned NumAlignedDPRCS2Regs) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall =
        (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
         RetOpcode == ARM::TCRETURNrinotr12);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap =
        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
        RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      Register Reg = Info.getReg();
      if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;

      // The aligned reloads from area DPRCS2 are not inserted here.
      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
        continue;
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
          !STI.splitFramePointerPush(MF)) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      //             vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .add(predOps(ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned Reg : Regs)
        MIB.addReg(Reg, getDefRegState(true));
      if (DeleteRet) {
        if (MI != MBB.end()) {
          MIB.copyImplicitOps(*MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
              .addReg(ARM::SP, RegState::Define)
              .addReg(ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      MIB.add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
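    // E.g. the reloads for {d8, d10, d11} end up in program order as:
    // vpop {d8}; vpop {d10, d11}.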
    if (MI != MBB.end())
      ++MI;
  }
}

/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    unsigned DNum = I.getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer. MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer, which is a
    // bit weird when realigning the stack. Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addReg(ARM::SP)
      .addImm(8 * NumAlignedDPRCS2Regs)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4)
                                .add(predOps(ARMCC::AL));
  if (!isThumb)
    MIB.add(condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
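    // E.g. after a 2-reg VST1q64 of d8/d9, the odd d10 is stored 16 bytes
    // above r4, encoded here as immediate 4.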
    BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
        .addReg(NextReg)
        .addReg(ARM::R4)
        .addImm((NextReg - R4BaseReg) * 2)
        .add(predOps(ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
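  // The spill sequence uses one store for 1, 2 or 4 registers, two stores for
  // 3, 5, 6 or 8, and three stores for 7, which is what the cases below skip.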
  switch (NumAlignedDPRCS2Regs) {
  case 7:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  default:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  case 1:
  case 2:
  case 4:
    assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
    ++MI;
  }
  return MI;
}

/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addFrameIndex(D8SpillFI)
      .addImm(0)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
        .addReg(ARM::R4, RegState::Define)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be reloaded.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                               &ARM::QPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
        .addReg(ARM::R4)
        .addImm(16)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  if (NumAlignedDPRCS2Regs)
    BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
        .addReg(ARM::R4)
        .addImm(2 * (NextReg - R4BaseReg))
        .add(predOps(ARMCC::AL));

  // The last reload inserted kills the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}

bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // Save the non-secure floating point context.
  if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(-4)
        .add(predOps(ARMCC::AL));
  }
  if (STI.splitFramePointerPush(MF)) {
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                 &isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
                 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
                 &isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
  } else {
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
                 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
                 0, MachineInstr::FrameSetup);
    emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
                 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
  }

  // The code above does not insert spill code for the aligned DPRCS2
  // registers. The stack realignment code will be inserted between the
  // push instructions and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}

bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;
  if (STI.splitFramePointerPush(MF)) {
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isSplitFPArea2Register, 0);
    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
                NumAlignedDPRCS2Regs);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isSplitFPArea1Register, 0);
  } else {
    emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
                NumAlignedDPRCS2Regs);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isARMArea2Register, 0);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
                &isARMArea1Register, 0);
  }

  return true;
}

// FIXME: Make generic?
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
                                            const ARMBaseInstrInfo &TII) {
  unsigned FnSize = 0;
  for (auto &MBB : MF) {
    for (auto &MI : MBB)
      FnSize += TII.getInstSizeInBytes(MI);
  }
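  // Rough estimate: count each jump-table and constant-pool entry as 4 bytes.
  // Entry sizes can differ, but this only feeds the far-jump heuristic in
  // determineCalleeSaves.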
  if (MF.getJumpTableInfo())
    for (auto &Table : MF.getJumpTableInfo()->getJumpTables())
      FnSize += Table.MBBs.size() * 4;
  FnSize += MF.getConstantPool()->getConstants().size() * 4;
  return FnSize;
}

/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
        if (RegClass && !RegClass->contains(ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          Limit = std::min(Limit, ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offsets, so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(Limit, ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(Limit, ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(Limit, ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit "
                           "calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}

// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
          MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // need spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
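  // E.g. if d8, d9 and d11 are saved but d10 is not, NumSpills ends up as 2
  // and d11 is spilled to the standard DPRCS area instead.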
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}

bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  // For CMSE entry functions, we want to save the FPCXT_NS immediately upon
  // function entry (resp. restore it immediately before return).
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
    return false;

  // We are disabling shrinkwrapping for now when PAC is enabled, as
  // shrinkwrapping can cause clobbering of r12 when the PAC code is
  // generated. A follow-up patch will fix this in a more performant manner.
  if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
          true /* SpillsLR */))
    return false;

  return true;
}

bool ARMFrameLowering::requiresAAPCSFrameRecord(
    const MachineFunction &MF) const {
  const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
  return Subtarget.createAAPCSFrameChain() && hasFP(MF);
}

// Thumb1 may require a spill when storing to a frame index through FP (or any
// access with execute-only), for cases where FP is a high register (R11). This
// scans the function for cases where this may happen.
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
                                       const TargetFrameLowering &TFI) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (!AFI->isThumb1OnlyFunction())
    return false;

  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
  for (const auto &MBB : MF)
    for (const auto &MI : MBB)
      if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
          STI.genExecuteOnly())
        for (const auto &Op : MI.operands())
          if (Op.isFI()) {
            Register Reg;
            TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
            if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
              return true;
          }
  return false;
}

void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Spill R4 if a Thumb2 function requires stack realignment - it will be used
  // as a scratch register. Also spill R4 if a Thumb2 function has
  // variable-sized objects, since it's not always possible to restore sp from
  // fp in a single instruction.
  // FIXME: It will be better just to find spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if a Thumb1 function uses variable-length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if a Thumb1 epilogue has to restore SP from FP or the function
    // requires stack alignment. We don't know for sure what the stack size
    // will be, but for this, an estimate is good enough. If anything changes
    // it, it'll be a spill, which implies we've used all the registers and so
    // R4 is already used, so not marking it here will be OK.
    // FIXME: It will be better just to find spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // When return address signing is enabled R12 is treated as callee-saved.
  if (AFI->shouldSignReturnAddress())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (!STI.splitFramePushPop(MF)) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        [[fallthrough]];
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (!STI.splitFramePushPop(MF)) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This
    // enables use of BL to implement far jump.
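    // (The Thumb1 unconditional branch only reaches roughly +/-2 KiB, so
    // larger functions may need a BL-based far jump, which clobbers LR.)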
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  //        and which instructions will need a scratch register for them. Is it
  //        worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);

  // Determine biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
    MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
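    // (Both immediates are scaled by 4, so that is up to 1020 bytes
    // sp-relative and up to 124 bytes bp-/fp-relative.)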
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
    //
    // A special case is the scenario where r11 is used as FP, where accesses
    // to a frame index will require its value to be moved into a low reg.
    // This is handled later on, once we are able to determine if we have any
    // fp-relative accesses.
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets ||
      !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    if (HasFP) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if ((requiresAAPCSFrameRecord(MF) ||
           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
          !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(LRPos);
      }
      auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is the number of extra spills inserted for callee-save GPRs which
    // would not otherwise be used by the function. When greater than zero it
    // guarantees that it is possible to scavenge a register to hold the
    // address of a stack slot. On Thumb1, the register must be a valid operand
    // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
    // or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    unsigned NumExtraCSSpill = 0;

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions cannot
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue for
      // free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first push
      // instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Reg);
          LLVM_DEBUG(
              dbgs()
              << printReg(Reg, TRI)
              << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP || FramePtr != ARM::R7) {
        if (SavedRegs.test(ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::R7);
          LLVM_DEBUG(
              dbgs()
              << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
          NumExtraCSSpill++;
        UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }

    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is, spill
    // LR as well so we can fold BX_RET into the register restore (LDM).
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        NumExtraCSSpill++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned Reg : UnspilledCS1GPRs) {
          // Don't spill a high register if the function is Thumb. On Windows
          // on ARM, R11 (the frame pointer) is acceptable.
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              NumExtraCSSpill++;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
          NumExtraCSSpill++;
      }
    }

    // Estimate if we might need to scavenge registers at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments and for frame index accesses when FP is a high register,
    // even when the frame itself is small.
    unsigned RegsNeeded = 0;
    if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
      RegsNeeded++;
      // With thumb1 execute-only we may need an additional register for saving
      // and restoring the CPSR.
      if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
        RegsNeeded++;
    }

    if (RegsNeeded > NumExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
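      // (E.g. with an 8-byte stack alignment this looks for up to two 4-byte
      // GPR slots, so the callee-save area stays aligned.)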
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers.
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(Reg))
            NumExtraCSSpill++;
        }
      }
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            MFI.CreateStackObject(Size, Alignment, false));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}

void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isCalleeSavedInfoValid())
    return;

  // If no terminator implicitly uses LR, we can 'restore' LR into PC instead,
  // so that LR is not live out of the return block: clear the Restored bit in
  // that case.
  for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
    if (Info.getReg() != ARM::LR)
      continue;
    if (all_of(MF, [](const MachineBasicBlock &MBB) {
          return all_of(MBB.terminators(), [](const MachineInstr &Term) {
            return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
                   Term.getOpcode() == ARM::t2LDMIA_RET ||
                   Term.getOpcode() == ARM::tPOP_RET;
          });
        })) {
      Info.setRestored(false);
      break;
    }
  }
}

void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  updateLRRestored(MF);
}

void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
                                      BitVector &SavedRegs) const {
  TargetFrameLowering::getCalleeSaves(MF, SavedRegs);

  // If we have the "returned" parameter attribute which guarantees that we
  // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
  // record that fact for IPRA.
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (AFI->getPreservesR0())
    SavedRegs.set(ARM::R0);
}

bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(ARM::FPCXTNS);
    CSI.back().setRestored(false);
  }

2840 // For functions, which sign their return address, upon function entry, the
2841 // return address PAC is computed in R12. Treat R12 as a callee-saved register
2842 // in this case.
2843 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2844 if (AFI.shouldSignReturnAddress()) {
2845 // The order of register must match the order we push them, because the
2846 // PEI assigns frame indices in that order. When compiling for return
2847 // address sign and authenication, we use split push, therefore the orders
2848 // we want are:
2849 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2850 CSI.insert(position: find_if(Range&: CSI,
2851 P: [=](const auto &CS) {
2852 Register Reg = CS.getReg();
2853 return Reg == ARM::R10 || Reg == ARM::R11 ||
2854 Reg == ARM::R8 || Reg == ARM::R9 ||
2855 ARM::DPRRegClass.contains(Reg);
2856 }),
2857 x: CalleeSavedInfo(ARM::R12));
2858 }
2859
2860 return false;
2861}
2862
2863const TargetFrameLowering::SpillSlot *
2864ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
2865 static const SpillSlot FixedSpillOffsets[] = {{.Reg: ARM::FPCXTNS, .Offset: -4}};
2866 NumEntries = std::size(FixedSpillOffsets);
2867 return FixedSpillOffsets;
2868}
2869
2870MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2871 MachineFunction &MF, MachineBasicBlock &MBB,
2872 MachineBasicBlock::iterator I) const {
2873 const ARMBaseInstrInfo &TII =
2874 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2875 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2876 bool isARM = !AFI->isThumbFunction();
2877 DebugLoc dl = I->getDebugLoc();
2878 unsigned Opc = I->getOpcode();
2879 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
2880 unsigned CalleePopAmount = IsDestroy ? I->getOperand(i: 1).getImm() : 0;
2881
2882 assert(!AFI->isThumb1OnlyFunction() &&
2883 "This eliminateCallFramePseudoInstr does not support Thumb1!");
2884
2885 int PIdx = I->findFirstPredOperandIdx();
2886 ARMCC::CondCodes Pred = (PIdx == -1)
2887 ? ARMCC::AL
2888 : (ARMCC::CondCodes)I->getOperand(i: PIdx).getImm();
2889 unsigned PredReg = TII.getFramePred(MI: *I);
2890
2891 if (!hasReservedCallFrame(MF)) {
2892 // Bail early if the callee is expected to do the adjustment.
2893 if (IsDestroy && CalleePopAmount != -1U)
2894 return MBB.erase(I);
2895
2896 // If we have alloca, convert as follows:
2897 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2898 // ADJCALLSTACKUP -> add, sp, sp, amount
2899 unsigned Amount = TII.getFrameSize(I: *I);
2900 if (Amount != 0) {
2901 // We need to keep the stack aligned properly. To do this, we round the
2902 // amount of space needed for the outgoing arguments up to the next
2903 // alignment boundary.
2904 Amount = alignSPAdjust(SPAdj: Amount);
2905
2906 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2907 emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -Amount, MIFlags: MachineInstr::NoFlags,
2908 Pred, PredReg);
2909 } else {
2910 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2911 emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: Amount, MIFlags: MachineInstr::NoFlags,
2912 Pred, PredReg);
2913 }
2914 }
2915 } else if (CalleePopAmount != -1U) {
2916 // If the calling convention demands that the callee pops arguments from the
2917 // stack, we want to add it back if we have a reserved call frame.
2918 emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -CalleePopAmount,
2919 MIFlags: MachineInstr::NoFlags, Pred, PredReg);
2920 }
2921 return MBB.erase(I);
2922}
2923
2924/// Get the minimum constant for ARM that is greater than or equal to the
2925/// argument. In ARM, constants can have any value that can be produced by
2926/// rotating an 8-bit value to the right by an even number of bits within a
2927/// 32-bit word.
2928static uint32_t alignToARMConstant(uint32_t Value) {
2929 unsigned Shifted = 0;
2930
2931 if (Value == 0)
2932 return 0;
2933
2934 while (!(Value & 0xC0000000)) {
2935 Value = Value << 2;
2936 Shifted += 2;
2937 }
2938
2939 bool Carry = (Value & 0x00FFFFFF);
2940 Value = ((Value & 0xFF000000) >> 24) + Carry;
2941
2942 if (Value & 0x0000100)
2943 Value = Value & 0x000001FC;
2944
2945 if (Shifted > 24)
2946 Value = Value >> (Shifted - 24);
2947 else
2948 Value = Value << (24 - Shifted);
2949
2950 return Value;
2951}

// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is somewhat arbitrary, but it must
// be well defined in order to allow for consistent implementations of the
// __morestack helper function. It is also not a normal ABI in that it doesn't
// follow the usual calling conventions; this allows the prologue of each
// function to be optimized further.
//
// Currently, the ABI looks like this (when calling __morestack):
//
// * r4 holds the minimum stack size requested for this function call
// * r5 holds the stack size of the arguments to the function
// * the beginning of the function is 3 instructions after the call to
//   __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments on the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack
// for ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs or platforms other than
  // android/linux. Note that thumb1/thumb2 are supported on android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MCContext &Context = MF.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(&PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());

  // The order in this list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  for (MachineBasicBlock *B : AddedBlocks)
    BeforePrologueRegion.insert(B);

  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(B);
    MF.insert(PrologueMBB.getIterator(), B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(&PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
  }

  // The required stack size, rounded up to a value encodable as an ARM
  // immediate.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
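
  // For illustration (a sketch): with CompareStackPointer the limit is
  // compared against SP itself (mov SR1, sp; cmp SR0, SR1), while larger
  // frames first compute the prospective stack pointer
  // (sub SR1, sp, #AlignedStackSize) and compare the limit against that.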
3072
3073 // We will use two of the callee save registers as scratch registers so we
3074 // need to save those registers onto the stack.
3075 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3076 // requested and arguments for __morestack().
3077 // SR0: Scratch Register #0
3078 // SR1: Scratch Register #1
3079 // push {SR0, SR1}
3080 if (Thumb) {
3081 BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
3082 .add(MOs: predOps(Pred: ARMCC::AL))
3083 .addReg(RegNo: ScratchReg0)
3084 .addReg(RegNo: ScratchReg1);
3085 } else {
3086 BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
3087 .addReg(RegNo: ARM::SP, flags: RegState::Define)
3088 .addReg(RegNo: ARM::SP)
3089 .add(MOs: predOps(Pred: ARMCC::AL))
3090 .addReg(RegNo: ScratchReg0)
3091 .addReg(RegNo: ScratchReg1);
3092 }
3093
3094 // Emit the relevant DWARF information about the change in stack pointer as
3095 // well as where to find both r4 and r5 (the callee-save registers)
3096 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3097 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: 8));
3098 BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3099 .addCFIIndex(CFIIndex);
3100 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset(
3101 L: nullptr, Register: MRI->getDwarfRegNum(RegNum: ScratchReg1, isEH: true), Offset: -4));
3102 BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3103 .addCFIIndex(CFIIndex);
3104 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset(
3105 L: nullptr, Register: MRI->getDwarfRegNum(RegNum: ScratchReg0, isEH: true), Offset: -8));
3106 BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3107 .addCFIIndex(CFIIndex);
3108 }
3109
3110 // mov SR1, sp
3111 if (Thumb) {
3112 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ScratchReg1)
3113 .addReg(RegNo: ARM::SP)
3114 .add(MOs: predOps(Pred: ARMCC::AL));
3115 } else if (CompareStackPointer) {
3116 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ScratchReg1)
3117 .addReg(RegNo: ARM::SP)
3118 .add(MOs: predOps(Pred: ARMCC::AL))
3119 .add(MO: condCodeOp());
3120 }
3121
3122 // sub SR1, sp, #StackSize
3123 if (!CompareStackPointer && Thumb) {
3124 if (AlignedStackSize < 256) {
3125 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBi8), DestReg: ScratchReg1)
3126 .add(MO: condCodeOp())
3127 .addReg(RegNo: ScratchReg1)
3128 .addImm(Val: AlignedStackSize)
3129 .add(MOs: predOps(Pred: ARMCC::AL));
3130 } else {
3131 if (Thumb2 || ST->genExecuteOnly()) {
3132 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
3133 .addImm(Val: AlignedStackSize);
3134 } else {
3135 auto MBBI = McrMBB->end();
3136 auto RegInfo = STI.getRegisterInfo();
3137 RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
3138 Val: AlignedStackSize);
3139 }
3140 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBrr), DestReg: ScratchReg1)
3141 .add(MO: condCodeOp())
3142 .addReg(RegNo: ScratchReg1)
3143 .addReg(RegNo: ScratchReg0)
3144 .add(MOs: predOps(Pred: ARMCC::AL));
3145 }
3146 } else if (!CompareStackPointer) {
3147 if (AlignedStackSize < 256) {
3148 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBri), DestReg: ScratchReg1)
3149 .addReg(RegNo: ARM::SP)
3150 .addImm(Val: AlignedStackSize)
3151 .add(MOs: predOps(Pred: ARMCC::AL))
3152 .add(MO: condCodeOp());
3153 } else {
3154 auto MBBI = McrMBB->end();
3155 auto RegInfo = STI.getRegisterInfo();
3156 RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
3157 Val: AlignedStackSize);
3158 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBrr), DestReg: ScratchReg1)
3159 .addReg(RegNo: ARM::SP)
3160 .addReg(RegNo: ScratchReg0)
3161 .add(MOs: predOps(Pred: ARMCC::AL))
3162 .add(MO: condCodeOp());
3163 }
3164 }
3165
3166 if (Thumb && ST->isThumb1Only()) {
3167 if (ST->genExecuteOnly()) {
3168 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
3169 .addExternalSymbol(FnName: "__STACK_LIMIT");
3170 } else {
3171 unsigned PCLabelId = ARMFI->createPICLabelUId();
3172 ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
3173 C&: MF.getFunction().getContext(), s: "__STACK_LIMIT", ID: PCLabelId, PCAdj: 0);
3174 MachineConstantPool *MCP = MF.getConstantPool();
3175 unsigned CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: Align(4));
3176
3177 // ldr SR0, [pc, offset(STACK_LIMIT)]
3178 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRpci), DestReg: ScratchReg0)
3179 .addConstantPoolIndex(Idx: CPI)
3180 .add(MOs: predOps(Pred: ARMCC::AL));
3181 }
3182
3183 // ldr SR0, [SR0]
3184 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRi), DestReg: ScratchReg0)
3185 .addReg(RegNo: ScratchReg0)
3186 .addImm(Val: 0)
3187 .add(MOs: predOps(Pred: ARMCC::AL));
3188 } else {
3189 // Get TLS base address from the coprocessor
3190 // mrc p15, #0, SR0, c13, c0, #3
3191 BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2MRC : ARM::MRC),
3192 DestReg: ScratchReg0)
3193 .addImm(Val: 15)
3194 .addImm(Val: 0)
3195 .addImm(Val: 13)
3196 .addImm(Val: 0)
3197 .addImm(Val: 3)
3198 .add(MOs: predOps(Pred: ARMCC::AL));
3199
3200 // Use the last tls slot on android and a private field of the TCP on linux.
3201 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3202 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3203
3204 // Get the stack limit from the right offset
3205 // ldr SR0, [sr0, #4 * TlsOffset]
3206 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3207 DestReg: ScratchReg0)
3208 .addReg(RegNo: ScratchReg0)
3209 .addImm(Val: 4 * TlsOffset)
3210 .add(MOs: predOps(Pred: ARMCC::AL));
3211 }
3212
3213 // Compare stack limit with stack size requested.
3214 // cmp SR0, SR1
3215 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3216 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
3217 .addReg(RegNo: ScratchReg0)
3218 .addReg(RegNo: ScratchReg1)
3219 .add(MOs: predOps(Pred: ARMCC::AL));
3220
3221 // This jump is taken if StackLimit <= SP - stack required.
3222 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3223 BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
3224 .addMBB(MBB: PostStackMBB)
3225 .addImm(Val: ARMCC::LS)
3226 .addReg(RegNo: ARM::CPSR);
3227
3228 // Calling __morestack(StackSize, Size of stack arguments).
3229 // __morestack knows that the stack size requested is in SR0(r4)
3230 // and amount size of stack arguments is in SR1(r5).
3231
3232 // Pass first argument for the __morestack by Scratch Register #0.
3233 // The amount size of stack required
3234 if (Thumb) {
3235 if (AlignedStackSize < 256) {
3236 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg0)
3237 .add(MO: condCodeOp())
3238 .addImm(Val: AlignedStackSize)
3239 .add(MOs: predOps(Pred: ARMCC::AL));
3240 } else {
3241 if (Thumb2 || ST->genExecuteOnly()) {
3242 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
3243 .addImm(Val: AlignedStackSize);
3244 } else {
3245 auto MBBI = AllocMBB->end();
3246 auto RegInfo = STI.getRegisterInfo();
3247 RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
3248 Val: AlignedStackSize);
3249 }
3250 }
3251 } else {
3252 if (AlignedStackSize < 256) {
3253 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg0)
3254 .addImm(Val: AlignedStackSize)
3255 .add(MOs: predOps(Pred: ARMCC::AL))
3256 .add(MO: condCodeOp());
3257 } else {
3258 auto MBBI = AllocMBB->end();
3259 auto RegInfo = STI.getRegisterInfo();
3260 RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
3261 Val: AlignedStackSize);
3262 }
3263 }
3264
3265 // Pass second argument for the __morestack by Scratch Register #1.
3266 // The amount size of stack consumed to save function arguments.
3267 if (Thumb) {
3268 if (ARMFI->getArgumentStackSize() < 256) {
3269 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg1)
3270 .add(MO: condCodeOp())
3271 .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
3272 .add(MOs: predOps(Pred: ARMCC::AL));
3273 } else {
3274 if (Thumb2 || ST->genExecuteOnly()) {
3275 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg1)
3276 .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
3277 } else {
3278 auto MBBI = AllocMBB->end();
3279 auto RegInfo = STI.getRegisterInfo();
3280 RegInfo->emitLoadConstPool(
3281 MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
3282 Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
3283 }
3284 }
3285 } else {
3286 if (alignToARMConstant(Value: ARMFI->getArgumentStackSize()) < 256) {
3287 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg1)
3288 .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
3289 .add(MOs: predOps(Pred: ARMCC::AL))
3290 .add(MO: condCodeOp());
3291 } else {
3292 auto MBBI = AllocMBB->end();
3293 auto RegInfo = STI.getRegisterInfo();
3294 RegInfo->emitLoadConstPool(
3295 MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
3296 Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
3297 }
3298 }
3299
3300 // push {lr} - Save return address of this function.
3301 if (Thumb) {
3302 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
3303 .add(MOs: predOps(Pred: ARMCC::AL))
3304 .addReg(RegNo: ARM::LR);
3305 } else {
3306 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
3307 .addReg(RegNo: ARM::SP, flags: RegState::Define)
3308 .addReg(RegNo: ARM::SP)
3309 .add(MOs: predOps(Pred: ARMCC::AL))
3310 .addReg(RegNo: ARM::LR);
3311 }
3312
3313 // Emit the DWARF info about the change in stack as well as where to find the
3314 // previous link register
3315 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3316 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: 12));
3317 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3318 .addCFIIndex(CFIIndex);
3319 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset(
3320 L: nullptr, Register: MRI->getDwarfRegNum(RegNum: ARM::LR, isEH: true), Offset: -12));
3321 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3322 .addCFIIndex(CFIIndex);
3323 }
3324
3325 // Call __morestack().
3326 if (Thumb) {
3327 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tBL))
3328 .add(MOs: predOps(Pred: ARMCC::AL))
3329 .addExternalSymbol(FnName: "__morestack");
3330 } else {
3331 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::BL))
3332 .addExternalSymbol(FnName: "__morestack");
3333 }
3334
3335 // pop {lr} - Restore return address of this original function.
3336 if (Thumb) {
3337 if (ST->isThumb1Only()) {
3338 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
3339 .add(MOs: predOps(Pred: ARMCC::AL))
3340 .addReg(RegNo: ScratchReg0);
3341 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::LR)
3342 .addReg(RegNo: ScratchReg0)
3343 .add(MOs: predOps(Pred: ARMCC::AL));
3344 } else {
3345 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::t2LDR_POST))
3346 .addReg(RegNo: ARM::LR, flags: RegState::Define)
3347 .addReg(RegNo: ARM::SP, flags: RegState::Define)
3348 .addReg(RegNo: ARM::SP)
3349 .addImm(Val: 4)
3350 .add(MOs: predOps(Pred: ARMCC::AL));
3351 }
3352 } else {
3353 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
3354 .addReg(RegNo: ARM::SP, flags: RegState::Define)
3355 .addReg(RegNo: ARM::SP)
3356 .add(MOs: predOps(Pred: ARMCC::AL))
3357 .addReg(RegNo: ARM::LR);
3358 }
3359
3360 // Restore SR0 and SR1 in case of __morestack() was called.
3361 // __morestack() will skip PostStackMBB block so we need to restore
3362 // scratch registers from here.
3363 // pop {SR0, SR1}
3364 if (Thumb) {
3365 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
3366 .add(MOs: predOps(Pred: ARMCC::AL))
3367 .addReg(RegNo: ScratchReg0)
3368 .addReg(RegNo: ScratchReg1);
3369 } else {
3370 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
3371 .addReg(RegNo: ARM::SP, flags: RegState::Define)
3372 .addReg(RegNo: ARM::SP)
3373 .add(MOs: predOps(Pred: ARMCC::AL))
3374 .addReg(RegNo: ScratchReg0)
3375 .addReg(RegNo: ScratchReg1);
3376 }
3377
3378 // Update the CFA offset now that we've popped
3379 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3380 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: 0));
3381 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3382 .addCFIIndex(CFIIndex);
3383 }
3384
3385 // Return from this function.
3386 BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ST->getReturnOpcode())).add(MOs: predOps(Pred: ARMCC::AL));
3387
3388 // Restore SR0 and SR1 in case of __morestack() was not called.
3389 // pop {SR0, SR1}
3390 if (Thumb) {
3391 BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
3392 .add(MOs: predOps(Pred: ARMCC::AL))
3393 .addReg(RegNo: ScratchReg0)
3394 .addReg(RegNo: ScratchReg1);
3395 } else {
3396 BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
3397 .addReg(RegNo: ARM::SP, flags: RegState::Define)
3398 .addReg(RegNo: ARM::SP)
3399 .add(MOs: predOps(Pred: ARMCC::AL))
3400 .addReg(RegNo: ScratchReg0)
3401 .addReg(RegNo: ScratchReg1);
3402 }
3403
3404 // Update the CFA offset now that we've popped
3405 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3406 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: 0));
3407 BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3408 .addCFIIndex(CFIIndex);
3409
3410 // Tell debuggers that r4 and r5 are now the same as they were in the
3411 // previous function, that they're the "Same Value".
3412 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createSameValue(
3413 L: nullptr, Register: MRI->getDwarfRegNum(RegNum: ScratchReg0, isEH: true)));
3414 BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3415 .addCFIIndex(CFIIndex);
3416 CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createSameValue(
3417 L: nullptr, Register: MRI->getDwarfRegNum(RegNum: ScratchReg1, isEH: true)));
3418 BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
3419 .addCFIIndex(CFIIndex);
3420 }
3421
3422 // Organizing MBB lists
3423 PostStackMBB->addSuccessor(Succ: &PrologueMBB);
3424
3425 AllocMBB->addSuccessor(Succ: PostStackMBB);
3426
3427 GetMBB->addSuccessor(Succ: PostStackMBB);
3428 GetMBB->addSuccessor(Succ: AllocMBB);
3429
3430 McrMBB->addSuccessor(Succ: GetMBB);
3431
3432 PrevStackMBB->addSuccessor(Succ: McrMBB);
3433
3434#ifdef EXPENSIVE_CHECKS
3435 MF.verify();
3436#endif
3437}
3438