1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, at-compile time, a constant offset must be
79// computable from one of the pointers (fp, bp, sp) to access it. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
114#include "ARMMachineFunctionInfo.h"
115#include "ARMSubtarget.h"
116#include "MCTargetDesc/ARMAddressingModes.h"
117#include "MCTargetDesc/ARMBaseInfo.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
123#include "llvm/CodeGen/CFIInstBuilder.h"
124#include "llvm/CodeGen/MachineBasicBlock.h"
125#include "llvm/CodeGen/MachineConstantPool.h"
126#include "llvm/CodeGen/MachineFrameInfo.h"
127#include "llvm/CodeGen/MachineFunction.h"
128#include "llvm/CodeGen/MachineInstr.h"
129#include "llvm/CodeGen/MachineInstrBuilder.h"
130#include "llvm/CodeGen/MachineJumpTableInfo.h"
131#include "llvm/CodeGen/MachineModuleInfo.h"
132#include "llvm/CodeGen/MachineOperand.h"
133#include "llvm/CodeGen/MachineRegisterInfo.h"
134#include "llvm/CodeGen/RegisterScavenging.h"
135#include "llvm/CodeGen/TargetInstrInfo.h"
136#include "llvm/CodeGen/TargetRegisterInfo.h"
137#include "llvm/CodeGen/TargetSubtargetInfo.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
145#include "llvm/Support/CommandLine.h"
146#include "llvm/Support/Compiler.h"
147#include "llvm/Support/Debug.h"
148#include "llvm/Support/ErrorHandling.h"
149#include "llvm/Support/raw_ostream.h"
150#include "llvm/Target/TargetMachine.h"
151#include "llvm/Target/TargetOptions.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
160#define DEBUG_TYPE "arm-frame-lowering"
161
162using namespace llvm;
163
164static cl::opt<bool>
165SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(Val: true),
166 cl::desc("Align ARM NEON spills in prolog and epilog"));
167
168static MachineBasicBlock::iterator
169skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
170 unsigned NumAlignedDPRCS2Regs);
171
/// Named regions of the stack frame into which callee-saved registers are
/// spilled. Which register goes into which area (and the push order of the
/// areas) depends on the PushPopSplitVariation; see getSpillArea() below.
enum class SpillArea {
  GPRCS1,   // First GPR push area (e.g. push {r0-r7, lr} for SplitR7).
  GPRCS2,   // Second GPR push area (e.g. push {r8-r12} for SplitR7).
  FPStatus, // FP status registers (FPSCR, FPEXC).
  DPRCS1,   // D-register (VFP/NEON) save area (vpush).
  DPRCS2,   // Extra-aligned D-register area, placed after stack re-alignment.
  GPRCS3,   // {r11, lr} push used by SplitR11WindowsSEH.
  FPCXT,    // FPCXTNS save for CMSE secure entry functions; top of the frame.
};
181
182/// Get the spill area that Reg should be saved into in the prologue.
183SpillArea getSpillArea(Register Reg,
184 ARMSubtarget::PushPopSplitVariation Variation,
185 unsigned NumAlignedDPRCS2Regs,
186 const ARMBaseRegisterInfo *RegInfo) {
187 // NoSplit:
188 // push {r0-r12, lr} GPRCS1
189 // vpush {r8-d15} DPRCS1
190 //
191 // SplitR7:
192 // push {r0-r7, lr} GPRCS1
193 // push {r8-r12} GPRCS2
194 // vpush {r8-d15} DPRCS1
195 //
196 // SplitR11WindowsSEH:
197 // push {r0-r10, r12} GPRCS1
198 // vpush {r8-d15} DPRCS1
199 // push {r11, lr} GPRCS3
200 //
201 // SplitR11AAPCSSignRA:
202 // push {r0-r10, r12} GPRSC1
203 // push {r11, lr} GPRCS2
204 // vpush {r8-d15} DPRCS1
205
206 // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
207 // the top of the stack frame.
208 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
209 // of SP. If used, it will be below the other save areas, after the stack has
210 // been re-aligned.
211
212 switch (Reg) {
213 default:
214 dbgs() << "Don't know where to spill " << printReg(Reg, TRI: RegInfo) << "\n";
215 llvm_unreachable("Don't know where to spill this register");
216 break;
217
218 case ARM::FPCXTNS:
219 return SpillArea::FPCXT;
220
221 case ARM::FPSCR:
222 case ARM::FPEXC:
223 return SpillArea::FPStatus;
224
225 case ARM::R0:
226 case ARM::R1:
227 case ARM::R2:
228 case ARM::R3:
229 case ARM::R4:
230 case ARM::R5:
231 case ARM::R6:
232 case ARM::R7:
233 return SpillArea::GPRCS1;
234
235 case ARM::R8:
236 case ARM::R9:
237 case ARM::R10:
238 if (Variation == ARMSubtarget::SplitR7)
239 return SpillArea::GPRCS2;
240 else
241 return SpillArea::GPRCS1;
242
243 case ARM::R11:
244 if (Variation == ARMSubtarget::SplitR7 ||
245 Variation == ARMSubtarget::SplitR11AAPCSSignRA)
246 return SpillArea::GPRCS2;
247 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
248 return SpillArea::GPRCS3;
249
250 return SpillArea::GPRCS1;
251
252 case ARM::R12:
253 if (Variation == ARMSubtarget::SplitR7)
254 return SpillArea::GPRCS2;
255 else
256 return SpillArea::GPRCS1;
257
258 case ARM::LR:
259 if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
260 return SpillArea::GPRCS2;
261 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
262 return SpillArea::GPRCS3;
263
264 return SpillArea::GPRCS1;
265
266 case ARM::D0:
267 case ARM::D1:
268 case ARM::D2:
269 case ARM::D3:
270 case ARM::D4:
271 case ARM::D5:
272 case ARM::D6:
273 case ARM::D7:
274 return SpillArea::DPRCS1;
275
276 case ARM::D8:
277 case ARM::D9:
278 case ARM::D10:
279 case ARM::D11:
280 case ARM::D12:
281 case ARM::D13:
282 case ARM::D14:
283 case ARM::D15:
284 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
285 return SpillArea::DPRCS2;
286 else
287 return SpillArea::DPRCS1;
288
289 case ARM::D16:
290 case ARM::D17:
291 case ARM::D18:
292 case ARM::D19:
293 case ARM::D20:
294 case ARM::D21:
295 case ARM::D22:
296 case ARM::D23:
297 case ARM::D24:
298 case ARM::D25:
299 case ARM::D26:
300 case ARM::D27:
301 case ARM::D28:
302 case ARM::D29:
303 case ARM::D30:
304 case ARM::D31:
305 return SpillArea::DPRCS1;
306 }
307}
308
// The ARM stack grows downwards, uses the subtarget's preferred stack
// alignment, and has a local-area offset of 0. Align(4) is passed as the
// transient stack alignment (minimum SP alignment between stack operations).
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
      STI(sti) {}
312
313bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {
314 // iOS always has a FP for backtracking, force other targets to keep their FP
315 // when doing FastISel. The emitted code is currently superior, and in cases
316 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
317 return MF.getSubtarget<ARMSubtarget>().useFastISel();
318}
319
320/// Returns true if the target can safely skip saving callee-saved registers
321/// for noreturn nounwind functions.
322bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
323 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
324 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
325 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
326
327 // Frame pointer and link register are not treated as normal CSR, thus we
328 // can always skip CSR saves for nonreturning functions.
329 return true;
330}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
335bool ARMFrameLowering::hasFPImpl(const MachineFunction &MF) const {
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target want to forcibly keep frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
344 if (MF.getTarget().Options.DisableFramePointerElim(MF))
345 return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
352/// isFPReserved - Return true if the frame pointer register should be
353/// considered a reserved register on the scope of the specified function.
354bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {
355 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
356}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
363bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has small immediate offset to
368 // address the stack frame. So a large call frame can cause poor codegen
369 // and may even makes it impossible to scavenge a register.
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
375
376/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
377/// call frame pseudos can be simplified. Unlike most targets, having a FP
378/// is not sufficient here since we still may reference some objects via SP
379/// even when FP is available in Thumb2 mode.
380bool
381ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
382 return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
383}
384
385// Returns how much of the incoming argument stack area we should clean up in an
386// epilogue. For the C calling convention this will be 0, for guaranteed tail
387// call conventions it can be positive (a normal return or a tail call to a
388// function that uses less stack space for arguments) or negative (for a tail
389// call to a function that needs more stack space than us for arguments).
390static int getArgumentStackToRestore(MachineFunction &MF,
391 MachineBasicBlock &MBB) {
392 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
400 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(i: 1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments, this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
427// Given a load or a store instruction, generate an appropriate unwinding SEH
428// code on Windows.
429static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
430 const TargetInstrInfo &TII,
431 unsigned Flags) {
432 unsigned Opc = MBBI->getOpcode();
433 MachineBasicBlock *MBB = MBBI->getParent();
434 MachineFunction &MF = *MBB->getParent();
435 DebugLoc DL = MBBI->getDebugLoc();
436 MachineInstrBuilder MIB;
437 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
438 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
439
440 Flags |= MachineInstr::NoMerge;
441
442 switch (Opc) {
443 default:
444 report_fatal_error(reason: "No SEH Opcode for instruction " + TII.getName(Opcode: Opc));
445 break;
446 case ARM::t2ADDri: // add.w r11, sp, #xx
447 case ARM::t2ADDri12: // add.w r11, sp, #xx
448 case ARM::t2MOVTi16: // movt r4, #xx
449 case ARM::tBL: // bl __chkstk
450 // These are harmless if used for just setting up a frame pointer,
451 // but that frame pointer can't be relied upon for unwinding, unless
452 // set up with SEH_SaveSP.
453 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
454 .addImm(/*Wide=*/Val: 1)
455 .setMIFlags(Flags);
456 break;
457
458 case ARM::t2MOVi16: { // mov(w) r4, #xx
459 bool Wide = MBBI->getOperand(i: 1).getImm() >= 256;
460 if (!Wide) {
461 MachineInstrBuilder NewInstr =
462 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
463 NewInstr.add(MO: MBBI->getOperand(i: 0));
464 NewInstr.add(MO: t1CondCodeOp(/*isDead=*/true));
465 for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands()))
466 NewInstr.add(MO);
467 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
468 MBB->erase(I: MBBI);
469 MBBI = NewMBBI;
470 }
471 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop)).addImm(Val: Wide).setMIFlags(Flags);
472 break;
473 }
474
475 case ARM::tBLXr: // blx r12 (__chkstk)
476 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
477 .addImm(/*Wide=*/Val: 0)
478 .setMIFlags(Flags);
479 break;
480
481 case ARM::t2MOVi32imm: // movw+movt
482 // This pseudo instruction expands into two mov instructions. If the
483 // second operand is a symbol reference, this will stay as two wide
484 // instructions, movw+movt. If they're immediates, the first one can
485 // end up as a narrow mov though.
486 // As two SEH instructions are appended here, they won't get interleaved
487 // between the two final movw/movt instructions, but it doesn't make any
488 // practical difference.
489 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
490 .addImm(/*Wide=*/Val: 1)
491 .setMIFlags(Flags);
492 MBB->insertAfter(I: MBBI, MI: MIB);
493 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop))
494 .addImm(/*Wide=*/Val: 1)
495 .setMIFlags(Flags);
496 break;
497
498 case ARM::t2STR_PRE:
499 if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
500 MBBI->getOperand(i: 2).getReg() == ARM::SP &&
501 MBBI->getOperand(i: 3).getImm() == -4) {
502 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
503 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
504 .addImm(Val: 1ULL << Reg)
505 .addImm(/*Wide=*/Val: 1)
506 .setMIFlags(Flags);
507 } else {
508 report_fatal_error(reason: "No matching SEH Opcode for t2STR_PRE");
509 }
510 break;
511
512 case ARM::t2LDR_POST:
513 if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
514 MBBI->getOperand(i: 2).getReg() == ARM::SP &&
515 MBBI->getOperand(i: 3).getImm() == 4) {
516 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
517 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveRegs))
518 .addImm(Val: 1ULL << Reg)
519 .addImm(/*Wide=*/Val: 1)
520 .setMIFlags(Flags);
521 } else {
522 report_fatal_error(reason: "No matching SEH Opcode for t2LDR_POST");
523 }
524 break;
525
526 case ARM::t2LDMIA_RET:
527 case ARM::t2LDMIA_UPD:
528 case ARM::t2STMDB_UPD: {
529 unsigned Mask = 0;
530 bool Wide = false;
531 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
532 const MachineOperand &MO = MBBI->getOperand(i);
533 if (!MO.isReg() || MO.isImplicit())
534 continue;
535 unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
536 if (Reg == 15)
537 Reg = 14;
538 if (Reg >= 8 && Reg <= 13)
539 Wide = true;
540 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
541 Wide = true;
542 Mask |= 1 << Reg;
543 }
544 if (!Wide) {
545 unsigned NewOpc;
546 switch (Opc) {
547 case ARM::t2LDMIA_RET:
548 NewOpc = ARM::tPOP_RET;
549 break;
550 case ARM::t2LDMIA_UPD:
551 NewOpc = ARM::tPOP;
552 break;
553 case ARM::t2STMDB_UPD:
554 NewOpc = ARM::tPUSH;
555 break;
556 default:
557 llvm_unreachable("");
558 }
559 MachineInstrBuilder NewInstr =
560 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: NewOpc)).setMIFlags(MBBI->getFlags());
561 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
562 NewInstr.add(MO: MBBI->getOperand(i));
563 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(I: MBBI, MI: NewInstr);
564 MBB->erase(I: MBBI);
565 MBBI = NewMBBI;
566 }
567 unsigned SEHOpc =
568 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
569 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: SEHOpc))
570 .addImm(Val: Mask)
571 .addImm(Val: Wide ? 1 : 0)
572 .setMIFlags(Flags);
573 break;
574 }
575 case ARM::VSTMDDB_UPD:
576 case ARM::VLDMDIA_UPD: {
577 int First = -1, Last = 0;
578 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MBBI->operands(), N: 4)) {
579 unsigned Reg = RegInfo->getSEHRegNum(i: MO.getReg());
580 if (First == -1)
581 First = Reg;
582 Last = Reg;
583 }
584 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveFRegs))
585 .addImm(Val: First)
586 .addImm(Val: Last)
587 .setMIFlags(Flags);
588 break;
589 }
590 case ARM::tSUBspi:
591 case ARM::tADDspi:
592 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
593 .addImm(Val: MBBI->getOperand(i: 2).getImm() * 4)
594 .addImm(/*Wide=*/Val: 0)
595 .setMIFlags(Flags);
596 break;
597 case ARM::t2SUBspImm:
598 case ARM::t2SUBspImm12:
599 case ARM::t2ADDspImm:
600 case ARM::t2ADDspImm12:
601 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
602 .addImm(Val: MBBI->getOperand(i: 2).getImm())
603 .addImm(/*Wide=*/Val: 1)
604 .setMIFlags(Flags);
605 break;
606
607 case ARM::tMOVr:
608 if (MBBI->getOperand(i: 1).getReg() == ARM::SP &&
609 (Flags & MachineInstr::FrameSetup)) {
610 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 0).getReg());
611 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
612 .addImm(Val: Reg)
613 .setMIFlags(Flags);
614 } else if (MBBI->getOperand(i: 0).getReg() == ARM::SP &&
615 (Flags & MachineInstr::FrameDestroy)) {
616 unsigned Reg = RegInfo->getSEHRegNum(i: MBBI->getOperand(i: 1).getReg());
617 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_SaveSP))
618 .addImm(Val: Reg)
619 .setMIFlags(Flags);
620 } else {
621 report_fatal_error(reason: "No SEH Opcode for MOV");
622 }
623 break;
624
625 case ARM::tBX_RET:
626 case ARM::TCRETURNri:
627 case ARM::TCRETURNrinotr12:
628 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
629 .addImm(/*Wide=*/Val: 0)
630 .setMIFlags(Flags);
631 break;
632
633 case ARM::TCRETURNdi:
634 MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::SEH_Nop_Ret))
635 .addImm(/*Wide=*/Val: 1)
636 .setMIFlags(Flags);
637 break;
638 }
639 return MBB->insertAfter(I: MBBI, MI: MIB);
640}
641
642static MachineBasicBlock::iterator
643initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
644 if (MBBI == MBB.begin())
645 return MachineBasicBlock::iterator();
646 return std::prev(x: MBBI);
647}
648
649static void insertSEHRange(MachineBasicBlock &MBB,
650 MachineBasicBlock::iterator Start,
651 const MachineBasicBlock::iterator &End,
652 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
653 if (Start.isValid())
654 Start = std::next(x: Start);
655 else
656 Start = MBB.begin();
657
658 for (auto MI = Start; MI != End;) {
659 auto Next = std::next(x: MI);
660 // Check if this instruction already has got a SEH opcode added. In that
661 // case, don't do this generic mapping.
662 if (Next != End && isSEHInstruction(MI: *Next)) {
663 MI = std::next(x: Next);
664 while (MI != End && isSEHInstruction(MI: *MI))
665 ++MI;
666 continue;
667 }
668 insertSEH(MBBI: MI, TII, Flags: MIFlags);
669 MI = Next;
670 }
671}
672
673static void emitRegPlusImmediate(
674 bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
675 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
676 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
677 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
678 if (isARM)
679 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
680 Pred, PredReg, TII, MIFlags);
681 else
682 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, BaseReg: SrcReg, NumBytes,
683 Pred, PredReg, TII, MIFlags);
684}
685
686static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
687 MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
688 const ARMBaseInstrInfo &TII, int NumBytes,
689 unsigned MIFlags = MachineInstr::NoFlags,
690 ARMCC::CondCodes Pred = ARMCC::AL,
691 unsigned PredReg = 0) {
692 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, DestReg: ARM::SP, SrcReg: ARM::SP, NumBytes,
693 MIFlags, Pred, PredReg);
694}
695
696static int sizeOfSPAdjustment(const MachineInstr &MI) {
697 int RegSize;
698 switch (MI.getOpcode()) {
699 case ARM::VSTMDDB_UPD:
700 RegSize = 8;
701 break;
702 case ARM::STMDB_UPD:
703 case ARM::t2STMDB_UPD:
704 RegSize = 4;
705 break;
706 case ARM::t2STR_PRE:
707 case ARM::STR_PRE_IMM:
708 return 4;
709 default:
710 llvm_unreachable("Unknown push or pop like instruction");
711 }
712
713 int count = 0;
714 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
715 // pred) so the list starts at 4.
716 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
717 count += RegSize;
718 return count;
719}
720
721static bool WindowsRequiresStackProbe(const MachineFunction &MF,
722 size_t StackSizeInBytes) {
723 const MachineFrameInfo &MFI = MF.getFrameInfo();
724 const Function &F = MF.getFunction();
725 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
726
727 StackProbeSize =
728 F.getFnAttributeAsParsedInteger(Kind: "stack-probe-size", Default: StackProbeSize);
729 return (StackSizeInBytes >= StackProbeSize) &&
730 !F.hasFnAttribute(Kind: "no-stack-arg-probe");
731}
732
733namespace {
734
735struct StackAdjustingInsts {
736 struct InstInfo {
737 MachineBasicBlock::iterator I;
738 unsigned SPAdjust;
739 bool BeforeFPSet;
740
741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
742 void dump() {
743 dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
744 << "sp-adjust=" << SPAdjust;
745 I->dump();
746 }
747#endif
748 };
749
750 SmallVector<InstInfo, 4> Insts;
751
752 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
753 bool BeforeFPSet = false) {
754 InstInfo Info = {.I: I, .SPAdjust: SPAdjust, .BeforeFPSet: BeforeFPSet};
755 Insts.push_back(Elt: Info);
756 }
757
758 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
759 auto Info =
760 llvm::find_if(Range&: Insts, P: [&](InstInfo &Info) { return Info.I == I; });
761 assert(Info != Insts.end() && "invalid sp adjusting instruction");
762 Info->SPAdjust += ExtraBytes;
763 }
764
765 void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
766 CFIInstBuilder CFIBuilder(MBB, MBB.end(), MachineInstr::FrameSetup);
767 unsigned CFAOffset = 0;
768 for (auto &Info : Insts) {
769 if (HasFP && !Info.BeforeFPSet)
770 return;
771
772 CFAOffset += Info.SPAdjust;
773 CFIBuilder.setInsertPoint(std::next(x: Info.I));
774 CFIBuilder.buildDefCFAOffset(Offset: CFAOffset);
775 }
776 }
777
778#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
779 void dump() {
780 dbgs() << "StackAdjustingInsts:\n";
781 for (auto &Info : Insts)
782 Info.dump();
783 }
784#endif
785};
786
787} // end anonymous namespace
788
789/// Emit an instruction sequence that will align the address in
790/// register Reg by zero-ing out the lower bits. For versions of the
791/// architecture that support Neon, this must be done in a single
792/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
793/// single instruction. That function only gets called when optimizing
794/// spilling of D registers on a core with the Neon instruction set
795/// present.
796static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
797 const TargetInstrInfo &TII,
798 MachineBasicBlock &MBB,
799 MachineBasicBlock::iterator MBBI,
800 const DebugLoc &DL, const unsigned Reg,
801 const Align Alignment,
802 const bool MustBeSingleInstruction) {
803 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
804 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
805 const unsigned AlignMask = Alignment.value() - 1U;
806 const unsigned NrBitsToZero = Log2(A: Alignment);
807 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
808 if (!AFI->isThumbFunction()) {
809 // if the BFC instruction is available, use that to zero the lower
810 // bits:
811 // bfc Reg, #0, log2(Alignment)
812 // otherwise use BIC, if the mask to zero the required number of bits
813 // can be encoded in the bic immediate field
814 // bic Reg, Reg, Alignment-1
815 // otherwise, emit
816 // lsr Reg, Reg, log2(Alignment)
817 // lsl Reg, Reg, log2(Alignment)
818 if (CanUseBFC) {
819 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BFC), DestReg: Reg)
820 .addReg(RegNo: Reg, flags: RegState::Kill)
821 .addImm(Val: ~AlignMask)
822 .add(MOs: predOps(Pred: ARMCC::AL));
823 } else if (AlignMask <= 255) {
824 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::BICri), DestReg: Reg)
825 .addReg(RegNo: Reg, flags: RegState::Kill)
826 .addImm(Val: AlignMask)
827 .add(MOs: predOps(Pred: ARMCC::AL))
828 .add(MO: condCodeOp());
829 } else {
830 assert(!MustBeSingleInstruction &&
831 "Shouldn't call emitAligningInstructions demanding a single "
832 "instruction to be emitted for large stack alignment for a target "
833 "without BFC.");
834 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
835 .addReg(RegNo: Reg, flags: RegState::Kill)
836 .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsr, Imm: NrBitsToZero))
837 .add(MOs: predOps(Pred: ARMCC::AL))
838 .add(MO: condCodeOp());
839 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVsi), DestReg: Reg)
840 .addReg(RegNo: Reg, flags: RegState::Kill)
841 .addImm(Val: ARM_AM::getSORegOpc(ShOp: ARM_AM::lsl, Imm: NrBitsToZero))
842 .add(MOs: predOps(Pred: ARMCC::AL))
843 .add(MO: condCodeOp());
844 }
845 } else {
846 // Since this is only reached for Thumb-2 targets, the BFC instruction
847 // should always be available.
848 assert(CanUseBFC);
849 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: ARM::t2BFC), DestReg: Reg)
850 .addReg(RegNo: Reg, flags: RegState::Kill)
851 .addImm(Val: ~AlignMask)
852 .add(MOs: predOps(Pred: ARMCC::AL));
853 }
854}
855
856/// We need the offset of the frame pointer relative to other MachineFrameInfo
857/// offsets which are encoded relative to SP at function begin.
858/// See also emitPrologue() for how the FP is set up.
859/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
860/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
861/// this to produce a conservative estimate that we check in an assert() later.
862static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
863 const MachineFunction &MF) {
864 ARMSubtarget::PushPopSplitVariation PushPopSplit =
865 STI.getPushPopSplitVariation(MF);
866 // For Thumb1, push.w isn't available, so the first push will always push
867 // r7 and lr onto the stack first.
868 if (AFI.isThumb1OnlyFunction())
869 return -AFI.getArgRegsSaveSize() - (2 * 4);
870 // This is a conservative estimation: Assume the frame pointer being r7 and
871 // pc("r15") up to r8 getting spilled before (= 8 registers).
872 int MaxRegBytes = 8 * 4;
873 if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
874 // Here, r11 can be stored below all of r4-r15.
875 MaxRegBytes = 11 * 4;
876 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
877 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
878 MaxRegBytes = 11 * 4 + 8 * 8;
879 }
880 int FPCXTSaveSize =
881 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
882 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
883}
884
/// Emit the prologue for the current function into \p MBB: push the
/// callee-saved registers area by area, allocate the local stack frame, set up
/// the frame pointer if one is needed, and record the matching unwind
/// information (DWARF CFI or Windows SEH). See the frame layout diagram at the
/// top of this file for the ordering of the individual areas.
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetMachine &TM = MF.getTarget();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;
  bool NeedsWinCFI = needsWinCFI(MF);
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
           DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  // Fast path: no callee-saved registers to spill, so at most a plain SP
  // adjustment (plus its unwind info) is needed.
  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes))) {
    if (NumBytes != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes, BeforeFPSet: true);
    }
    if (!NeedsWinCFI)
      DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
    if (NeedsWinCFI && MBBI != MBB.begin()) {
      insertSEHRange(MBB, Start: {}, End: MBBI, TII, MIFlags: MachineInstr::FrameSetup);
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
      MF.setHasWinCFI(true);
    }
    return;
  }

  // Determine spill area sizes, and some important frame indices.
  SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
  bool BeforeFPPush = true;
  for (const CalleeSavedInfo &I : CSI) {
    MCRegister Reg = I.getReg();
    int FI = I.getFrameIdx();

    SpillArea Area = getSpillArea(Reg, Variation: PushPopSplit,
                                  NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(), RegInfo);

    if (Reg == FramePtr.asMCReg()) {
      FramePtrSpillFI = FI;
      FramePtrSpillArea = Area;
    }
    if (Reg == ARM::D8)
      D8SpillFI = FI;

    switch (Area) {
    case SpillArea::FPCXT:
      FPCXTSaveSize += 4;
      break;
    case SpillArea::GPRCS1:
      GPRCS1Size += 4;
      break;
    case SpillArea::GPRCS2:
      GPRCS2Size += 4;
      break;
    case SpillArea::FPStatus:
      FPStatusSize += 4;
      break;
    case SpillArea::DPRCS1:
      DPRCS1Size += 8;
      break;
    case SpillArea::GPRCS3:
      GPRCS3Size += 4;
      break;
    case SpillArea::DPRCS2:
      DPRCS2Size += 8;
      break;
    }
  }

  // Iterators pointing at the push instruction of each area. These are used
  // later to place CFI instructions and to compute FP-relative offsets.
  MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
                              DPRCS1Push, GPRCS3Push;

  // Move past the PAC computation.
  if (AFI->shouldSignReturnAddress())
    LastPush = MBBI++;

  // Move past FPCXT area.
  if (FPCXTSaveSize > 0) {
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPCXTSaveSize, BeforeFPSet: BeforeFPPush);
  }

  // Allocate the vararg register save area.
  if (ArgRegsSaveSize) {
    emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -ArgRegsSaveSize,
                 MIFlags: MachineInstr::FrameSetup);
    LastPush = std::prev(x: MBBI);
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: ArgRegsSaveSize, BeforeFPSet: BeforeFPPush);
  }

  // Move past area 1.
  if (GPRCS1Size > 0) {
    GPRCS1Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS1Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS1)
      BeforeFPPush = false;
  }

  // Determine starting offsets of spill areas. These offsets are all positive
  // offsets from the bottom of the lowest-addressed callee-save area
  // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
  // of the spill area in question.
  unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
  unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
  unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
  unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;

  Align DPRAlign = DPRCS1Size ? std::min(a: Align(8), b: Alignment) : Align(4);
  // Bytes of padding needed so the DPR saves end up 8-byte aligned.
  unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                         GPRCS2Size + FPStatusSize) %
                        DPRAlign.value();

  unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;

  if (HasFP) {
    // Offset from the CFA to the saved frame pointer, will be negative.
    [[maybe_unused]] int FPOffset = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI);
    LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
                      << ", FPOffset: " << FPOffset << "\n");
    assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
           "Max FP estimation is wrong");
    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                                NumBytes);
  }
  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
  AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);

  // Move past area 2.
  if (GPRCS2Size > 0) {
    assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
    GPRCS2Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS2Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS2)
      BeforeFPPush = false;
  }

  // Move past FP status save area.
  if (FPStatusSize > 0) {
    // Skip the VMRS/VMRS_FPEXC instructions that read the status values being
    // saved; the push that stores them follows.
    while (MBBI != MBB.end()) {
      unsigned Opc = MBBI->getOpcode();
      if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
        MBBI++;
      else
        break;
    }
    LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: FPStatusSize);
  }

  // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
  // .cfi_offset operations will reflect that.
  if (DPRGapSize) {
    assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
    if (LastPush != MBB.end() &&
        tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes: DPRGapSize))
      DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: DPRGapSize);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -DPRGapSize,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: DPRGapSize, BeforeFPSet: BeforeFPPush);
    }
  }

  // Move past DPRCS1Size.
  if (DPRCS1Size > 0) {
    // Since vpush register list cannot have gaps, there may be multiple vpush
    // instructions in the prologue.
    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
      DefCFAOffsetCandidates.addInst(I: MBBI, SPAdjust: sizeOfSPAdjustment(MI: *MBBI),
                                     BeforeFPSet: BeforeFPPush);
      DPRCS1Push = LastPush = MBBI++;
    }
  }

  // Move past the aligned DPRCS2 area.
  if (DPRCS2Size > 0) {
    MBBI = skipAlignedDPRCS2Spills(MI: MBBI, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs());
    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
    // leaves the stack pointer pointing to the DPRCS2 area.
    //
    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
    NumBytes += MFI.getObjectOffset(ObjectIdx: D8SpillFI);
  } else
    NumBytes = DPRCS1Offset;

  // Move GPRCS3, if using SplitR11WindowsSEH.
  if (GPRCS3Size > 0) {
    assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
    GPRCS3Push = LastPush = MBBI++;
    DefCFAOffsetCandidates.addInst(I: LastPush, SPAdjust: GPRCS3Size, BeforeFPSet: BeforeFPPush);
    if (FramePtrSpillArea == SpillArea::GPRCS3)
      BeforeFPPush = false;
  }

  // With SplitR11WindowsSEH and a frame pointer, no explicit SEH StackAlloc
  // opcode is emitted for the stack allocation below.
  bool NeedsWinCFIStackAlloc = NeedsWinCFI;
  if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
    NeedsWinCFIStackAlloc = false;

  // Large Windows allocations must probe the stack via __chkstk, which takes
  // the allocation size in words in r4 and is followed by the actual SP
  // subtraction.
  if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, StackSizeInBytes: NumBytes)) {
    uint32_t NumWords = NumBytes >> 2;

    if (NumWords < 65536) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
          .addImm(Val: NumWords)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      // Split into two instructions here, instead of using t2MOVi32imm,
      // to allow inserting accurate SEH instructions (including accurate
      // instruction size for each of them).
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg: ARM::R4)
          .addImm(Val: NumWords & 0xffff)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVTi16), DestReg: ARM::R4)
          .addReg(RegNo: ARM::R4)
          .addImm(Val: NumWords >> 16)
          .setMIFlags(MachineInstr::FrameSetup)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    switch (TM.getCodeModel()) {
    case CodeModel::Tiny:
      llvm_unreachable("Tiny code model not available on ARM.");
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBL))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addExternalSymbol(FnName: "__chkstk")
          .addReg(RegNo: ARM::R4, flags: RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      // The large code model cannot assume __chkstk is in BL range, so load
      // its address into r12 and call through the register.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi32imm), DestReg: ARM::R12)
          .addExternalSymbol(FnName: "__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tBLXr))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addReg(RegNo: ARM::R12, flags: RegState::Kill)
          .addReg(RegNo: ARM::R4, flags: RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    MachineInstrBuilder Instr, SEH;
    Instr = BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2SUBrr), DestReg: ARM::SP)
                .addReg(RegNo: ARM::SP, flags: RegState::Kill)
                .addReg(RegNo: ARM::R4, flags: RegState::Kill)
                .setMIFlags(MachineInstr::FrameSetup)
                .add(MOs: predOps(Pred: ARMCC::AL))
                .add(MO: condCodeOp());
    if (NeedsWinCFIStackAlloc) {
      SEH = BuildMI(MF, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_StackAlloc))
                .addImm(Val: NumBytes)
                .addImm(/*Wide=*/Val: 1)
                .setMIFlags(MachineInstr::FrameSetup);
      MBB.insertAfter(I: Instr, MI: SEH);
    }
    NumBytes = 0;
  }

  if (NumBytes) {
    // Adjust SP after all the callee-save spills.
    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
        tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*LastPush, NumBytes))
      DefCFAOffsetCandidates.addExtraBytes(I: LastPush, ExtraBytes: NumBytes);
    else {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: -NumBytes,
                   MIFlags: MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(I: std::prev(x: MBBI), SPAdjust: NumBytes);
    }

    if (HasFP && isARM)
      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
      // Note it's not safe to do this in Thumb2 mode because it would have
      // taken two instructions:
      // mov sp, r7
      // sub sp, #24
      // If an interrupt is taken between the two instructions, then sp is in
      // an inconsistent state (pointing to the middle of callee-saved area).
      // The interrupt handler can end up clobbering the registers.
      AFI->setShouldRestoreSPFromFP(true);
  }

  // Set FP to point to the stack slot that contains the previous FP.
  // For iOS, FP is R7, which has now been stored in spill area 1.
  // Otherwise, if this is not iOS, all the callee-saved registers go
  // into spill area 1, including the FP in R11. In either case, it
  // is in area one and the adjustment needs to take place just after
  // that push.
  MachineBasicBlock::iterator AfterPush;
  if (HasFP) {
    MachineBasicBlock::iterator FPPushInst;
    // Offset from SP immediately after the push which saved the FP to the FP
    // save slot.
    int64_t FPOffsetAfterPush;
    switch (FramePtrSpillArea) {
    case SpillArea::GPRCS1:
      FPPushInst = GPRCS1Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    case SpillArea::GPRCS2:
      FPPushInst = GPRCS2Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    case SpillArea::GPRCS3:
      FPPushInst = GPRCS3Push;
      FPOffsetAfterPush = MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) +
                          ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
                          FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
                          sizeOfSPAdjustment(MI: *FPPushInst);
      LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
                        << FPOffsetAfterPush << " after that push\n");
      break;
    default:
      llvm_unreachable("frame pointer in unknown spill area");
      break;
    }
    AfterPush = std::next(x: FPPushInst);
    if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
      assert(FPOffsetAfterPush == 0);

    // Emit the MOV or ADD to set up the frame pointer register.
    emitRegPlusImmediate(isARM: !AFI->isThumbFunction(), MBB, MBBI&: AfterPush, dl, TII,
                         DestReg: FramePtr, SrcReg: ARM::SP, NumBytes: FPOffsetAfterPush,
                         MIFlags: MachineInstr::FrameSetup);

    if (!NeedsWinCFI) {
      // Emit DWARF info to find the CFA using the frame pointer from this
      // point onward.
      CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
      if (FPOffsetAfterPush != 0)
        CFIBuilder.buildDefCFA(Reg: FramePtr, Offset: -MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI));
      else
        CFIBuilder.buildDefCFARegister(Reg: FramePtr);
    }
  }

  // Emit a SEH opcode indicating the prologue end. The rest of the prologue
  // instructions below don't need to be replayed to unwind the stack.
  if (NeedsWinCFI && MBBI != MBB.begin()) {
    MachineBasicBlock::iterator End = MBBI;
    if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
      End = AfterPush;
    insertSEHRange(MBB, Start: {}, End, TII, MIFlags: MachineInstr::FrameSetup);
    BuildMI(BB&: MBB, I: End, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
    MF.setHasWinCFI(true);
  }

  // Now that the prologue's actual instructions are finalised, we can insert
  // the necessary DWARF cf instructions to describe the situation. Start by
  // recording where each register ended up:
  if (!NeedsWinCFI) {
    for (const auto &Entry : reverse(C: CSI)) {
      MCRegister Reg = Entry.getReg();
      int FI = Entry.getFrameIdx();
      MachineBasicBlock::iterator CFIPos;
      switch (getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs: AFI->getNumAlignedDPRCS2Regs(),
                           RegInfo)) {
      case SpillArea::GPRCS1:
        CFIPos = std::next(x: GPRCS1Push);
        break;
      case SpillArea::GPRCS2:
        CFIPos = std::next(x: GPRCS2Push);
        break;
      case SpillArea::DPRCS1:
        CFIPos = std::next(x: DPRCS1Push);
        break;
      case SpillArea::GPRCS3:
        CFIPos = std::next(x: GPRCS3Push);
        break;
      case SpillArea::FPStatus:
      case SpillArea::FPCXT:
      case SpillArea::DPRCS2:
        // FPCXT, FPStatus and DPRCS2 are not represented in the DWARF info.
        break;
      }

      if (CFIPos.isValid()) {
        CFIInstBuilder(MBB, CFIPos, MachineInstr::FrameSetup)
            .buildOffset(Reg: Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
                         Offset: MFI.getObjectOffset(ObjectIdx: FI));
      }
    }
  }

  // Now we can emit descriptions of where the canonical frame address was
  // throughout the process. If we have a frame pointer, it takes over the job
  // half-way through, so only the first few .cfi_def_cfa_offset instructions
  // actually get emitted.
  if (!NeedsWinCFI) {
    LLVM_DEBUG(DefCFAOffsetCandidates.dump());
    DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
  }

  if (STI.isTargetELF() && hasFP(MF))
    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
                            AFI->getFramePtrSpillOffset());

  // Record the final area sizes for use by the epilogue and by frame-index
  // resolution.
  AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
  AFI->setFPStatusSavesSize(FPStatusSize);
  AFI->setDPRCalleeSavedGapSize(DPRGapSize);
  AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
  AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);

  // If we need dynamic stack realignment, do it here. Be paranoid and make
  // sure if we also have VLAs, we have a base pointer for frame access.
  // If aligned NEON registers were spilled, the stack has already been
  // realigned.
  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
    Align MaxAlign = MFI.getMaxAlign();
    assert(!AFI->isThumb1OnlyFunction());
    if (!AFI->isThumbFunction()) {
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::SP, Alignment: MaxAlign,
                               MustBeSingleInstruction: false);
    } else {
      // We cannot use sp as source/dest register here, thus we're using r4 to
      // perform the calculations. We're emitting the following sequence:
      // mov r4, sp
      // -- use emitAligningInstructions to produce best sequence to zero
      // -- out lower bits in r4
      // mov sp, r4
      // FIXME: It will be better just to find spare register here.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::R4)
          .addReg(RegNo: ARM::SP, flags: RegState::Kill)
          .add(MOs: predOps(Pred: ARMCC::AL));
      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, DL: dl, Reg: ARM::R4, Alignment: MaxAlign,
                               MustBeSingleInstruction: false);
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
          .addReg(RegNo: ARM::R4, flags: RegState::Kill)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    AFI->setShouldRestoreSPFromFP(true);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  // FIXME: Clarify FrameSetup flags here.
  if (RegInfo->hasBasePointer(MF)) {
    if (isARM)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: RegInfo->getBaseRegister())
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    else
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: RegInfo->getBaseRegister())
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // If the frame has variable sized objects then the epilogue must restore
  // the sp from fp. We can assume there's an FP here since hasFP already
  // checks for hasVarSizedObjects.
  if (MFI.hasVarSizedObjects())
    AFI->setShouldRestoreSPFromFP(true);
}
1392
/// Emit the epilogue for the current function into \p MBB: undo the stack
/// allocation (restoring SP from FP when required), step past the
/// callee-saved restore instructions already emitted by the CSR restorer, and
/// emit the matching Windows SEH unwind opcodes when needed.
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitEpilogue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");

  // Amount of stack space we reserved next to incoming args for either
  // varargs registers or stack arguments in tail calls made by this function.
  unsigned ReservedArgStack = AFI->getArgRegsSaveSize();

  // How much of the stack used by incoming arguments this function is expected
  // to restore in this particular epilogue.
  int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
  int NumBytes = (int)MFI.getStackSize();
  Register FramePtr = RegInfo->getFrameRegister(MF);

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // First put ourselves on the first (from top) terminator instructions.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

  MachineBasicBlock::iterator RangeStart;
  if (!AFI->hasStackFrame()) {
    // Fast path: no callee-saved restores, at most one SP adjustment.
    if (MF.hasWinCFI()) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    if (NumBytes + IncomingArgStackToRestore != 0)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes: NumBytes + IncomingArgStackToRestore,
                   MIFlags: MachineInstr::FrameDestroy);
  } else {
    // Unwind MBBI to point to first LDR / VLDRD.
    if (MBBI != MBB.begin()) {
      do {
        --MBBI;
      } while (MBBI != MBB.begin() &&
               MBBI->getFlag(Flag: MachineInstr::FrameDestroy));
      if (!MBBI->getFlag(Flag: MachineInstr::FrameDestroy))
        ++MBBI;
    }

    if (MF.hasWinCFI()) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogStart))
          .setMIFlag(MachineInstr::FrameDestroy);
      RangeStart = initMBBRange(MBB, MBBI);
    }

    // Move SP to start of FP callee save spill area.
    NumBytes -=
        (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
         AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() +
         AFI->getFPStatusSavesSize() + AFI->getDPRCalleeSavedGapSize() +
         AFI->getDPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea3Size());

    // Reset SP based on frame pointer only if the stack frame extends beyond
    // frame pointer stack slot or target is ELF and the function has FP.
    if (AFI->shouldRestoreSPFromFP()) {
      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
      if (NumBytes) {
        if (isARM)
          emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: FramePtr, NumBytes: -NumBytes,
                                  Pred: ARMCC::AL, PredReg: 0, TII,
                                  MIFlags: MachineInstr::FrameDestroy);
        else {
          // It's not possible to restore SP from FP in a single instruction.
          // For iOS, this looks like:
          // mov sp, r7
          // sub sp, #24
          // This is bad, if an interrupt is taken after the mov, sp is in an
          // inconsistent state.
          // Use the first callee-saved register as a scratch register.
          assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
                 "No scratch register to restore SP from FP!");
          emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::R4, BaseReg: FramePtr, NumBytes: -NumBytes,
                                 Pred: ARMCC::AL, PredReg: 0, TII, MIFlags: MachineInstr::FrameDestroy);
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
              .addReg(RegNo: ARM::R4)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
        }
      } else {
        // Thumb2 or ARM.
        if (isARM)
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ARM::SP)
              .addReg(RegNo: FramePtr)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .add(MO: condCodeOp())
              .setMIFlag(MachineInstr::FrameDestroy);
        else
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP)
              .addReg(RegNo: FramePtr)
              .add(MOs: predOps(Pred: ARMCC::AL))
              .setMIFlag(MachineInstr::FrameDestroy);
      }
    } else if (NumBytes &&
               !tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*MBBI, NumBytes))
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
                   MIFlags: MachineInstr::FrameDestroy);

    // Increment past our save areas.
    if (AFI->getGPRCalleeSavedArea3Size()) {
      assert(PushPopSplit == ARMSubtarget::SplitR11WindowsSEH);
      (void)PushPopSplit;
      MBBI++;
    }

    if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
      MBBI++;
      // Since vpop register list cannot have gaps, there may be multiple vpop
      // instructions in the epilogue.
      while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
        MBBI++;
    }
    // Undo the alignment padding inserted between the GPR and DPR saves.
    if (AFI->getDPRCalleeSavedGapSize()) {
      assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
             "unexpected DPR alignment gap");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes: AFI->getDPRCalleeSavedGapSize(),
                   MIFlags: MachineInstr::FrameDestroy);
    }

    if (AFI->getGPRCalleeSavedArea2Size()) {
      assert(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH);
      (void)PushPopSplit;
      MBBI++;
    }
    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;

    // Pop the varargs save area and any incoming-argument stack this epilogue
    // is responsible for (e.g. after a tail call).
    if (ReservedArgStack || IncomingArgStackToRestore) {
      assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
             "attempting to restore negative stack amount");
      emitSPUpdate(isARM, MBB, MBBI, dl, TII,
                   NumBytes: ReservedArgStack + IncomingArgStackToRestore,
                   MIFlags: MachineInstr::FrameDestroy);
    }

    // Validate PAC, It should have been already popped into R12. For CMSE entry
    // function, the validation instruction is emitted during expansion of the
    // tBXNS_RET, since the validation must use the value of SP at function
    // entry, before saving, resp. after restoring, FPCXTNS.
    if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
      BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2AUT));
  }

  if (MF.hasWinCFI()) {
    insertSEHRange(MBB, Start: RangeStart, End: MBB.end(), TII, MIFlags: MachineInstr::FrameDestroy);
    BuildMI(BB&: MBB, I: MBB.end(), MIMD: dl, MCID: TII.get(Opcode: ARM::SEH_EpilogEnd))
        .setMIFlag(MachineInstr::FrameDestroy);
  }
}
1558
1559/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1560/// debug info. It's the same as what we use for resolving the code-gen
1561/// references for now. FIXME: This can go wrong when references are
1562/// SP-relative and simple call frames aren't used.
1563StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1564 int FI,
1565 Register &FrameReg) const {
1566 return StackOffset::getFixed(Fixed: ResolveFrameIndexReference(MF, FI, FrameReg, SPAdj: 0));
1567}
1568
/// Resolve frame index \p FI to a base register (written to \p FrameReg) and
/// a byte offset from that register, choosing between SP, the frame pointer
/// and the base pointer depending on stack realignment, VLAs and offset
/// encoding ranges. \p SPAdj is an extra adjustment applied to SP-relative
/// offsets (e.g. inside a non-reserved call frame).
int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 int SPAdj) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Offset of the slot relative to SP after the prologue has run.
  int Offset = MFI.getObjectOffset(ObjectIdx: FI) + MFI.getStackSize();
  // The same slot expressed relative to the frame pointer.
  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
  bool isFixed = MFI.isFixedObjectIndex(ObjectIdx: FI);

  FrameReg = ARM::SP;
  Offset += SPAdj;

  // SP can move around if there are allocas. We may also lose track of SP
  // when emergency spilling inside a non-reserved call frame setup.
  bool hasMovingSP = !hasReservedCallFrame(MF);

  // When dynamically realigning the stack, use the frame pointer for
  // parameters, and the stack/base pointer for locals.
  if (RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "dynamic stack realignment without a FP!");
    if (isFixed) {
      FrameReg = RegInfo->getFrameRegister(MF);
      Offset = FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) &&
             "VLAs and dynamic stack alignment, but missing base pointer!");
      FrameReg = RegInfo->getBaseRegister();
      // The base pointer does not move with the call frame, so undo SPAdj.
      Offset -= SPAdj;
    }
    return Offset;
  }

  // If there is a frame pointer, use it when we can.
  if (hasFP(MF) && AFI->hasStackFrame()) {
    // Use frame pointer to reference fixed objects. Use it for locals if
    // there are VLAs (and thus the SP isn't reliable as a base).
    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    } else if (hasMovingSP) {
      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
      if (AFI->isThumb2Function()) {
        // Try to use the frame pointer if we can, else use the base pointer
        // since it's available. This is handy for the emergency spill slot, in
        // particular.
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        }
      }
    } else if (AFI->isThumbFunction()) {
      // Prefer SP to base pointer, if the offset is suitably aligned and in
      // range as the effective range of the immediate offset is bigger when
      // basing off SP.
      // Use  add <rd>, sp, #<imm8>
      //      ldr <rd>, [sp, #<imm8>]
      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
        return Offset;
      // In Thumb2 mode, the negative offset is very limited. Try to avoid
      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
      if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
      }
    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
      // Otherwise, use SP or FP, whichever is closer to the stack slot.
      FrameReg = RegInfo->getFrameRegister(MF);
      return FPOffset;
    }
  }
  // Use the base pointer if we have one.
  // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
  // That can happen if we forced a base pointer for a large call frame.
  if (RegInfo->hasBasePointer(MF)) {
    FrameReg = RegInfo->getBaseRegister();
    // The base pointer does not move with the call frame, so undo SPAdj.
    Offset -= SPAdj;
  }
  return Offset;
}
1650
/// Emit push instructions that spill the callee-saved registers in \p CSI
/// accepted by the predicate \p Func.
///
/// The selected registers are sorted by encoding value and stored with a
/// single multi-register store (\p StmOpc, e.g. STMDB_UPD / VSTMDDB_UPD), or
/// with the pre-indexed single-register store \p StrOpc when exactly one
/// register remains and StrOpc is non-zero. If \p NoGap is set, one
/// instruction may only cover consecutive registers (a VPUSH encoding
/// constraint), so the list is emitted as several contiguous chunks.
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    unsigned StmOpc, unsigned StrOpc,
                                    bool NoGap,
                                    function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  DebugLoc DL;

  // Pair of (register, should-set-kill-flag).
  using RegAndKill = std::pair<unsigned, bool>;

  SmallVector<RegAndKill, 4> Regs;
  unsigned i = CSI.size();
  // Walk CSI from the end so that, with NoGap, the chunk holding the
  // highest-numbered registers is gathered (and emitted) first.
  while (i != 0) {
    unsigned LastReg = 0;
    for (; i != 0; --i) {
      MCRegister Reg = CSI[i-1].getReg();
      if (!Func(Reg))
        continue;

      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isLiveIn = MRI.isLiveIn(Reg);
      if (!isLiveIn && !MRI.isReserved(PhysReg: Reg))
        MBB.addLiveIn(PhysReg: Reg);
      // If NoGap is true, push consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;
      LastReg = Reg;
      // Do not set a kill flag on values that are also marked as live-in. This
      // happens with the @llvm-returnaddress intrinsic and with arguments
      // passed in callee saved registers.
      // Omitting the kill flags is conservatively correct even if the live-in
      // is not used after all.
      Regs.push_back(Elt: std::make_pair(x&: Reg, /*isKill=*/y: !isLiveIn));
    }

    if (Regs.empty())
      continue;

    // Store-multiple encodings require the register list in ascending
    // encoding order.
    llvm::sort(C&: Regs, Comp: [&](const RegAndKill &LHS, const RegAndKill &RHS) {
      return TRI.getEncodingValue(Reg: LHS.first) < TRI.getEncodingValue(Reg: RHS.first);
    });

    if (Regs.size() > 1 || StrOpc== 0) {
      // Multi-register store with SP writeback.
      MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StmOpc), DestReg: ARM::SP)
                                    .addReg(RegNo: ARM::SP)
                                    .setMIFlags(MachineInstr::FrameSetup)
                                    .add(MOs: predOps(Pred: ARMCC::AL));
      for (const auto &[Reg, Kill] : Regs)
        MIB.addReg(RegNo: Reg, flags: getKillRegState(B: Kill));
    } else if (Regs.size() == 1) {
      // Single register: pre-indexed store, sp := sp - 4.
      BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: StrOpc), DestReg: ARM::SP)
          .addReg(RegNo: Regs[0].first, flags: getKillRegState(B: Regs[0].second))
          .addReg(RegNo: ARM::SP)
          .setMIFlags(MachineInstr::FrameSetup)
          .addImm(Val: -4)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpush instructions before this one: they will refer to
    // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
    if (MI != MBB.begin())
      --MI;
  }
}
1723
/// Emit pop instructions that reload the callee-saved registers in \p CSI
/// accepted by the predicate \p Func; the mirror image of emitPushInst.
///
/// \p LdmOpc is the multi-register load-with-writeback opcode, \p LdrOpc the
/// post-indexed single-register load (0 if only the LDM form is allowed),
/// and \p NoGap restricts each instruction to consecutive registers (a VPOP
/// encoding constraint). When LR is being restored and the block ends in a
/// plain return that can be subsumed, LR is popped straight into PC and the
/// return instruction is deleted.
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  // Classify the terminator (if any) so we know whether popping LR directly
  // into PC is a legal way to return from this block.
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall =
        (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
         RetOpcode == ARM::TCRETURNrinotr12);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap =
        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
        RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  // Walk CSI from the end so that, with NoGap, the chunk holding the
  // highest-numbered registers is handled first.
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i-1];
      MCRegister Reg = Info.getReg();
      if (!Func(Reg))
        continue;

      // Restoring LR: if the block ends in a simple return (no tail call,
      // interrupt return, trap, CMSE exit, stack-argument restore, or PAC
      // authentication needed), fold it by popping into PC instead.
      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
          (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
           PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg-1)
        break;

      LastReg = Reg;
      Regs.push_back(Elt: Reg);
    }

    if (Regs.empty())
      continue;

    // Load-multiple encodings require the register list in ascending
    // encoding order.
    llvm::sort(C&: Regs, Comp: [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(Reg: LHS) < TRI.getEncodingValue(Reg: RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
                                    .addReg(RegNo: ARM::SP)
                                    .add(MOs: predOps(Pred: ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned Reg : Regs)
        MIB.addReg(RegNo: Reg, flags: getDefRegState(B: true));
      if (DeleteRet) {
        if (MI != MBB.end()) {
          // Keep the implicit operands (e.g. return-value uses) of the
          // return instruction we are folding away.
          MIB.copyImplicitOps(OtherMI: *MI);
          MI->eraseFromParent();
        }
      }
      MI = MIB;
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: LdrOpc), DestReg: Regs[0])
              .addReg(RegNo: ARM::SP, flags: RegState::Define)
              .addReg(RegNo: ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(RegNo: 0);
        MIB.addImm(Val: ARM_AM::getAM2Opc(Opc: ARM_AM::add, Imm12: 4, SO: ARM_AM::no_shift));
      } else
        MIB.addImm(Val: 4);
      MIB.add(MOs: predOps(Pred: ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}
1836
/// Save the FP status registers (FPSCR / FPEXC) in the prologue.
///
/// These system registers cannot be pushed directly: FPSCR is first copied
/// into R4 (and FPEXC into R5) with VMRS / VMRS_FPEXC, then the GPR copies
/// are pushed with \p PushOpc. emitFPStatusRestores pops into the same
/// registers, so the R4/FPSCR and R5/FPEXC pairing is a fixed convention
/// between the two functions.
void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         ArrayRef<CalleeSavedInfo> CSI,
                                         unsigned PushOpc) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  SmallVector<MCRegister> Regs;
  // True if Reg appears in the callee-saved list.
  auto RegPresent = [&CSI](MCRegister Reg) {
    return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
      return C.getReg() == Reg;
    });
  };

  // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
  // instruction.
  if (RegPresent(ARM::FPSCR)) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS), DestReg: ARM::R4)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .setMIFlags(MachineInstr::FrameSetup);

    Regs.push_back(Elt: ARM::R4);
  }

  // If we need to save FPEXC, then we must move FPEXC into R5 with the
  // VMRS_FPEXC instruction.
  if (RegPresent(ARM::FPEXC)) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::VMRS_FPEXC), DestReg: ARM::R5)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .setMIFlags(MachineInstr::FrameSetup);

    Regs.push_back(Elt: ARM::R5);
  }

  // If neither FPSCR and FPEXC are present, then do nothing.
  if (Regs.size() == 0)
    return;

  // Push both R4 and R5 onto the stack, if present.
  MachineInstrBuilder MIB =
      BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: PushOpc), DestReg: ARM::SP)
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .setMIFlags(MachineInstr::FrameSetup);

  for (Register Reg : Regs) {
    MIB.addReg(RegNo: Reg);
  }
}
1886
/// Restore the FP status registers (FPSCR / FPEXC) in the epilogue; the
/// mirror image of emitFPStatusSaves.
///
/// The saved values are popped into the same scratch GPRs used on the save
/// path (FPSCR -> R4, FPEXC -> R5) with \p LdmOpc, then moved back into the
/// system registers with VMSR / VMSR_FPEXC.
void ARMFrameLowering::emitFPStatusRestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // True if Reg appears in the callee-saved list.
  auto RegPresent = [&CSI](MCRegister Reg) {
    return llvm::any_of(Range&: CSI, P: [Reg](const CalleeSavedInfo &C) {
      return C.getReg() == Reg;
    });
  };

  // Do nothing if we don't need to restore any FP status registers.
  if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
    return;

  // Pop registers off of the stack.
  MachineInstrBuilder MIB =
      BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: LdmOpc), DestReg: ARM::SP)
          .addReg(RegNo: ARM::SP)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .setMIFlags(MachineInstr::FrameDestroy);

  // If FPSCR was saved, it will be popped into R4.
  if (RegPresent(ARM::FPSCR)) {
    MIB.addReg(RegNo: ARM::R4, flags: RegState::Define);
  }

  // If FPEXC was saved, it will be popped into R5.
  if (RegPresent(ARM::FPEXC)) {
    MIB.addReg(RegNo: ARM::R5, flags: RegState::Define);
  }

  // Move the FPSCR value back into the register with the VMSR instruction.
  if (RegPresent(ARM::FPSCR)) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR))
        .addReg(RegNo: ARM::R4)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .setMIFlags(MachineInstr::FrameDestroy);
  }

  // Move the FPEXC value back into the register with the VMSR_FPEXC
  // instruction.
  if (RegPresent(ARM::FPEXC)) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VMSR_FPEXC))
        .addReg(RegNo: ARM::R5)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .setMIFlags(MachineInstr::FrameDestroy);
  }
}
1937
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
///
/// R4 is used as a scratch register to hold the (aligned) spill slot
/// address; the last spill instruction emitted kills it. The instruction
/// count of this sequence is relied upon by skipAlignedDPRCS2Spills.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    // Note: for registers below D8 this subtraction wraps around (DNum is
    // unsigned), so the range check below also filters them out.
    unsigned DNum = I.getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(ObjectIdx: FI, Alignment: DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer.  MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack.  Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(ObjectIdx: FI, Alignment: MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
      .addReg(RegNo: ARM::SP)
      .addImm(Val: 8 * NumAlignedDPRCS2Regs)
      .add(MOs: predOps(Pred: ARMCC::AL))
      .add(MO: condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MBBI: MI, DL, Reg: ARM::R4, Alignment: MaxAlign, MustBeSingleInstruction: true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::SP)
                                .addReg(RegNo: ARM::R4)
                                .add(MOs: predOps(Pred: ARMCC::AL));
  if (!isThumb)
    MIB.add(MO: condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Qwb_fixed), DestReg: ARM::R4)
        .addReg(RegNo: ARM::R4, flags: RegState::Kill)
        .addImm(Val: 16)
        .addReg(RegNo: NextReg)
        .addReg(RegNo: SupReg, flags: RegState::ImplicitKill)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1d64Q))
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: NextReg)
        .addReg(RegNo: SupReg, flags: RegState::ImplicitKill)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
    MBB.addLiveIn(PhysReg: SupReg);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VST1q64))
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(PhysReg: NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VSTRD))
        .addReg(RegNo: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: (NextReg - R4BaseReg) * 2)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
}
2080
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
///
/// The realignment prelude is always exactly three instructions, followed by
/// one, two, or three spill instructions depending on how many d-registers
/// were saved: 1, 2 or 4 registers take a single store; 3, 5 or 6 take two;
/// 7 takes three (see the emission logic in emitAlignedDPRCS2Spills).
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
  switch(NumAlignedDPRCS2Regs) {
  case 7:
    // 7 registers need a third spill instruction.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  default:
    // 3, 5 and 6 registers need a second spill instruction.
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  case 1:
  case 2:
  case 4:
    // The final spill instruction carries the kill of the scratch register.
    assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
    ++MI;
  }
  return MI;
}
2110
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// Mirrors emitAlignedDPRCS2Spills: R4 is used as a scratch register holding
/// the d8 spill slot address, and the last reload kills it.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: Opc), DestReg: ARM::R4)
      .addFrameIndex(Idx: D8SpillFI)
      .addImm(Val: 0)
      .add(MOs: predOps(Pred: ARMCC::AL))
      .add(MO: condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  // The writeback is only needed when emitting two vld1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Qwb_fixed), DestReg: NextReg)
        .addReg(RegNo: ARM::R4, flags: RegState::Define)
        .addReg(RegNo: ARM::R4, flags: RegState::Kill)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg, flags: RegState::ImplicitDefine)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QQPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1d64Q), DestReg: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .addReg(RegNo: SupReg, flags: RegState::ImplicitDefine)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(Reg: NextReg, SubIdx: ARM::dsub_0, RC: &ARM::QPRRegClass);
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLD1q64), DestReg: SupReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 16)
        .add(MOs: predOps(Pred: ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // vldr.64 uses addrmode5 which has an offset scale of 4.
  if (NumAlignedDPRCS2Regs)
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::VLDRD), DestReg: NextReg)
        .addReg(RegNo: ARM::R4)
        .addImm(Val: 2 * (NextReg - R4BaseReg))
        .add(MOs: predOps(Pred: ARMCC::AL));

  // The last reload inserted kills the scratch register r4.
  std::prev(x: MI)->addRegisterKilled(IncomingReg: ARM::R4, RegInfo: TRI);
}
2203
/// Insert prologue spill code for the callee-saved registers in \p CSI.
///
/// Emits (in order): the PAC computation into r12 when return-address
/// signing is enabled, the FPCXTNS save for CMSE, then one push sequence per
/// spill area (GPRCS1, GPRCS2, FP status regs, DPRCS1, GPRCS3), and finally
/// the aligned DPRCS2 spills with their stack-realignment code.
/// Returns true to indicate the target handled the spilling itself.
bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }
  // Save the non-secure floating point context.
  if (llvm::any_of(Range&: CSI, P: [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: STI.getInstrInfo()->get(Opcode: ARM::VSTR_FPCXTNS_pre),
            DestReg: ARM::SP)
        .addReg(RegNo: ARM::SP)
        .addImm(Val: -4)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // Predicates selecting which spill area a given register belongs to, used
  // to route each CSI entry to the right push sequence below.
  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS1);
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS2);
  emitFPStatusSaves(MBB, MI, CSI, PushOpc);
  emitPushInst(MBB, MI, CSI, StmOpc: FltOpc, StrOpc: 0, NoGap: true, Func: IsDPRCS1);
  emitPushInst(MBB, MI, CSI, StmOpc: PushOpc, StrOpc: PushOneOpc, NoGap: false, Func: IsGPRCS3);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
2269
/// Insert epilogue reload code for the callee-saved registers in \p CSI.
///
/// Restores the spill areas in the reverse order of
/// spillCalleeSavedRegisters: first the aligned DPRCS2 registers (while the
/// stack is still aligned), then GPRCS3, DPRCS1, the FP status registers,
/// GPRCS2 and GPRCS1. Returns true to indicate the target handled the
/// restoring itself.
bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;

  // Predicates selecting which spill area a given register belongs to, used
  // to route each CSI entry to the right pop sequence below.
  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, Variation: PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS3);
  emitPopInst(MBB, MI, CSI, LdmOpc: FltOpc, LdrOpc: 0, isVarArg, NoGap: true, Func: IsDPRCS1);
  emitFPStatusRestores(MBB, MI, CSI, LdmOpc: PopOpc);
  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS2);
  emitPopInst(MBB, MI, CSI, LdmOpc: PopOpc, LdrOpc, isVarArg, NoGap: false, Func: IsGPRCS1);

  return true;
}
2321
2322// FIXME: Make generic?
2323static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2324 const ARMBaseInstrInfo &TII) {
2325 unsigned FnSize = 0;
2326 for (auto &MBB : MF) {
2327 for (auto &MI : MBB)
2328 FnSize += TII.getInstSizeInBytes(MI);
2329 }
2330 if (MF.getJumpTableInfo())
2331 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2332 FnSize += Table.MBBs.size() * 4;
2333 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2334 return FnSize;
2335}
2336
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
///
/// \p HasNonSPFrameIndex is set when some frame-index operand must live in a
/// register class that does not contain SP, so a scratch register may be
/// needed regardless of the returned limit.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // Start from the widest encodable offset (12-bit immediate) and shrink it
  // as more restrictive addressing modes are encountered.
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, OpNum: i, TRI, MF);
        if (RegClass && !RegClass->contains(Reg: ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          // 8-bit immediate scaled by 2.
          Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          // 8-bit immediate scaled by 4.
          Limit = std::min(a: Limit, b: ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(a: Limit, b: (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(a: Limit, b: ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
2421
// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
//
// Decides how many of the saved d-registers (from SavedRegs) should go in
// the aligned DPRCS2 area, records the count in ARMFunctionInfo, and
// reserves r4 as the scratch register the aligned spill/reload code needs.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  // Bail out when the feature is disabled by the command-line flag.
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
          MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // needs spilling.  The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range.  Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(Idx: ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}
2468
2469bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2470 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2471 // upon function entry (resp. restore it immmediately before return)
2472 if (STI.hasV8_1MMainlineOps() &&
2473 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2474 return false;
2475
2476 // We are disabling shrinkwrapping for now when PAC is enabled, as
2477 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2478 // generated. A follow-up patch will fix this in a more performant manner.
2479 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2480 SpillsLR: true /* SpillsLR */))
2481 return false;
2482
2483 return true;
2484}
2485
2486bool ARMFrameLowering::requiresAAPCSFrameRecord(
2487 const MachineFunction &MF) const {
2488 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2489 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2490}
2491
2492// Thumb1 may require a spill when storing to a frame index through FP (or any
2493// access with execute-only), for cases where FP is a high register (R11). This
2494// scans the function for cases where this may happen.
2495static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
2496 const TargetFrameLowering &TFI) {
2497 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2498 if (!AFI->isThumb1OnlyFunction())
2499 return false;
2500
2501 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2502 for (const auto &MBB : MF)
2503 for (const auto &MI : MBB)
2504 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2505 STI.genExecuteOnly())
2506 for (const auto &Op : MI.operands())
2507 if (Op.isFI()) {
2508 Register Reg;
2509 TFI.getFrameIndexReference(MF, FI: Op.getIndex(), FrameReg&: Reg);
2510 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2511 return true;
2512 }
2513 return false;
2514}
2515
// On top of the target-independent analysis, decide which extra callee-saved
// registers this function must spill (scratch registers, LR for far jumps,
// alignment padding spills) and whether an emergency spill slot is needed
// for register scavenging.
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
                           !MF.getTarget().Options.DisableFramePointerElim(MF);
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const Function &F = MF.getFunction();
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = RegInfo->getFrameRegister(MF);
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // For a floating point interrupt, save these registers always, since LLVM
  // currently doesn't model reads/writes to these registers.
  if (F.hasFnAttribute(Kind: "interrupt") && F.hasFnAttribute(Kind: "save-fp")) {
    SavedRegs.set(ARM::FPSCR);
    SavedRegs.set(ARM::R4);

    // FPEXC is only present on non-M-Class cores.
    if (STI.isMClass()) {
      SavedRegs.reset(Idx: ARM::FPEXC);
    } else {
      SavedRegs.set(ARM::FPEXC);
      SavedRegs.set(ARM::R5);
    }
  }

  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
  // since it's not always possible to restore sp from fp in a single
  // instruction.
  // FIXME: It will be better just to find spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, StackSizeInBytes: MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
    // requires stack alignment. We don't know for sure what the stack size
    // will be, but for this, an estimate is good enough. If anything
    // changes it, it'll be a spill, which implies we've used all the registers
    // and so R4 is already used, so not marking it here will be OK.
    // FIXME: It will be better just to find spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());

  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // When return address signing is enabled R12 is treated as callee-saved.
  if (AFI->shouldSignReturnAddress())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  // While scanning, count GPR/FPR spills and remember which CS GPRs are
  // still unspilled (split by push/pop area when using the R7 split).
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF: &MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Idx: Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (PushPopSplit != ARMSubtarget::SplitR7) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        [[fallthrough]];
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (PushPopSplit != ARMSubtarget::SplitR7) {
        UnspilledCS1GPRs.push_back(Elt: Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Elt: Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Elt: Reg);
        break;
      }
    }
  }

  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This enables
    // use of BL to implement far jump.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  // and which instructions will need a scratch register for them. Is it
  // worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);

  // Determine biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(ObjectIdx: I) + MFI.getObjectSize(ObjectIdx: I);
    MaxFixedOffset = std::max(a: MaxFixedOffset, b: MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
    //
    // A special case is the scenario where r11 is used as FP, where accesses
    // to a frame index will require its value to be moved into a low reg.
    // This is handled later on, once we are able to determine if we have any
    // fp-relative accesses.
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, TFI: this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, AFI: *AFI, MF);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets ||
      !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    if (HasFP) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if ((requiresAAPCSFrameRecord(MF) ||
           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
          !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(CI: LRPos);
      }
      auto FPPos = llvm::find(Range&: UnspilledCS1GPRs, Val: FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(CI: FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is the number of extra spills inserted for callee-save GPRs which
    // would not otherwise be used by the function. When greater than zero it
    // guarantees that it is possible to scavenge a register to hold the
    // address of a stack slot. On Thumb1, the register must be a valid operand
    // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
    // or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    unsigned NumExtraCSSpill = 0;

    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions can not
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are required.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue for
      // free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(a: EntryRegDeficit, b: ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first push
      // instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Idx: Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: Reg);
          LLVM_DEBUG(
              dbgs()
              << printReg(Reg, TRI)
              << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP || FramePtr != ARM::R7) {
        if (SavedRegs.test(Idx: ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: ARM::R7);
          LLVM_DEBUG(
              dbgs()
              << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Idx: Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(Reg: ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(Idx: ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Elt: ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // need saving.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(PhysReg: Reg))
          NumExtraCSSpill++;
        UnspilledCS1GPRs.erase(CI: llvm::find(Range&: UnspilledCS1GPRs, Val: Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }

    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = llvm::find(Range&: UnspilledCS1GPRs, Val: (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(CI: LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(PhysReg: ARM::LR) && !MRI.isPhysRegUsed(PhysReg: ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        NumExtraCSSpill++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned Reg : UnspilledCS1GPRs) {
          // Don't spill high register if the function is thumb. In the case of
          // Windows on ARM, accept R11 (frame pointer)
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              NumExtraCSSpill++;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(PhysReg: Reg) && !MRI.isPhysRegUsed(PhysReg: Reg))
          NumExtraCSSpill++;
      }
    }

    // Estimate if we might need to scavenge registers at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments and for frame index accesses when FP is high register,
    // even when the frame itself is small.
    unsigned RegsNeeded = 0;
    if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, TFI: *this)) {
      RegsNeeded++;
      // With thumb1 execute-only we may need an additional register for saving
      // and restoring the CPSR.
      if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
        RegsNeeded++;
    }

    if (RegsNeeded > NumExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(PhysReg: Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Elt: Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(PhysReg: Reg)) {
            Extras.push_back(Elt: Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(PhysReg: Reg))
            NumExtraCSSpill++;
        }
      }
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            FI: MFI.CreateSpillStackObject(Size, Alignment));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(Idx: ARM::LR));
}
3056
3057void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
3058 MachineFrameInfo &MFI = MF.getFrameInfo();
3059 if (!MFI.isCalleeSavedInfoValid())
3060 return;
3061
3062 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3063 // into PC so it is not live out of the return block: Clear the Restored bit
3064 // in that case.
3065 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
3066 if (Info.getReg() != ARM::LR)
3067 continue;
3068 if (all_of(Range&: MF, P: [](const MachineBasicBlock &MBB) {
3069 return all_of(Range: MBB.terminators(), P: [](const MachineInstr &Term) {
3070 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3071 Term.getOpcode() == ARM::t2LDMIA_RET ||
3072 Term.getOpcode() == ARM::tPOP_RET;
3073 });
3074 })) {
3075 Info.setRestored(false);
3076 break;
3077 }
3078 }
3079}
3080
// Hook run after callee-saved info is finalized but before frame layout is
// frozen: performs the generic processing, then rechecks whether LR truly
// needs restoring (it may be popped directly into PC on all return paths).
void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  updateLRRestored(MF);
}
3086
3087void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
3088 BitVector &SavedRegs) const {
3089 TargetFrameLowering::getCalleeSaves(MF, SavedRegs);
3090
3091 // If we have the "returned" parameter attribute which guarantees that we
3092 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3093 // record that fact for IPRA.
3094 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3095 if (AFI->getPreservesR0())
3096 SavedRegs.set(ARM::R0);
3097}
3098
// Augment the callee-saved list with pseudo-registers that need spill slots:
// the FP context (FPCXTNS) for CMSE entry functions, and R12 (holding the
// return-address PAC) when return address signing is enabled. Returns false
// so PEI still performs the default slot assignment for the final list.
bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(args: ARM::FPCXTNS);
    // FPCXTNS is consumed on return via VLDR into FPCXT_NS, not reloaded
    // into a GPR, so it is not "restored" in the usual sense.
    CSI.back().setRestored(false);
  }

  // For functions, which sign their return address, upon function entry, the
  // return address PAC is computed in R12. Treat R12 as a callee-saved register
  // in this case.
  const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
  if (AFI.shouldSignReturnAddress()) {
    // The order of register must match the order we push them, because the
    // PEI assigns frame indices in that order. That order depends on the
    // PushPopSplitVariation, there are only two cases which we use with return
    // address signing:
    switch (STI.getPushPopSplitVariation(MF)) {
    case ARMSubtarget::SplitR7:
      // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
      // Insert R12 just before the first high register / D-register entry.
      CSI.insert(position: find_if(Range&: CSI,
                         P: [=](const auto &CS) {
                           MCRegister Reg = CS.getReg();
                           return Reg == ARM::R10 || Reg == ARM::R11 ||
                                  Reg == ARM::R8 || Reg == ARM::R9 ||
                                  ARM::DPRRegClass.contains(Reg);
                         }),
                 x: CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::SplitR11AAPCSSignRA:
      // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
      // on the stack.
      CSI.insert(position: CSI.begin(), x: CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::NoSplit:
      assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
             "ABI-required frame pointers need a CSR split when signing return "
             "address.");
      // Insert R12 immediately after LR (the first non-LR position).
      CSI.insert(position: find_if(Range&: CSI,
                         P: [=](const auto &CS) {
                           MCRegister Reg = CS.getReg();
                           return Reg != ARM::LR;
                         }),
                 x: CalleeSavedInfo(ARM::R12));
      break;
    default:
      llvm_unreachable("Unexpected CSR split with return address signing");
    }
  }

  return false;
}
3154
3155const TargetFrameLowering::SpillSlot *
3156ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
3157 static const SpillSlot FixedSpillOffsets[] = {{.Reg: ARM::FPCXTNS, .Offset: -4}};
3158 NumEntries = std::size(FixedSpillOffsets);
3159 return FixedSpillOffsets;
3160}
3161
// Replace the ADJCALLSTACKDOWN / ADJCALLSTACKUP pseudo instructions with
// real SP adjustments (or nothing, when a reserved call frame makes the
// adjustment unnecessary). Returns the iterator following the erased pseudo.
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  // Operand 1 of the destroy pseudo is the number of bytes the callee pops.
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(i: 1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  // Carry any predication on the pseudo over to the emitted SP updates.
  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(i: PIdx).getImm();
  unsigned PredReg = TII.getFramePred(MI: *I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(I: *I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(SPAdj: Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -Amount, MIFlags: MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: Amount, MIFlags: MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, MBBI&: I, dl, TII, NumBytes: -CalleePopAmount,
                 MIFlags: MachineInstr::NoFlags, Pred, PredReg);
  }
  return MBB.erase(I);
}
3215
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time until one of the two top bits
  // is set, remembering how far we shifted.
  unsigned ShiftAmt = 0;
  for (; !(Value & 0xC0000000); ShiftAmt += 2)
    Value <<= 2;

  // Keep the leading byte, rounding it up if any of the discarded low 24
  // bits were set.
  uint32_t Rounded = (Value >> 24) + ((Value & 0x00FFFFFF) ? 1 : 0);

  // If rounding overflowed past 8 bits, drop back to an 8-bit pattern that
  // can absorb the overflow via a coarser rotation.
  if (Rounded & 0x100)
    Rounded &= 0x1FC;

  // Undo the normalization shift to place the byte back at its magnitude.
  return ShiftAmt > 24 ? Rounded >> (ShiftAmt - 24)
                       : Rounded << (24 - ShiftAmt);
}
3244
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit, so a frame needing no more than this much space can skip
// the call to __morestack entirely.
static const uint64_t kSplitStackAvailable = 256;
3248
3249// Adjust the function prologue to enable split stacks. This currently only
3250// supports android and linux.
3251//
3252// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
3253// must be well defined in order to allow for consistent implementations of the
3254// __morestack helper function. The ABI is also not a normal ABI in that it
3255// doesn't follow the normal calling conventions because this allows the
3256// prologue of each function to be optimized further.
3257//
3258// Currently, the ABI looks like (when calling __morestack)
3259//
3260// * r4 holds the minimum stack size requested for this function call
3261// * r5 holds the stack size of the arguments to the function
3262// * the beginning of the function is 3 instructions after the call to
3263// __morestack
3264//
3265// Implementations of __morestack should use r4 to allocate a new stack, r5 to
3266// place the arguments on to the new stack, and the 3-instruction knowledge to
3267// jump directly to the body of the function when working on the new stack.
3268//
3269// An old (and possibly no longer compatible) implementation of __morestack for
3270// ARM can be found at [1].
3271//
3272// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, platforms other than
  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error(reason: "Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error(reason: "Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  // Opcode used to materialize a full 32-bit immediate when movt is
  // available; Thumb1 without movt falls back to constant pools below.
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  // The blocks making up the split-stack check, inserted in front of the
  // existing prologue:
  //   PrevStackMBB - push the scratch registers
  //   McrMBB       - compute SP - StackSize (and read the TLS base for the
  //                  non-Thumb1 path)
  //   GetMBB       - load the stack limit and compare against it
  //   AllocMBB     - call __morestack and return
  //   PostStackMBB - restore the scratch registers and fall into the prologue
  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(Elt: &PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(Ptr: PredBB).second)
        WalkList.push_back(Elt: PredBB);
    }
  } while (!WalkList.empty());

  // The order in that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  BeforePrologueRegion.insert_range(R&: AddedBlocks);

  // Propagate the prologue's live-ins to every block in the region,
  // including the newly created ones.
  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(RegMaskPair: LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(Ptr: B);
    MF.insert(MBBI: PrologueMBB.getIterator(), MBB: B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(MBB: &PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(Old: &PrologueMBB, New: AddedBlocks[0]);
  }

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(Value: StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: PrevStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 8);
    CFIBuilder.buildOffset(Reg: ScratchReg1, Offset: -4);
    CFIBuilder.buildOffset(Reg: ScratchReg0, Offset: -8);
  }

  // mov SR1, sp
  // In Thumb mode SR1 always starts as a copy of SP; in ARM mode this copy is
  // only needed when comparing SP directly (for large frames, SR1 is instead
  // computed as SP - StackSize below without the intermediate copy).
  if (Thumb) {
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ScratchReg1)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVr), DestReg: ScratchReg1)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .add(MO: condCodeOp());
  }

  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBi8), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addReg(RegNo: ScratchReg1)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      // Stack size doesn't fit in an immediate: materialize it in SR0 first.
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
            .addImm(Val: AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                   Val: AlignedStackSize);
      }
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tSUBrr), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addReg(RegNo: ScratchReg1)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBri), DestReg: ScratchReg1)
          .addReg(RegNo: ARM::SP)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(MBB&: *McrMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                 Val: AlignedStackSize);
      BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::SUBrr), DestReg: ScratchReg1)
          .addReg(RegNo: ARM::SP)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    }
  }

  // Load the stack limit into SR0. Thumb1 cannot read the TLS base via MRC,
  // so it goes through the external __STACK_LIMIT symbol instead.
  if (Thumb && ST->isThumb1Only()) {
    if (ST->genExecuteOnly()) {
      BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
          .addExternalSymbol(FnName: "__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
      ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
          C&: MF.getFunction().getContext(), s: "__STACK_LIMIT", ID: PCLabelId, PCAdj: 0);
      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRpci), DestReg: ScratchReg0)
          .addConstantPoolIndex(Idx: CPI)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }

    // ldr SR0, [SR0]
    BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tLDRi), DestReg: ScratchReg0)
        .addReg(RegNo: ScratchReg0)
        .addImm(Val: 0)
        .add(MOs: predOps(Pred: ARMCC::AL));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(BB: McrMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2MRC : ARM::MRC),
            DestReg: ScratchReg0)
        .addImm(Val: 15)
        .addImm(Val: 0)
        .addImm(Val: 13)
        .addImm(Val: 0)
        .addImm(Val: 3)
        .add(MOs: predOps(Pred: ARMCC::AL));

    // Use the last tls slot on android and a private field of the TCB
    // (thread control block) on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode: Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            DestReg: ScratchReg0)
        .addReg(RegNo: ScratchReg0)
        .addImm(Val: 4 * TlsOffset)
        .add(MOs: predOps(Pred: ARMCC::AL));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
      .addReg(RegNo: ScratchReg0)
      .addReg(RegNo: ScratchReg1)
      .add(MOs: predOps(Pred: ARMCC::AL));

  // This jump is taken if StackLimit <= SP - stack required, i.e. there is
  // enough room and __morestack does not need to be called.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(BB: GetMBB, MIMD: DL, MCID: TII.get(Opcode))
      .addMBB(MBB: PostStackMBB)
      .addImm(Val: ARMCC::LS)
      .addReg(RegNo: ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and amount size of stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  // The amount of stack required.
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg0)
          .add(MO: condCodeOp())
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg0)
            .addImm(Val: AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                   Val: AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg0)
          .addImm(Val: AlignedStackSize)
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg0, SubIdx: 0,
                                 Val: AlignedStackSize);
    }
  }

  // Pass second argument for the __morestack by Scratch Register #1.
  // The amount of stack consumed to save function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVi8), DestReg: ScratchReg1)
          .add(MO: condCodeOp())
          .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: MovOp), DestReg: ScratchReg1)
            .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
            Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(Value: ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::MOVi), DestReg: ScratchReg1)
          .addImm(Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .add(MO: condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          MBB&: *AllocMBB, MBBI, dl: DL, DestReg: ScratchReg1, SubIdx: 0,
          Val: alignToARMConstant(Value: ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::STMDB_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register (r4, r5 and lr are now on the stack, so the CFA is
  // SP + 12).
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 12);
    CFIBuilder.buildOffset(Reg: ARM::LR, Offset: -12);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tBL))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addExternalSymbol(FnName: "__morestack");
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::BL))
        .addExternalSymbol(FnName: "__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 POP cannot write LR directly; pop into SR0 and copy it over.
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
          .add(MOs: predOps(Pred: ARMCC::AL))
          .addReg(RegNo: ScratchReg0);
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::LR)
          .addReg(RegNo: ScratchReg0)
          .add(MOs: predOps(Pred: ARMCC::AL));
    } else {
      BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::t2LDR_POST))
          .addReg(RegNo: ARM::LR, flags: RegState::Define)
          .addReg(RegNo: ARM::SP, flags: RegState::Define)
          .addReg(RegNo: ARM::SP)
          .addImm(Val: 4)
          .add(MOs: predOps(Pred: ARMCC::AL));
    }
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ARM::LR);
  }

  // Restore SR0 and SR1 in case of __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
    CFIInstBuilder(AllocMBB, MachineInstr::NoFlags).buildDefCFAOffset(Offset: 0);

  // Return from this function.
  BuildMI(BB: AllocMBB, MIMD: DL, MCID: TII.get(Opcode: ST->getReturnOpcode())).add(MOs: predOps(Pred: ARMCC::AL));

  // Restore SR0 and SR1 in case of __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP))
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  } else {
    BuildMI(BB: PostStackMBB, MIMD: DL, MCID: TII.get(Opcode: ARM::LDMIA_UPD))
        .addReg(RegNo: ARM::SP, flags: RegState::Define)
        .addReg(RegNo: ARM::SP)
        .add(MOs: predOps(Pred: ARMCC::AL))
        .addReg(RegNo: ScratchReg0)
        .addReg(RegNo: ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
    CFIBuilder.buildDefCFAOffset(Offset: 0);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
    CFIBuilder.buildSameValue(Reg: ScratchReg0);
    CFIBuilder.buildSameValue(Reg: ScratchReg1);
  }

  // Organizing MBB lists
  PostStackMBB->addSuccessor(Succ: &PrologueMBB);

  AllocMBB->addSuccessor(Succ: PostStackMBB);

  GetMBB->addSuccessor(Succ: PostStackMBB);
  GetMBB->addSuccessor(Succ: AllocMBB);

  McrMBB->addSuccessor(Succ: GetMBB);

  PrevStackMBB->addSuccessor(Succ: McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
3705