AArch64InstrInfo.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp]

1	//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains the AArch64 implementation of the TargetInstrInfo class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "AArch64InstrInfo.h"
14	#include "AArch64ExpandImm.h"
15	#include "AArch64MachineFunctionInfo.h"
16	#include "AArch64PointerAuth.h"
17	#include "AArch64Subtarget.h"
18	#include "MCTargetDesc/AArch64AddressingModes.h"
19	#include "MCTargetDesc/AArch64MCTargetDesc.h"
20	#include "Utils/AArch64BaseInfo.h"
21	#include "llvm/ADT/ArrayRef.h"
22	#include "llvm/ADT/STLExtras.h"
23	#include "llvm/ADT/SmallVector.h"
24	#include "llvm/CodeGen/CFIInstBuilder.h"
25	#include "llvm/CodeGen/LivePhysRegs.h"
26	#include "llvm/CodeGen/MachineBasicBlock.h"
27	#include "llvm/CodeGen/MachineCombinerPattern.h"
28	#include "llvm/CodeGen/MachineFrameInfo.h"
29	#include "llvm/CodeGen/MachineFunction.h"
30	#include "llvm/CodeGen/MachineInstr.h"
31	#include "llvm/CodeGen/MachineInstrBuilder.h"
32	#include "llvm/CodeGen/MachineMemOperand.h"
33	#include "llvm/CodeGen/MachineModuleInfo.h"
34	#include "llvm/CodeGen/MachineOperand.h"
35	#include "llvm/CodeGen/MachineRegisterInfo.h"
36	#include "llvm/CodeGen/RegisterScavenging.h"
37	#include "llvm/CodeGen/StackMaps.h"
38	#include "llvm/CodeGen/TargetRegisterInfo.h"
39	#include "llvm/CodeGen/TargetSubtargetInfo.h"
40	#include "llvm/IR/DebugInfoMetadata.h"
41	#include "llvm/IR/DebugLoc.h"
42	#include "llvm/IR/GlobalValue.h"
43	#include "llvm/IR/Module.h"
44	#include "llvm/MC/MCAsmInfo.h"
45	#include "llvm/MC/MCInst.h"
46	#include "llvm/MC/MCInstBuilder.h"
47	#include "llvm/MC/MCInstrDesc.h"
48	#include "llvm/Support/Casting.h"
49	#include "llvm/Support/CodeGen.h"
50	#include "llvm/Support/CommandLine.h"
51	#include "llvm/Support/ErrorHandling.h"
52	#include "llvm/Support/LEB128.h"
53	#include "llvm/Support/MathExtras.h"
54	#include "llvm/Target/TargetMachine.h"
55	#include "llvm/Target/TargetOptions.h"
56	#include <cassert>
57	#include <cstdint>
58	#include <iterator>
59	#include <utility>
60
61	using namespace llvm;
62
63	#define GET_INSTRINFO_CTOR_DTOR
64	#include "AArch64GenInstrInfo.inc"
65
66	static cl::opt<unsigned>
67	CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(Val: `9`),
68	cl::desc ("Restrict range of CB instructions (DEBUG)"));
69
70	static cl::opt<unsigned> TBZDisplacementBits(
71	"aarch64-tbz-offset-bits", cl::Hidden, cl::init(Val: `14`),
72	cl::desc ("Restrict range of TB[N]Z instructions (DEBUG)"));
73
74	static cl::opt<unsigned> CBZDisplacementBits(
75	"aarch64-cbz-offset-bits", cl::Hidden, cl::init(Val: `19`),
76	cl::desc ("Restrict range of CB[N]Z instructions (DEBUG)"));
77
78	static cl::opt<unsigned>
79	BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(Val: `19`),
80	cl::desc ("Restrict range of Bcc instructions (DEBUG)"));
81
82	static cl::opt<unsigned>
83	BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(Val: `26`),
84	cl::desc ("Restrict range of B instructions (DEBUG)"));
85
86	AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
87	: AArch64GenInstrInfo (AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
88	AArch64::CATCHRET),
89	RI (STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
90
91	/// GetInstSize - Return the number of bytes of code the specified
92	/// instruction may be. This returns the maximum number of bytes.
93	unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
94	const MachineBasicBlock &MBB = *MI.getParent();
95	const MachineFunction *MF = MBB.getParent();
96	const Function &F = MF->getFunction();
97	const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
98
99	{
100	auto Op = MI.getOpcode();
101	if (Op == AArch64::INLINEASM \|\| Op == AArch64::INLINEASM_BR)
102	return getInlineAsmLength(Str: MI.getOperand(i: `0`).getSymbolName(), MAI: *MAI);
103	}
104
105	// Meta-instructions emit no code.
106	if (MI.isMetaInstruction())
107	return `0`;
108
109	// FIXME: We currently only handle pseudoinstructions that don't get expanded
110	// before the assembly printer.
111	unsigned NumBytes = `0`;
112	const MCInstrDesc &Desc = MI.getDesc();
113
114	if (!MI.isBundle() && isTailCallReturnInst(MI)) {
115	NumBytes = Desc.getSize() ? Desc.getSize() : `4`;
116
117	const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
118	if (!MFI->shouldSignReturnAddress(SpillsLR: MF))
119	return NumBytes;
120
121	const auto &STI = MF->getSubtarget<AArch64Subtarget>();
122	auto Method = STI.getAuthenticatedLRCheckMethod(MF: *MF);
123	NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
124	return NumBytes;
125	}
126
127	// Size should be preferably set in
128	// llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
129	// Specific cases handle instructions of variable sizes
130	switch (Desc.getOpcode()) {
131	default:
132	if (Desc.getSize())
133	return Desc.getSize();
134
135	// Anything not explicitly designated otherwise (i.e. pseudo-instructions
136	// with fixed constant size but not specified in .td file) is a normal
137	// 4-byte insn.
138	NumBytes = `4`;
139	break;
140	case TargetOpcode::STACKMAP:
141	// The upper bound for a stackmap intrinsic is the full length of its shadow
142	NumBytes = StackMapOpers (&MI).getNumPatchBytes();
143	assert(NumBytes % `4` == `0` && "Invalid number of NOP bytes requested!");
144	break;
145	case TargetOpcode::PATCHPOINT:
146	// The size of the patchpoint intrinsic is the number of bytes requested
147	NumBytes = PatchPointOpers (&MI).getNumPatchBytes();
148	assert(NumBytes % `4` == `0` && "Invalid number of NOP bytes requested!");
149	break;
150	case TargetOpcode::STATEPOINT:
151	NumBytes = StatepointOpers (&MI).getNumPatchBytes();
152	assert(NumBytes % `4` == `0` && "Invalid number of NOP bytes requested!");
153	// No patch bytes means a normal call inst is emitted
154	if (NumBytes == `0`)
155	NumBytes = `4`;
156	break;
157	case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
158	// If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
159	// instructions are expanded to the specified number of NOPs. Otherwise,
160	// they are expanded to 36-byte XRay sleds.
161	NumBytes =
162	F.getFnAttributeAsParsedInteger(Kind: "patchable-function-entry", Default: `9`) * `4`;
163	break;
164	case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
165	case TargetOpcode::PATCHABLE_TAIL_CALL:
166	case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
167	// An XRay sled can be 4 bytes of alignment plus a 32-byte block.
168	NumBytes = `36`;
169	break;
170	case TargetOpcode::PATCHABLE_EVENT_CALL:
171	// EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
172	NumBytes = `24`;
173	break;
174
175	case AArch64::SPACE:
176	NumBytes = MI.getOperand(i: `1`).getImm();
177	break;
178	case TargetOpcode::BUNDLE:
179	NumBytes = getInstBundleLength(MI);
180	break;
181	}
182
183	return NumBytes;
184	}
185
186	unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
187	unsigned Size = `0`;
188	MachineBasicBlock::const_instr_iterator I = MI.getIterator();
189	MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
190	while (++I != E && I ->isInsideBundle()) {
191	assert(!I->isBundle() && "No nested bundle!");
192	Size += getInstSizeInBytes(MI: *I);
193	}
194	return Size;
195	}
196
197	static void parseCondBranch(MachineInstr LastInst, MachineBasicBlock &Target,
198	SmallVectorImpl<MachineOperand> &Cond) {
199	// Block ends with fall-through condbranch.
200	switch (LastInst->getOpcode()) {
201	default:
202	llvm_unreachable("Unknown branch instruction?");
203	case AArch64::Bcc:
204	Target = LastInst->getOperand(i: `1`).getMBB();
205	Cond.push_back(Elt: LastInst->getOperand(i: `0`));
206	break;
207	case AArch64::CBZW:
208	case AArch64::CBZX:
209	case AArch64::CBNZW:
210	case AArch64::CBNZX:
211	Target = LastInst->getOperand(i: `1`).getMBB();
212	Cond.push_back(Elt: MachineOperand::CreateImm(Val: -`1`));
213	Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst->getOpcode()));
214	Cond.push_back(Elt: LastInst->getOperand(i: `0`));
215	break;
216	case AArch64::TBZW:
217	case AArch64::TBZX:
218	case AArch64::TBNZW:
219	case AArch64::TBNZX:
220	Target = LastInst->getOperand(i: `2`).getMBB();
221	Cond.push_back(Elt: MachineOperand::CreateImm(Val: -`1`));
222	Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst->getOpcode()));
223	Cond.push_back(Elt: LastInst->getOperand(i: `0`));
224	Cond.push_back(Elt: LastInst->getOperand(i: `1`));
225	break;
226	case AArch64::CBWPri:
227	case AArch64::CBXPri:
228	case AArch64::CBWPrr:
229	case AArch64::CBXPrr:
230	Target = LastInst->getOperand(i: `3`).getMBB();
231	Cond.push_back(Elt: MachineOperand::CreateImm(Val: -`1`));
232	Cond.push_back(Elt: MachineOperand::CreateImm(Val: LastInst->getOpcode()));
233	Cond.push_back(Elt: LastInst->getOperand(i: `0`));
234	Cond.push_back(Elt: LastInst->getOperand(i: `1`));
235	Cond.push_back(Elt: LastInst->getOperand(i: `2`));
236	break;
237	}
238	}
239
240	static unsigned getBranchDisplacementBits(unsigned Opc) {
241	switch (Opc) {
242	default:
243	llvm_unreachable("unexpected opcode!");
244	case AArch64::B:
245	return BDisplacementBits;
246	case AArch64::TBNZW:
247	case AArch64::TBZW:
248	case AArch64::TBNZX:
249	case AArch64::TBZX:
250	return TBZDisplacementBits;
251	case AArch64::CBNZW:
252	case AArch64::CBZW:
253	case AArch64::CBNZX:
254	case AArch64::CBZX:
255	return CBZDisplacementBits;
256	case AArch64::Bcc:
257	return BCCDisplacementBits;
258	case AArch64::CBWPri:
259	case AArch64::CBXPri:
260	case AArch64::CBWPrr:
261	case AArch64::CBXPrr:
262	return CBDisplacementBits;
263	}
264	}
265
266	bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
267	int64_t BrOffset) const {
268	unsigned Bits = getBranchDisplacementBits(Opc: BranchOp);
269	assert(Bits >= `3` && "max branch displacement must be enough to jump"
270	"over conditional branch expansion");
271	return isIntN(N: Bits, x: BrOffset / `4`);
272	}
273
274	MachineBasicBlock *
275	AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
276	switch (MI.getOpcode()) {
277	default:
278	llvm_unreachable("unexpected opcode!");
279	case AArch64::B:
280	return MI.getOperand(i: `0`).getMBB();
281	case AArch64::TBZW:
282	case AArch64::TBNZW:
283	case AArch64::TBZX:
284	case AArch64::TBNZX:
285	return MI.getOperand(i: `2`).getMBB();
286	case AArch64::CBZW:
287	case AArch64::CBNZW:
288	case AArch64::CBZX:
289	case AArch64::CBNZX:
290	case AArch64::Bcc:
291	return MI.getOperand(i: `1`).getMBB();
292	case AArch64::CBWPri:
293	case AArch64::CBXPri:
294	case AArch64::CBWPrr:
295	case AArch64::CBXPrr:
296	return MI.getOperand(i: `3`).getMBB();
297	}
298	}
299
300	void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
301	MachineBasicBlock &NewDestBB,
302	MachineBasicBlock &RestoreBB,
303	const DebugLoc &DL,
304	int64_t BrOffset,
305	RegScavenger RS) const* {
306	assert(RS && "RegScavenger required for long branching");
307	assert(MBB.empty() &&
308	"new block should be inserted for expanding unconditional branch");
309	assert(MBB.pred_size() == `1`);
310	assert(RestoreBB.empty() &&
311	"restore block should be inserted for restoring clobbered registers");
312
313	auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
314	// Offsets outside of the signed 33-bit range are not supported for ADRP +
315	// ADD.
316	if (!isInt<`33`>(x: BrOffset))
317	report_fatal_error(
318	reason: "Branch offsets outside of the signed 33-bit range not supported");
319
320	BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::ADRP), DestReg: Reg)
321	.addSym(Sym: DestBB.getSymbol(), TargetFlags: AArch64II::MO_PAGE);
322	BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::ADDXri), DestReg: Reg)
323	.addReg(RegNo: Reg)
324	.addSym(Sym: DestBB.getSymbol(), TargetFlags: AArch64II::MO_PAGEOFF \| AArch64II::MO_NC)
325	.addImm(Val: `0`);
326	BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::BR)).addReg(RegNo: Reg);
327	};
328
329	RS->enterBasicBlockEnd(MBB);
330	// If X16 is unused, we can rely on the linker to insert a range extension
331	// thunk if NewDestBB is out of range of a single B instruction.
332	constexpr Register Reg = AArch64::X16;
333	if (!RS->isRegUsed(Reg)) {
334	insertUnconditionalBranch(MBB, DestBB: &NewDestBB, DL);
335	RS->setRegUsed(Reg);
336	return;
337	}
338
339	// If there's a free register and it's worth inflating the code size,
340	// manually insert the indirect branch.
341	Register Scavenged = RS->FindUnusedReg(RC: &AArch64::GPR64RegClass);
342	if (Scavenged != AArch64::NoRegister &&
343	MBB.getSectionID() == MBBSectionID::ColdSectionID) {
344	buildIndirectBranch (Scavenged, NewDestBB);
345	RS->setRegUsed(Reg: Scavenged);
346	return;
347	}
348
349	// Note: Spilling X16 briefly moves the stack pointer, making it incompatible
350	// with red zones.
351	AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
352	if (!AFI \|\| AFI->hasRedZone().value_or(u: true))
353	report_fatal_error(
354	reason: "Unable to insert indirect branch inside function that has red zone");
355
356	// Otherwise, spill X16 and defer range extension to the linker.
357	BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::STRXpre))
358	.addReg(RegNo: AArch64::SP, flags: RegState::Define)
359	.addReg(RegNo: Reg)
360	.addReg(RegNo: AArch64::SP)
361	.addImm(Val: -`16`);
362
363	BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::B)).addMBB(MBB: &RestoreBB);
364
365	BuildMI(BB&: RestoreBB, I: RestoreBB.end(), MIMD: DL, MCID: get(Opcode: AArch64::LDRXpost))
366	.addReg(RegNo: AArch64::SP, flags: RegState::Define)
367	.addReg(RegNo: Reg, flags: RegState::Define)
368	.addReg(RegNo: AArch64::SP)
369	.addImm(Val: `16`);
370	}
371
372	// Branch analysis.
373	bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
374	MachineBasicBlock *&TBB,
375	MachineBasicBlock *&FBB,
376	SmallVectorImpl<MachineOperand> &Cond,
377	bool AllowModify) const {
378	// If the block has no terminators, it just falls into the block after it.
379	MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
380	if (I == MBB.end())
381	return false;
382
383	// Skip over SpeculationBarrierEndBB terminators
384	if (I ->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB \|\|
385	I ->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
386	--I;
387	}
388
389	if (!isUnpredicatedTerminator(MI: *I))
390	return false;
391
392	// Get the last instruction in the block.
393	MachineInstr LastInst = &I;
394
395	// If there is only one terminator instruction, process it.
396	unsigned LastOpc = LastInst->getOpcode();
397	if (I == MBB.begin() \|\| !isUnpredicatedTerminator(MI: *--I)) {
398	if (isUncondBranchOpcode(Opc: LastOpc)) {
399	TBB = LastInst->getOperand(i: `0`).getMBB();
400	return false;
401	}
402	if (isCondBranchOpcode(Opc: LastOpc)) {
403	// Block ends with fall-through condbranch.
404	parseCondBranch(LastInst, Target&: TBB, Cond);
405	return false;
406	}
407	return true; // Can't handle indirect branch.
408	}
409
410	// Get the instruction before it if it is a terminator.
411	MachineInstr SecondLastInst = &I;
412	unsigned SecondLastOpc = SecondLastInst->getOpcode();
413
414	// If AllowModify is true and the block ends with two or more unconditional
415	// branches, delete all but the first unconditional branch.
416	if (AllowModify && isUncondBranchOpcode(Opc: LastOpc)) {
417	while (isUncondBranchOpcode(Opc: SecondLastOpc)) {
418	LastInst->eraseFromParent();
419	LastInst = SecondLastInst;
420	LastOpc = LastInst->getOpcode();
421	if (I == MBB.begin() \|\| !isUnpredicatedTerminator(MI: *--I)) {
422	// Return now the only terminator is an unconditional branch.
423	TBB = LastInst->getOperand(i: `0`).getMBB();
424	return false;
425	}
426	SecondLastInst = &*I;
427	SecondLastOpc = SecondLastInst->getOpcode();
428	}
429	}
430
431	// If we're allowed to modify and the block ends in a unconditional branch
432	// which could simply fallthrough, remove the branch. (Note: This case only
433	// matters when we can't understand the whole sequence, otherwise it's also
434	// handled by BranchFolding.cpp.)
435	if (AllowModify && isUncondBranchOpcode(Opc: LastOpc) &&
436	MBB.isLayoutSuccessor(MBB: getBranchDestBlock(MI: *LastInst))) {
437	LastInst->eraseFromParent();
438	LastInst = SecondLastInst;
439	LastOpc = LastInst->getOpcode();
440	if (I == MBB.begin() \|\| !isUnpredicatedTerminator(MI: *--I)) {
441	assert(!isUncondBranchOpcode(LastOpc) &&
442	"unreachable unconditional branches removed above");
443
444	if (isCondBranchOpcode(Opc: LastOpc)) {
445	// Block ends with fall-through condbranch.
446	parseCondBranch(LastInst, Target&: TBB, Cond);
447	return false;
448	}
449	return true; // Can't handle indirect branch.
450	}
451	SecondLastInst = &*I;
452	SecondLastOpc = SecondLastInst->getOpcode();
453	}
454
455	// If there are three terminators, we don't know what sort of block this is.
456	if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(MI: *--I))
457	return true;
458
459	// If the block ends with a B and a Bcc, handle it.
460	if (isCondBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
461	parseCondBranch(LastInst: SecondLastInst, Target&: TBB, Cond);
462	FBB = LastInst->getOperand(i: `0`).getMBB();
463	return false;
464	}
465
466	// If the block ends with two unconditional branches, handle it. The second
467	// one is not executed, so remove it.
468	if (isUncondBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
469	TBB = SecondLastInst->getOperand(i: `0`).getMBB();
470	I = LastInst;
471	if (AllowModify)
472	I ->eraseFromParent();
473	return false;
474	}
475
476	// ...likewise if it ends with an indirect branch followed by an unconditional
477	// branch.
478	if (isIndirectBranchOpcode(Opc: SecondLastOpc) && isUncondBranchOpcode(Opc: LastOpc)) {
479	I = LastInst;
480	if (AllowModify)
481	I ->eraseFromParent();
482	return true;
483	}
484
485	// Otherwise, can't handle this.
486	return true;
487	}
488
489	bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
490	MachineBranchPredicate &MBP,
491	bool AllowModify) const {
492	// For the moment, handle only a block which ends with a cb(n)zx followed by
493	// a fallthrough. Why this? Because it is a common form.
494	// TODO: Should we handle b.cc?
495
496	MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
497	if (I == MBB.end())
498	return true;
499
500	// Skip over SpeculationBarrierEndBB terminators
501	if (I ->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB \|\|
502	I ->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
503	--I;
504	}
505
506	if (!isUnpredicatedTerminator(MI: *I))
507	return true;
508
509	// Get the last instruction in the block.
510	MachineInstr LastInst = &I;
511	unsigned LastOpc = LastInst->getOpcode();
512	if (!isCondBranchOpcode(Opc: LastOpc))
513	return true;
514
515	switch (LastOpc) {
516	default:
517	return true;
518	case AArch64::CBZW:
519	case AArch64::CBZX:
520	case AArch64::CBNZW:
521	case AArch64::CBNZX:
522	break;
523	};
524
525	MBP.TrueDest = LastInst->getOperand(i: `1`).getMBB();
526	assert(MBP.TrueDest && "expected!");
527	MBP.FalseDest = MBB.getNextNode();
528
529	MBP.ConditionDef = nullptr;
530	MBP.SingleUseCondition = false;
531
532	MBP.LHS = LastInst->getOperand(i: `0`);
533	MBP.RHS = MachineOperand::CreateImm(Val: `0`);
534	MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
535	: MachineBranchPredicate::PRED_EQ;
536	return false;
537	}
538
539	bool AArch64InstrInfo::reverseBranchCondition(
540	SmallVectorImpl<MachineOperand> &Cond) const {
541	if (Cond [`0`].getImm() != -`1`) {
542	// Regular Bcc
543	AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond [`0`].getImm();
544	Cond [`0`].setImm(AArch64CC::getInvertedCondCode(Code: CC));
545	} else {
546	// Folded compare-and-branch
547	switch (Cond [`1`].getImm()) {
548	default:
549	llvm_unreachable("Unknown conditional branch!");
550	case AArch64::CBZW:
551	Cond [`1`].setImm(AArch64::CBNZW);
552	break;
553	case AArch64::CBNZW:
554	Cond [`1`].setImm(AArch64::CBZW);
555	break;
556	case AArch64::CBZX:
557	Cond [`1`].setImm(AArch64::CBNZX);
558	break;
559	case AArch64::CBNZX:
560	Cond [`1`].setImm(AArch64::CBZX);
561	break;
562	case AArch64::TBZW:
563	Cond [`1`].setImm(AArch64::TBNZW);
564	break;
565	case AArch64::TBNZW:
566	Cond [`1`].setImm(AArch64::TBZW);
567	break;
568	case AArch64::TBZX:
569	Cond [`1`].setImm(AArch64::TBNZX);
570	break;
571	case AArch64::TBNZX:
572	Cond [`1`].setImm(AArch64::TBZX);
573	break;
574
575	// Cond is { -1, Opcode, CC, Op0, Op1 }
576	case AArch64::CBWPri:
577	case AArch64::CBXPri:
578	case AArch64::CBWPrr:
579	case AArch64::CBXPrr: {
580	// Pseudos using standard 4bit Arm condition codes
581	AArch64CC::CondCode CC =
582	static_cast<AArch64CC::CondCode>(Cond [`2`].getImm());
583	Cond [`2`].setImm(AArch64CC::getInvertedCondCode(Code: CC));
584	}
585	}
586	}
587
588	return false;
589	}
590
591	unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
592	int BytesRemoved) const* {
593	MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
594	if (I == MBB.end())
595	return `0`;
596
597	if (!isUncondBranchOpcode(Opc: I ->getOpcode()) &&
598	!isCondBranchOpcode(Opc: I ->getOpcode()))
599	return `0`;
600
601	// Remove the branch.
602	I ->eraseFromParent();
603
604	I = MBB.end();
605
606	if (I == MBB.begin()) {
607	if (BytesRemoved)
608	*BytesRemoved = `4`;
609	return `1`;
610	}
611	--I;
612	if (!isCondBranchOpcode(Opc: I ->getOpcode())) {
613	if (BytesRemoved)
614	*BytesRemoved = `4`;
615	return `1`;
616	}
617
618	// Remove the branch.
619	I ->eraseFromParent();
620	if (BytesRemoved)
621	*BytesRemoved = `8`;
622
623	return `2`;
624	}
625
626	void AArch64InstrInfo::instantiateCondBranch(
627	MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
628	ArrayRef<MachineOperand> Cond) const {
629	if (Cond [`0`].getImm() != -`1`) {
630	// Regular Bcc
631	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: AArch64::Bcc)).addImm(Val: Cond [`0`].getImm()).addMBB(MBB: TBB);
632	} else {
633	// Folded compare-and-branch
634	// Note that we use addOperand instead of addReg to keep the flags.
635
636	// cbz, cbnz
637	const MachineInstrBuilder MIB =
638	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond [`1`].getImm())).add(MO: Cond [`2`]);
639
640	// tbz/tbnz
641	if (Cond.size() > `3`)
642	MIB.add(MO: Cond [`3`]);
643
644	// cb
645	if (Cond.size() > `4`)
646	MIB.add(MO: Cond [`4`]);
647
648	MIB.addMBB(MBB: TBB);
649	}
650	}
651
652	unsigned AArch64InstrInfo::insertBranch(
653	MachineBasicBlock &MBB, MachineBasicBlock TBB, MachineBasicBlock FBB,
654	ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int BytesAdded) const* {
655	// Shouldn't be a fall through.
656	assert(TBB && "insertBranch must not be told to insert a fallthrough");
657
658	if (!FBB) {
659	if (Cond.empty()) // Unconditional branch?
660	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: AArch64::B)).addMBB(MBB: TBB);
661	else
662	instantiateCondBranch(MBB, DL, TBB, Cond);
663
664	if (BytesAdded)
665	*BytesAdded = `4`;
666
667	return `1`;
668	}
669
670	// Two-way conditional branch.
671	instantiateCondBranch(MBB, DL, TBB, Cond);
672	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: AArch64::B)).addMBB(MBB: FBB);
673
674	if (BytesAdded)
675	*BytesAdded = `8`;
676
677	return `2`;
678	}
679
680	// Find the original register that VReg is copied from.
681	static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
682	while (Register::isVirtualRegister(Reg: VReg)) {
683	const MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
684	if (!DefMI->isFullCopy())
685	return VReg;
686	VReg = DefMI->getOperand(i: `1`).getReg();
687	}
688	return VReg;
689	}
690
691	// Determine if VReg is defined by an instruction that can be folded into a
692	// csel instruction. If so, return the folded opcode, and the replacement
693	// register.
694	static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
695	unsigned NewVReg = nullptr*) {
696	VReg = removeCopies(MRI, VReg);
697	if (!Register::isVirtualRegister(Reg: VReg))
698	return `0`;
699
700	bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(RC: MRI.getRegClass(Reg: VReg));
701	const MachineInstr *DefMI = MRI.getVRegDef(Reg: VReg);
702	unsigned Opc = `0`;
703	unsigned SrcOpNum = `0`;
704	switch (DefMI->getOpcode()) {
705	case AArch64::ADDSXri:
706	case AArch64::ADDSWri:
707	// if NZCV is used, do not fold.
708	if (DefMI->findRegisterDefOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr,
709	isDead: true) == -`1`)
710	return `0`;
711	// fall-through to ADDXri and ADDWri.
712	[[fallthrough]];
713	case AArch64::ADDXri:
714	case AArch64::ADDWri:
715	// add x, 1 -> csinc.
716	if (!DefMI->getOperand(i: `2`).isImm() \|\| DefMI->getOperand(i: `2`).getImm() != `1` \|\|
717	DefMI->getOperand(i: `3`).getImm() != `0`)
718	return `0`;
719	SrcOpNum = `1`;
720	Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
721	break;
722
723	case AArch64::ORNXrr:
724	case AArch64::ORNWrr: {
725	// not x -> csinv, represented as orn dst, xzr, src.
726	unsigned ZReg = removeCopies(MRI, VReg: DefMI->getOperand(i: `1`).getReg());
727	if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
728	return `0`;
729	SrcOpNum = `2`;
730	Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
731	break;
732	}
733
734	case AArch64::SUBSXrr:
735	case AArch64::SUBSWrr:
736	// if NZCV is used, do not fold.
737	if (DefMI->findRegisterDefOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr,
738	isDead: true) == -`1`)
739	return `0`;
740	// fall-through to SUBXrr and SUBWrr.
741	[[fallthrough]];
742	case AArch64::SUBXrr:
743	case AArch64::SUBWrr: {
744	// neg x -> csneg, represented as sub dst, xzr, src.
745	unsigned ZReg = removeCopies(MRI, VReg: DefMI->getOperand(i: `1`).getReg());
746	if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
747	return `0`;
748	SrcOpNum = `2`;
749	Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
750	break;
751	}
752	default:
753	return `0`;
754	}
755	assert(Opc && SrcOpNum && "Missing parameters");
756
757	if (NewVReg)
758	*NewVReg = DefMI->getOperand(i: SrcOpNum).getReg();
759	return Opc;
760	}
761
762	bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
763	ArrayRef<MachineOperand> Cond,
764	Register DstReg, Register TrueReg,
765	Register FalseReg, int &CondCycles,
766	int &TrueCycles,
767	int &FalseCycles) const {
768	// Check register classes.
769	const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
770	const TargetRegisterClass *RC =
771	RI.getCommonSubClass(A: MRI.getRegClass(Reg: TrueReg), B: MRI.getRegClass(Reg: FalseReg));
772	if (!RC)
773	return false;
774
775	// Also need to check the dest regclass, in case we're trying to optimize
776	// something like:
777	// %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
778	if (!RI.getCommonSubClass(A: RC, B: MRI.getRegClass(Reg: DstReg)))
779	return false;
780
781	// Expanding cbz/tbz requires an extra cycle of latency on the condition.
782	unsigned ExtraCondLat = Cond.size() != `1`;
783
784	// GPRs are handled by csel.
785	// FIXME: Fold in x+1, -x, and ~x when applicable.
786	if (AArch64::GPR64allRegClass.hasSubClassEq(RC) \|\|
787	AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
788	// Single-cycle csel, csinc, csinv, and csneg.
789	CondCycles = `1` + ExtraCondLat;
790	TrueCycles = FalseCycles = `1`;
791	if (canFoldIntoCSel(MRI, VReg: TrueReg))
792	TrueCycles = `0`;
793	else if (canFoldIntoCSel(MRI, VReg: FalseReg))
794	FalseCycles = `0`;
795	return true;
796	}
797
798	// Scalar floating point is handled by fcsel.
799	// FIXME: Form fabs, fmin, and fmax when applicable.
800	if (AArch64::FPR64RegClass.hasSubClassEq(RC) \|\|
801	AArch64::FPR32RegClass.hasSubClassEq(RC)) {
802	CondCycles = `5` + ExtraCondLat;
803	TrueCycles = FalseCycles = `2`;
804	return true;
805	}
806
807	// Can't do vectors.
808	return false;
809	}
810
811	void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
812	MachineBasicBlock::iterator I,
813	const DebugLoc &DL, Register DstReg,
814	ArrayRef<MachineOperand> Cond,
815	Register TrueReg, Register FalseReg) const {
816	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
817
818	// Parse the condition code, see parseCondBranch() above.
819	AArch64CC::CondCode CC;
820	switch (Cond.size()) {
821	default:
822	llvm_unreachable("Unknown condition opcode in Cond");
823	case `1`: // b.cc
824	CC = AArch64CC::CondCode(Cond [`0`].getImm());
825	break;
826	case `3`: { // cbz/cbnz
827	// We must insert a compare against 0.
828	bool Is64Bit;
829	switch (Cond [`1`].getImm()) {
830	default:
831	llvm_unreachable("Unknown branch opcode in Cond");
832	case AArch64::CBZW:
833	Is64Bit = false;
834	CC = AArch64CC::EQ;
835	break;
836	case AArch64::CBZX:
837	Is64Bit = true;
838	CC = AArch64CC::EQ;
839	break;
840	case AArch64::CBNZW:
841	Is64Bit = false;
842	CC = AArch64CC::NE;
843	break;
844	case AArch64::CBNZX:
845	Is64Bit = true;
846	CC = AArch64CC::NE;
847	break;
848	}
849	Register SrcReg = Cond [`2`].getReg();
850	if (Is64Bit) {
851	// cmp reg, #0 is actually subs xzr, reg, #0.
852	MRI.constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR64spRegClass);
853	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::SUBSXri), DestReg: AArch64::XZR)
854	.addReg(RegNo: SrcReg)
855	.addImm(Val: `0`)
856	.addImm(Val: `0`);
857	} else {
858	MRI.constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR32spRegClass);
859	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::SUBSWri), DestReg: AArch64::WZR)
860	.addReg(RegNo: SrcReg)
861	.addImm(Val: `0`)
862	.addImm(Val: `0`);
863	}
864	break;
865	}
866	case `4`: { // tbz/tbnz
867	// We must insert a tst instruction.
868	switch (Cond [`1`].getImm()) {
869	default:
870	llvm_unreachable("Unknown branch opcode in Cond");
871	case AArch64::TBZW:
872	case AArch64::TBZX:
873	CC = AArch64CC::EQ;
874	break;
875	case AArch64::TBNZW:
876	case AArch64::TBNZX:
877	CC = AArch64CC::NE;
878	break;
879	}
880	// cmp reg, #foo is actually ands xzr, reg, #1<<foo.
881	if (Cond [`1`].getImm() == AArch64::TBZW \|\| Cond [`1`].getImm() == AArch64::TBNZW)
882	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ANDSWri), DestReg: AArch64::WZR)
883	.addReg(RegNo: Cond [`2`].getReg())
884	.addImm(
885	Val: AArch64_AM::encodeLogicalImmediate(imm: `1ull` << Cond [`3`].getImm(), regSize: `32`));
886	else
887	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ANDSXri), DestReg: AArch64::XZR)
888	.addReg(RegNo: Cond [`2`].getReg())
889	.addImm(
890	Val: AArch64_AM::encodeLogicalImmediate(imm: `1ull` << Cond [`3`].getImm(), regSize: `64`));
891	break;
892	}
893	case `5`: { // cb
894	// We must insert a cmp, that is a subs
895	// 0 1 2 3 4
896	// Cond is { -1, Opcode, CC, Op0, Op1 }
897	unsigned SUBSOpC, SUBSDestReg;
898	bool IsImm = false;
899	CC = static_cast<AArch64CC::CondCode>(Cond [`2`].getImm());
900	switch (Cond [`1`].getImm()) {
901	default:
902	llvm_unreachable("Unknown branch opcode in Cond");
903	case AArch64::CBWPri:
904	SUBSOpC = AArch64::SUBSWri;
905	SUBSDestReg = AArch64::WZR;
906	IsImm = true;
907	break;
908	case AArch64::CBXPri:
909	SUBSOpC = AArch64::SUBSXri;
910	SUBSDestReg = AArch64::XZR;
911	IsImm = true;
912	break;
913	case AArch64::CBWPrr:
914	SUBSOpC = AArch64::SUBSWrr;
915	SUBSDestReg = AArch64::WZR;
916	IsImm = false;
917	break;
918	case AArch64::CBXPrr:
919	SUBSOpC = AArch64::SUBSXrr;
920	SUBSDestReg = AArch64::XZR;
921	IsImm = false;
922	break;
923	}
924
925	if (IsImm)
926	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: SUBSOpC), DestReg: SUBSDestReg)
927	.addReg(RegNo: Cond [`3`].getReg())
928	.addImm(Val: Cond [`4`].getImm())
929	.addImm(Val: `0`);
930	else
931	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: SUBSOpC), DestReg: SUBSDestReg)
932	.addReg(RegNo: Cond [`3`].getReg())
933	.addReg(RegNo: Cond [`4`].getReg());
934	}
935	}
936
937	unsigned Opc = `0`;
938	const TargetRegisterClass RC = nullptr*;
939	bool TryFold = false;
940	if (MRI.constrainRegClass(Reg: DstReg, RC: &AArch64::GPR64RegClass)) {
941	RC = &AArch64::GPR64RegClass;
942	Opc = AArch64::CSELXr;
943	TryFold = true;
944	} else if (MRI.constrainRegClass(Reg: DstReg, RC: &AArch64::GPR32RegClass)) {
945	RC = &AArch64::GPR32RegClass;
946	Opc = AArch64::CSELWr;
947	TryFold = true;
948	} else if (MRI.constrainRegClass(Reg: DstReg, RC: &AArch64::FPR64RegClass)) {
949	RC = &AArch64::FPR64RegClass;
950	Opc = AArch64::FCSELDrrr;
951	} else if (MRI.constrainRegClass(Reg: DstReg, RC: &AArch64::FPR32RegClass)) {
952	RC = &AArch64::FPR32RegClass;
953	Opc = AArch64::FCSELSrrr;
954	}
955	assert(RC && "Unsupported regclass");
956
957	// Try folding simple instructions into the csel.
958	if (TryFold) {
959	unsigned NewVReg = `0`;
960	unsigned FoldedOpc = canFoldIntoCSel(MRI, VReg: TrueReg, NewVReg: &NewVReg);
961	if (FoldedOpc) {
962	// The folded opcodes csinc, csinv and csneg apply the operation to
963	// FalseReg, so we need to invert the condition.
964	CC = AArch64CC::getInvertedCondCode(Code: CC);
965	TrueReg = FalseReg;
966	} else
967	FoldedOpc = canFoldIntoCSel(MRI, VReg: FalseReg, NewVReg: &NewVReg);
968
969	// Fold the operation. Leave any dead instructions for DCE to clean up.
970	if (FoldedOpc) {
971	FalseReg = NewVReg;
972	Opc = FoldedOpc;
973	// This extends the live range of NewVReg.
974	MRI.clearKillFlags(Reg: NewVReg);
975	}
976	}
977
978	// Pull all virtual register into the appropriate class.
979	MRI.constrainRegClass(Reg: TrueReg, RC);
980	MRI.constrainRegClass(Reg: FalseReg, RC);
981
982	// Insert the csel.
983	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: Opc), DestReg: DstReg)
984	.addReg(RegNo: TrueReg)
985	.addReg(RegNo: FalseReg)
986	.addImm(Val: CC);
987	}
988
989	// Return true if Imm can be loaded into a register by a "cheap" sequence of
990	// instructions. For now, "cheap" means at most two instructions.
991	static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
992	if (BitSize == `32`)
993	return true;
994
995	assert(BitSize == `64` && "Only bit sizes of 32 or 64 allowed");
996	uint64_t Imm = static_cast<uint64_t>(MI.getOperand(i: `1`).getImm());
997	SmallVector<AArch64_IMM::ImmInsnModel, `4`> Is;
998	AArch64_IMM::expandMOVImm(Imm, BitSize, Insn&: Is);
999
1000	return Is.size() <= `2`;
1001	}
1002
1003	// FIXME: this implementation should be micro-architecture dependent, so a
1004	// micro-architecture target hook should be introduced here in future.
1005	bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1006	if (Subtarget.hasExynosCheapAsMoveHandling()) {
1007	if (isExynosCheapAsMove(MI))
1008	return true;
1009	return MI.isAsCheapAsAMove();
1010	}
1011
1012	switch (MI.getOpcode()) {
1013	default:
1014	return MI.isAsCheapAsAMove();
1015
1016	case AArch64::ADDWrs:
1017	case AArch64::ADDXrs:
1018	case AArch64::SUBWrs:
1019	case AArch64::SUBXrs:
1020	return Subtarget.hasALULSLFast() && MI.getOperand(i: `3`).getImm() <= `4`;
1021
1022	// If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1023	// ORRXri, it is as cheap as MOV.
1024	// Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1025	case AArch64::MOVi32imm:
1026	return isCheapImmediate(MI, BitSize: `32`);
1027	case AArch64::MOVi64imm:
1028	return isCheapImmediate(MI, BitSize: `64`);
1029	}
1030	}
1031
1032	bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1033	switch (MI.getOpcode()) {
1034	default:
1035	return false;
1036
1037	case AArch64::ADDWrs:
1038	case AArch64::ADDXrs:
1039	case AArch64::ADDSWrs:
1040	case AArch64::ADDSXrs: {
1041	unsigned Imm = MI.getOperand(i: `3`).getImm();
1042	unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1043	if (ShiftVal == `0`)
1044	return true;
1045	return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= `5`;
1046	}
1047
1048	case AArch64::ADDWrx:
1049	case AArch64::ADDXrx:
1050	case AArch64::ADDXrx64:
1051	case AArch64::ADDSWrx:
1052	case AArch64::ADDSXrx:
1053	case AArch64::ADDSXrx64: {
1054	unsigned Imm = MI.getOperand(i: `3`).getImm();
1055	switch (AArch64_AM::getArithExtendType(Imm)) {
1056	default:
1057	return false;
1058	case AArch64_AM::UXTB:
1059	case AArch64_AM::UXTH:
1060	case AArch64_AM::UXTW:
1061	case AArch64_AM::UXTX:
1062	return AArch64_AM::getArithShiftValue(Imm) <= `4`;
1063	}
1064	}
1065
1066	case AArch64::SUBWrs:
1067	case AArch64::SUBSWrs: {
1068	unsigned Imm = MI.getOperand(i: `3`).getImm();
1069	unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1070	return ShiftVal == `0` \|\|
1071	(AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == `31`);
1072	}
1073
1074	case AArch64::SUBXrs:
1075	case AArch64::SUBSXrs: {
1076	unsigned Imm = MI.getOperand(i: `3`).getImm();
1077	unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1078	return ShiftVal == `0` \|\|
1079	(AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == `63`);
1080	}
1081
1082	case AArch64::SUBWrx:
1083	case AArch64::SUBXrx:
1084	case AArch64::SUBXrx64:
1085	case AArch64::SUBSWrx:
1086	case AArch64::SUBSXrx:
1087	case AArch64::SUBSXrx64: {
1088	unsigned Imm = MI.getOperand(i: `3`).getImm();
1089	switch (AArch64_AM::getArithExtendType(Imm)) {
1090	default:
1091	return false;
1092	case AArch64_AM::UXTB:
1093	case AArch64_AM::UXTH:
1094	case AArch64_AM::UXTW:
1095	case AArch64_AM::UXTX:
1096	return AArch64_AM::getArithShiftValue(Imm) == `0`;
1097	}
1098	}
1099
1100	case AArch64::LDRBBroW:
1101	case AArch64::LDRBBroX:
1102	case AArch64::LDRBroW:
1103	case AArch64::LDRBroX:
1104	case AArch64::LDRDroW:
1105	case AArch64::LDRDroX:
1106	case AArch64::LDRHHroW:
1107	case AArch64::LDRHHroX:
1108	case AArch64::LDRHroW:
1109	case AArch64::LDRHroX:
1110	case AArch64::LDRQroW:
1111	case AArch64::LDRQroX:
1112	case AArch64::LDRSBWroW:
1113	case AArch64::LDRSBWroX:
1114	case AArch64::LDRSBXroW:
1115	case AArch64::LDRSBXroX:
1116	case AArch64::LDRSHWroW:
1117	case AArch64::LDRSHWroX:
1118	case AArch64::LDRSHXroW:
1119	case AArch64::LDRSHXroX:
1120	case AArch64::LDRSWroW:
1121	case AArch64::LDRSWroX:
1122	case AArch64::LDRSroW:
1123	case AArch64::LDRSroX:
1124	case AArch64::LDRWroW:
1125	case AArch64::LDRWroX:
1126	case AArch64::LDRXroW:
1127	case AArch64::LDRXroX:
1128	case AArch64::PRFMroW:
1129	case AArch64::PRFMroX:
1130	case AArch64::STRBBroW:
1131	case AArch64::STRBBroX:
1132	case AArch64::STRBroW:
1133	case AArch64::STRBroX:
1134	case AArch64::STRDroW:
1135	case AArch64::STRDroX:
1136	case AArch64::STRHHroW:
1137	case AArch64::STRHHroX:
1138	case AArch64::STRHroW:
1139	case AArch64::STRHroX:
1140	case AArch64::STRQroW:
1141	case AArch64::STRQroX:
1142	case AArch64::STRSroW:
1143	case AArch64::STRSroX:
1144	case AArch64::STRWroW:
1145	case AArch64::STRWroX:
1146	case AArch64::STRXroW:
1147	case AArch64::STRXroX: {
1148	unsigned IsSigned = MI.getOperand(i: `3`).getImm();
1149	return !IsSigned;
1150	}
1151	}
1152	}
1153
1154	bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1155	unsigned Opc = MI.getOpcode();
1156	switch (Opc) {
1157	default:
1158	return false;
1159	case AArch64::SEH_StackAlloc:
1160	case AArch64::SEH_SaveFPLR:
1161	case AArch64::SEH_SaveFPLR_X:
1162	case AArch64::SEH_SaveReg:
1163	case AArch64::SEH_SaveReg_X:
1164	case AArch64::SEH_SaveRegP:
1165	case AArch64::SEH_SaveRegP_X:
1166	case AArch64::SEH_SaveFReg:
1167	case AArch64::SEH_SaveFReg_X:
1168	case AArch64::SEH_SaveFRegP:
1169	case AArch64::SEH_SaveFRegP_X:
1170	case AArch64::SEH_SetFP:
1171	case AArch64::SEH_AddFP:
1172	case AArch64::SEH_Nop:
1173	case AArch64::SEH_PrologEnd:
1174	case AArch64::SEH_EpilogStart:
1175	case AArch64::SEH_EpilogEnd:
1176	case AArch64::SEH_PACSignLR:
1177	case AArch64::SEH_SaveAnyRegQP:
1178	case AArch64::SEH_SaveAnyRegQPX:
1179	case AArch64::SEH_AllocZ:
1180	case AArch64::SEH_SaveZReg:
1181	case AArch64::SEH_SavePReg:
1182	return true;
1183	}
1184	}
1185
1186	bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1187	Register &SrcReg, Register &DstReg,
1188	unsigned &SubIdx) const {
1189	switch (MI.getOpcode()) {
1190	default:
1191	return false;
1192	case AArch64::SBFMXri: // aka sxtw
1193	case AArch64::UBFMXri: // aka uxtw
1194	// Check for the 32 -> 64 bit extension case, these instructions can do
1195	// much more.
1196	if (MI.getOperand(i: `2`).getImm() != `0` \|\| MI.getOperand(i: `3`).getImm() != `31`)
1197	return false;
1198	// This is a signed or unsigned 32 -> 64 bit extension.
1199	SrcReg = MI.getOperand(i: `1`).getReg();
1200	DstReg = MI.getOperand(i: `0`).getReg();
1201	SubIdx = AArch64::sub_32;
1202	return true;
1203	}
1204	}
1205
1206	bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1207	const MachineInstr &MIa, const MachineInstr &MIb) const {
1208	const TargetRegisterInfo *TRI = &getRegisterInfo();
1209	const MachineOperand BaseOpA = nullptr, BaseOpB = nullptr;
1210	int64_t OffsetA = `0`, OffsetB = `0`;
1211	TypeSize WidthA(`0`, false), WidthB(`0`, false);
1212	bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1213
1214	assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1215	assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1216
1217	if (MIa.hasUnmodeledSideEffects() \|\| MIb.hasUnmodeledSideEffects() \|\|
1218	MIa.hasOrderedMemoryRef() \|\| MIb.hasOrderedMemoryRef())
1219	return false;
1220
1221	// Retrieve the base, offset from the base and width. Width
1222	// is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1223	// base are identical, and the offset of a lower memory access +
1224	// the width doesn't overlap the offset of a higher memory access,
1225	// then the memory accesses are different.
1226	// If OffsetAIsScalable and OffsetBIsScalable are both true, they
1227	// are assumed to have the same scale (vscale).
1228	if (getMemOperandWithOffsetWidth(MI: MIa, BaseOp&: BaseOpA, Offset&: OffsetA, OffsetIsScalable&: OffsetAIsScalable,
1229	Width&: WidthA, TRI) &&
1230	getMemOperandWithOffsetWidth(MI: MIb, BaseOp&: BaseOpB, Offset&: OffsetB, OffsetIsScalable&: OffsetBIsScalable,
1231	Width&: WidthB, TRI)) {
1232	if (BaseOpA->isIdenticalTo(Other: *BaseOpB) &&
1233	OffsetAIsScalable == OffsetBIsScalable) {
1234	int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1235	int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1236	TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1237	if (LowWidth.isScalable() == OffsetAIsScalable &&
1238	LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1239	return true;
1240	}
1241	}
1242	return false;
1243	}
1244
1245	bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1246	const MachineBasicBlock *MBB,
1247	const MachineFunction &MF) const {
1248	if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1249	return true;
1250
1251	// Do not move an instruction that can be recognized as a branch target.
1252	if (hasBTISemantics(MI))
1253	return true;
1254
1255	switch (MI.getOpcode()) {
1256	case AArch64::HINT:
1257	// CSDB hints are scheduling barriers.
1258	if (MI.getOperand(i: `0`).getImm() == `0x14`)
1259	return true;
1260	break;
1261	case AArch64::DSB:
1262	case AArch64::ISB:
1263	// DSB and ISB also are scheduling barriers.
1264	return true;
1265	case AArch64::MSRpstatesvcrImm1:
1266	// SMSTART and SMSTOP are also scheduling barriers.
1267	return true;
1268	default:;
1269	}
1270	if (isSEHInstruction(MI))
1271	return true;
1272	auto Next = std::next(x: MI.getIterator());
1273	return Next != MBB->end() && Next ->isCFIInstruction();
1274	}
1275
1276	/// analyzeCompare - For a comparison instruction, return the source registers
1277	/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1278	/// Return true if the comparison instruction can be analyzed.
1279	bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1280	Register &SrcReg2, int64_t &CmpMask,
1281	int64_t &CmpValue) const {
1282	// The first operand can be a frame index where we'd normally expect a
1283	// register.
1284	// FIXME: Pass subregisters out of analyzeCompare
1285	assert(MI.getNumOperands() >= `2` && "All AArch64 cmps should have 2 operands");
1286	if (!MI.getOperand(i: `1`).isReg() \|\| MI.getOperand(i: `1`).getSubReg())
1287	return false;
1288
1289	switch (MI.getOpcode()) {
1290	default:
1291	break;
1292	case AArch64::PTEST_PP:
1293	case AArch64::PTEST_PP_ANY:
1294	SrcReg = MI.getOperand(i: `0`).getReg();
1295	SrcReg2 = MI.getOperand(i: `1`).getReg();
1296	if (MI.getOperand(i: `2`).getSubReg())
1297	return false;
1298
1299	// Not sure about the mask and value for now...
1300	CmpMask = ~`0`;
1301	CmpValue = `0`;
1302	return true;
1303	case AArch64::SUBSWrr:
1304	case AArch64::SUBSWrs:
1305	case AArch64::SUBSWrx:
1306	case AArch64::SUBSXrr:
1307	case AArch64::SUBSXrs:
1308	case AArch64::SUBSXrx:
1309	case AArch64::ADDSWrr:
1310	case AArch64::ADDSWrs:
1311	case AArch64::ADDSWrx:
1312	case AArch64::ADDSXrr:
1313	case AArch64::ADDSXrs:
1314	case AArch64::ADDSXrx:
1315	// Replace SUBSWrr with SUBWrr if NZCV is not used.
1316	SrcReg = MI.getOperand(i: `1`).getReg();
1317	SrcReg2 = MI.getOperand(i: `2`).getReg();
1318
1319	// FIXME: Pass subregisters out of analyzeCompare
1320	if (MI.getOperand(i: `2`).getSubReg())
1321	return false;
1322
1323	CmpMask = ~`0`;
1324	CmpValue = `0`;
1325	return true;
1326	case AArch64::SUBSWri:
1327	case AArch64::ADDSWri:
1328	case AArch64::SUBSXri:
1329	case AArch64::ADDSXri:
1330	SrcReg = MI.getOperand(i: `1`).getReg();
1331	SrcReg2 = `0`;
1332	CmpMask = ~`0`;
1333	CmpValue = MI.getOperand(i: `2`).getImm();
1334	return true;
1335	case AArch64::ANDSWri:
1336	case AArch64::ANDSXri:
1337	// ANDS does not use the same encoding scheme as the others xxxS
1338	// instructions.
1339	SrcReg = MI.getOperand(i: `1`).getReg();
1340	SrcReg2 = `0`;
1341	CmpMask = ~`0`;
1342	CmpValue = AArch64_AM::decodeLogicalImmediate(
1343	val: MI.getOperand(i: `2`).getImm(),
1344	regSize: MI.getOpcode() == AArch64::ANDSWri ? `32` : `64`);
1345	return true;
1346	}
1347
1348	return false;
1349	}
1350
1351	static bool UpdateOperandRegClass(MachineInstr &Instr) {
1352	MachineBasicBlock *MBB = Instr.getParent();
1353	assert(MBB && "Can't get MachineBasicBlock here");
1354	MachineFunction *MF = MBB->getParent();
1355	assert(MF && "Can't get MachineFunction here");
1356	const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1357	const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1358	MachineRegisterInfo *MRI = &MF->getRegInfo();
1359
1360	for (unsigned OpIdx = `0`, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1361	++OpIdx) {
1362	MachineOperand &MO = Instr.getOperand(i: OpIdx);
1363	const TargetRegisterClass *OpRegCstraints =
1364	Instr.getRegClassConstraint(OpIdx, TII, TRI);
1365
1366	// If there's no constraint, there's nothing to do.
1367	if (!OpRegCstraints)
1368	continue;
1369	// If the operand is a frame index, there's nothing to do here.
1370	// A frame index operand will resolve correctly during PEI.
1371	if (MO.isFI())
1372	continue;
1373
1374	assert(MO.isReg() &&
1375	"Operand has register constraints without being a register!");
1376
1377	Register Reg = MO.getReg();
1378	if (Reg.isPhysical()) {
1379	if (!OpRegCstraints->contains(Reg))
1380	return false;
1381	} else if (!OpRegCstraints->hasSubClassEq(RC: MRI->getRegClass(Reg)) &&
1382	!MRI->constrainRegClass(Reg, RC: OpRegCstraints))
1383	return false;
1384	}
1385
1386	return true;
1387	}
1388
1389	/// Return the opcode that does not set flags when possible - otherwise
1390	/// return the original opcode. The caller is responsible to do the actual
1391	/// substitution and legality checking.
1392	static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1393	// Don't convert all compare instructions, because for some the zero register
1394	// encoding becomes the sp register.
1395	bool MIDefinesZeroReg = false;
1396	if (MI.definesRegister(Reg: AArch64::WZR, /TRI=/nullptr) \|\|
1397	MI.definesRegister(Reg: AArch64::XZR, /TRI=/nullptr))
1398	MIDefinesZeroReg = true;
1399
1400	switch (MI.getOpcode()) {
1401	default:
1402	return MI.getOpcode();
1403	case AArch64::ADDSWrr:
1404	return AArch64::ADDWrr;
1405	case AArch64::ADDSWri:
1406	return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1407	case AArch64::ADDSWrs:
1408	return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1409	case AArch64::ADDSWrx:
1410	return AArch64::ADDWrx;
1411	case AArch64::ADDSXrr:
1412	return AArch64::ADDXrr;
1413	case AArch64::ADDSXri:
1414	return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1415	case AArch64::ADDSXrs:
1416	return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1417	case AArch64::ADDSXrx:
1418	return AArch64::ADDXrx;
1419	case AArch64::SUBSWrr:
1420	return AArch64::SUBWrr;
1421	case AArch64::SUBSWri:
1422	return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1423	case AArch64::SUBSWrs:
1424	return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1425	case AArch64::SUBSWrx:
1426	return AArch64::SUBWrx;
1427	case AArch64::SUBSXrr:
1428	return AArch64::SUBXrr;
1429	case AArch64::SUBSXri:
1430	return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1431	case AArch64::SUBSXrs:
1432	return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1433	case AArch64::SUBSXrx:
1434	return AArch64::SUBXrx;
1435	}
1436	}
1437
/// Bit mask selecting which kind of condition-flag access to look for.
enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = AK_Write | AK_Read // both of the above (0x11)
};
1439
1440	/// True when condition flags are accessed (either by writing or reading)
1441	/// on the instruction trace starting at From and ending at To.
1442	///
1443	/// Note: If From and To are from different blocks it's assumed CC are accessed
1444	/// on the path.
1445	static bool areCFlagsAccessedBetweenInstrs(
1446	MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1447	const TargetRegisterInfo TRI, const* AccessKind AccessToCheck = AK_All) {
1448	// Early exit if To is at the beginning of the BB.
1449	if (To == To ->getParent()->begin())
1450	return true;
1451
1452	// Check whether the instructions are in the same basic block
1453	// If not, assume the condition flags might get modified somewhere.
1454	if (To ->getParent() != From ->getParent())
1455	return true;
1456
1457	// From must be above To.
1458	assert(std::any_of(
1459	++To.getReverse(), To->getParent()->rend(),
1460	[From](MachineInstr &MI) { return MI.getIterator() == From; }));
1461
1462	// We iterate backward starting at \p To until we hit \p From.
1463	for (const MachineInstr &Instr :
1464	instructionsWithoutDebug(It: ++To.getReverse(), End: From.getReverse())) {
1465	if (((AccessToCheck & AK_Write) &&
1466	Instr.modifiesRegister(Reg: AArch64::NZCV, TRI)) \|\|
1467	((AccessToCheck & AK_Read) && Instr.readsRegister(Reg: AArch64::NZCV, TRI)))
1468	return true;
1469	}
1470	return false;
1471	}
1472
1473	std::optional<unsigned>
1474	AArch64InstrInfo::canRemovePTestInstr(MachineInstr PTest, MachineInstr Mask,
1475	MachineInstr *Pred,
1476	const MachineRegisterInfo MRI) const* {
1477	unsigned MaskOpcode = Mask->getOpcode();
1478	unsigned PredOpcode = Pred->getOpcode();
1479	bool PredIsPTestLike = isPTestLikeOpcode(Opc: PredOpcode);
1480	bool PredIsWhileLike = isWhileOpcode(Opc: PredOpcode);
1481
1482	if (PredIsWhileLike) {
1483	// For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1484	// instruction and the condition is "any" since WHILcc does an implicit
1485	// PTEST(ALL, PG) check and PG is always a subset of ALL.
1486	if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1487	return PredOpcode;
1488
1489	// For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1490	// redundant since WHILE performs an implicit PTEST with an all active
1491	// mask.
1492	if (isPTrueOpcode(Opc: MaskOpcode) && Mask->getOperand(i: `1`).getImm() == `31` &&
1493	getElementSizeForOpcode(Opc: MaskOpcode) ==
1494	getElementSizeForOpcode(Opc: PredOpcode))
1495	return PredOpcode;
1496
1497	return {};
1498	}
1499
1500	if (PredIsPTestLike) {
1501	// For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1502	// instruction that sets the flags as PTEST would and the condition is
1503	// "any" since PG is always a subset of the governing predicate of the
1504	// ptest-like instruction.
1505	if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1506	return PredOpcode;
1507
1508	auto PTestLikeMask = MRI->getUniqueVRegDef(Reg: Pred->getOperand(i: `1`).getReg());
1509
1510	// If the PTEST like instruction's general predicate is not `Mask`, attempt
1511	// to look through a copy and try again. This is because some instructions
1512	// take a predicate whose register class is a subset of its result class.
1513	if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1514	PTestLikeMask->getOperand(i: `1`).getReg().isVirtual())
1515	PTestLikeMask =
1516	MRI->getUniqueVRegDef(Reg: PTestLikeMask->getOperand(i: `1`).getReg());
1517
1518	// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1519	// the element size matches and either the PTEST_LIKE instruction uses
1520	// the same all active mask or the condition is "any".
1521	if (isPTrueOpcode(Opc: MaskOpcode) && Mask->getOperand(i: `1`).getImm() == `31` &&
1522	getElementSizeForOpcode(Opc: MaskOpcode) ==
1523	getElementSizeForOpcode(Opc: PredOpcode)) {
1524	if (Mask == PTestLikeMask \|\| PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1525	return PredOpcode;
1526	}
1527
1528	// For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1529	// flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1530	// on 8-bit predicates like the PTEST. Otherwise, for instructions like
1531	// compare that also support 16/32/64-bit predicates, the implicit PTEST
1532	// performed by the compare could consider fewer lanes for these element
1533	// sizes.
1534	//
1535	// For example, consider
1536	//
1537	// ptrue p0.b ; P0=1111-1111-1111-1111
1538	// index z0.s, #0, #1 ; Z0=<0,1,2,3>
1539	// index z1.s, #1, #1 ; Z1=<1,2,3,4>
1540	// cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1541	// ; ^ last active
1542	// ptest p0, p1.b ; P1=0001-0001-0001-0001
1543	// ; ^ last active
1544	//
1545	// where the compare generates a canonical all active 32-bit predicate
1546	// (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1547	// active flag, whereas the PTEST instruction with the same mask doesn't.
1548	// For PTEST_ANY this doesn't apply as the flags in this case would be
1549	// identical regardless of element size.
1550	uint64_t PredElementSize = getElementSizeForOpcode(Opc: PredOpcode);
1551	if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB \|\|
1552	PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1553	return PredOpcode;
1554
1555	return {};
1556	}
1557
1558	// If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1559	// opcode so the PTEST becomes redundant.
1560	switch (PredOpcode) {
1561	case AArch64::AND_PPzPP:
1562	case AArch64::BIC_PPzPP:
1563	case AArch64::EOR_PPzPP:
1564	case AArch64::NAND_PPzPP:
1565	case AArch64::NOR_PPzPP:
1566	case AArch64::ORN_PPzPP:
1567	case AArch64::ORR_PPzPP:
1568	case AArch64::BRKA_PPzP:
1569	case AArch64::BRKPA_PPzPP:
1570	case AArch64::BRKB_PPzP:
1571	case AArch64::BRKPB_PPzPP:
1572	case AArch64::RDFFR_PPz: {
1573	// Check to see if our mask is the same. If not the resulting flag bits
1574	// may be different and we can't remove the ptest.
1575	auto *PredMask = MRI->getUniqueVRegDef(Reg: Pred->getOperand(i: `1`).getReg());
1576	if (Mask != PredMask)
1577	return {};
1578	break;
1579	}
1580	case AArch64::BRKN_PPzP: {
1581	// BRKN uses an all active implicit mask to set flags unlike the other
1582	// flag-setting instructions.
1583	// PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1584	if ((MaskOpcode != AArch64::PTRUE_B) \|\|
1585	(Mask->getOperand(i: `1`).getImm() != `31`))
1586	return {};
1587	break;
1588	}
1589	case AArch64::PTRUE_B:
1590	// PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1591	break;
1592	default:
1593	// Bail out if we don't recognize the input
1594	return {};
1595	}
1596
1597	return convertToFlagSettingOpc(Opc: PredOpcode);
1598	}
1599
1600	/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1601	/// operation which could set the flags in an identical manner
1602	bool AArch64InstrInfo::optimizePTestInstr(
1603	MachineInstr PTest, unsigned* MaskReg, unsigned PredReg,
1604	const MachineRegisterInfo MRI) const* {
1605	auto *Mask = MRI->getUniqueVRegDef(Reg: MaskReg);
1606	auto *Pred = MRI->getUniqueVRegDef(Reg: PredReg);
1607	unsigned PredOpcode = Pred->getOpcode();
1608	auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1609	if (!NewOp)
1610	return false;
1611
1612	const TargetRegisterInfo *TRI = &getRegisterInfo();
1613
1614	// If another instruction between Pred and PTest accesses flags, don't remove
1615	// the ptest or update the earlier instruction to modify them.
1616	if (areCFlagsAccessedBetweenInstrs(From: Pred, To: PTest, TRI))
1617	return false;
1618
1619	// If we pass all the checks, it's safe to remove the PTEST and use the flags
1620	// as they are prior to PTEST. Sometimes this requires the tested PTEST
1621	// operand to be replaced with an equivalent instruction that also sets the
1622	// flags.
1623	PTest->eraseFromParent();
1624	if (*NewOp != PredOpcode) {
1625	Pred->setDesc(get(Opcode: *NewOp));
1626	bool succeeded = UpdateOperandRegClass(Instr&: *Pred);
1627	(void)succeeded;
1628	assert(succeeded && "Operands have incompatible register classes!");
1629	Pred->addRegisterDefined(Reg: AArch64::NZCV, RegInfo: TRI);
1630	}
1631
1632	// Ensure that the flags def is live.
1633	if (Pred->registerDefIsDead(Reg: AArch64::NZCV, TRI)) {
1634	unsigned i = `0`, e = Pred->getNumOperands();
1635	for (; i != e; ++i) {
1636	MachineOperand &MO = Pred->getOperand(i);
1637	if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1638	MO.setIsDead(false);
1639	break;
1640	}
1641	}
1642	}
1643	return true;
1644	}
1645
1646	/// Try to optimize a compare instruction. A compare instruction is an
1647	/// instruction which produces AArch64::NZCV. It can be truly compare
1648	/// instruction
1649	/// when there are no uses of its destination register.
1650	///
1651	/// The following steps are tried in order:
1652	/// 1. Convert CmpInstr into an unconditional version.
1653	/// 2. Remove CmpInstr if above there is an instruction producing a needed
1654	/// condition code or an instruction which can be converted into such an
1655	/// instruction.
1656	/// Only comparison with zero is supported.
1657	bool AArch64InstrInfo::optimizeCompareInstr(
1658	MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1659	int64_t CmpValue, const MachineRegisterInfo MRI) const* {
1660	assert(CmpInstr.getParent());
1661	assert(MRI);
1662
1663	// Replace SUBSWrr with SUBWrr if NZCV is not used.
1664	int DeadNZCVIdx =
1665	CmpInstr.findRegisterDefOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr, isDead: true);
1666	if (DeadNZCVIdx != -`1`) {
1667	if (CmpInstr.definesRegister(Reg: AArch64::WZR, /TRI=/nullptr) \|\|
1668	CmpInstr.definesRegister(Reg: AArch64::XZR, /TRI=/nullptr)) {
1669	CmpInstr.eraseFromParent();
1670	return true;
1671	}
1672	unsigned Opc = CmpInstr.getOpcode();
1673	unsigned NewOpc = convertToNonFlagSettingOpc(MI: CmpInstr);
1674	if (NewOpc == Opc)
1675	return false;
1676	const MCInstrDesc &MCID = get(Opcode: NewOpc);
1677	CmpInstr.setDesc(MCID);
1678	CmpInstr.removeOperand(OpNo: DeadNZCVIdx);
1679	bool succeeded = UpdateOperandRegClass(Instr&: CmpInstr);
1680	(void)succeeded;
1681	assert(succeeded && "Some operands reg class are incompatible!");
1682	return true;
1683	}
1684
1685	if (CmpInstr.getOpcode() == AArch64::PTEST_PP \|\|
1686	CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1687	return optimizePTestInstr(PTest: &CmpInstr, MaskReg: SrcReg, PredReg: SrcReg2, MRI);
1688
1689	if (SrcReg2 != `0`)
1690	return false;
1691
1692	// CmpInstr is a Compare instruction if destination register is not used.
1693	if (!MRI->use_nodbg_empty(RegNo: CmpInstr.getOperand(i: `0`).getReg()))
1694	return false;
1695
1696	if (CmpValue == `0` && substituteCmpToZero(CmpInstr, SrcReg, MRI: *MRI))
1697	return true;
1698	return (CmpValue == `0` \|\| CmpValue == `1`) &&
1699	removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, MRI: *MRI);
1700	}
1701
1702	/// Get opcode of S version of Instr.
1703	/// If Instr is S version its opcode is returned.
1704	/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1705	/// or we are not interested in it.
1706	static unsigned sForm(MachineInstr &Instr) {
1707	switch (Instr.getOpcode()) {
1708	default:
1709	return AArch64::INSTRUCTION_LIST_END;
1710
1711	case AArch64::ADDSWrr:
1712	case AArch64::ADDSWri:
1713	case AArch64::ADDSXrr:
1714	case AArch64::ADDSXri:
1715	case AArch64::SUBSWrr:
1716	case AArch64::SUBSWri:
1717	case AArch64::SUBSXrr:
1718	case AArch64::SUBSXri:
1719	return Instr.getOpcode();
1720
1721	case AArch64::ADDWrr:
1722	return AArch64::ADDSWrr;
1723	case AArch64::ADDWri:
1724	return AArch64::ADDSWri;
1725	case AArch64::ADDXrr:
1726	return AArch64::ADDSXrr;
1727	case AArch64::ADDXri:
1728	return AArch64::ADDSXri;
1729	case AArch64::ADCWr:
1730	return AArch64::ADCSWr;
1731	case AArch64::ADCXr:
1732	return AArch64::ADCSXr;
1733	case AArch64::SUBWrr:
1734	return AArch64::SUBSWrr;
1735	case AArch64::SUBWri:
1736	return AArch64::SUBSWri;
1737	case AArch64::SUBXrr:
1738	return AArch64::SUBSXrr;
1739	case AArch64::SUBXri:
1740	return AArch64::SUBSXri;
1741	case AArch64::SBCWr:
1742	return AArch64::SBCSWr;
1743	case AArch64::SBCXr:
1744	return AArch64::SBCSXr;
1745	case AArch64::ANDWri:
1746	return AArch64::ANDSWri;
1747	case AArch64::ANDXri:
1748	return AArch64::ANDSXri;
1749	}
1750	}
1751
1752	/// Check if AArch64::NZCV should be alive in successors of MBB.
1753	static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1754	for (auto *BB : MBB->successors())
1755	if (BB->isLiveIn(Reg: AArch64::NZCV))
1756	return true;
1757	return false;
1758	}
1759
1760	/// \returns The condition code operand index for \p Instr if it is a branch
1761	/// or select and -1 otherwise.
1762	static int
1763	findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1764	switch (Instr.getOpcode()) {
1765	default:
1766	return -`1`;
1767
1768	case AArch64::Bcc: {
1769	int Idx = Instr.findRegisterUseOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr);
1770	assert(Idx >= `2`);
1771	return Idx - `2`;
1772	}
1773
1774	case AArch64::CSINVWr:
1775	case AArch64::CSINVXr:
1776	case AArch64::CSINCWr:
1777	case AArch64::CSINCXr:
1778	case AArch64::CSELWr:
1779	case AArch64::CSELXr:
1780	case AArch64::CSNEGWr:
1781	case AArch64::CSNEGXr:
1782	case AArch64::FCSELSrrr:
1783	case AArch64::FCSELDrrr: {
1784	int Idx = Instr.findRegisterUseOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr);
1785	assert(Idx >= `1`);
1786	return Idx - `1`;
1787	}
1788	}
1789	}
1790
1791	/// Find a condition code used by the instruction.
1792	/// Returns AArch64CC::Invalid if either the instruction does not use condition
1793	/// codes or we don't optimize CmpInstr in the presence of such instructions.
1794	static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1795	int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1796	return CCIdx >= `0` ? static_cast<AArch64CC::CondCode>(
1797	Instr.getOperand(i: CCIdx).getImm())
1798	: AArch64CC::Invalid;
1799	}
1800
1801	static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1802	assert(CC != AArch64CC::Invalid);
1803	UsedNZCV UsedFlags;
1804	switch (CC) {
1805	default:
1806	break;
1807
1808	case AArch64CC::EQ: // Z set
1809	case AArch64CC::NE: // Z clear
1810	UsedFlags.Z = true;
1811	break;
1812
1813	case AArch64CC::HI: // Z clear and C set
1814	case AArch64CC::LS: // Z set or C clear
1815	UsedFlags.Z = true;
1816	[[fallthrough]];
1817	case AArch64CC::HS: // C set
1818	case AArch64CC::LO: // C clear
1819	UsedFlags.C = true;
1820	break;
1821
1822	case AArch64CC::MI: // N set
1823	case AArch64CC::PL: // N clear
1824	UsedFlags.N = true;
1825	break;
1826
1827	case AArch64CC::VS: // V set
1828	case AArch64CC::VC: // V clear
1829	UsedFlags.V = true;
1830	break;
1831
1832	case AArch64CC::GT: // Z clear, N and V the same
1833	case AArch64CC::LE: // Z set, N and V differ
1834	UsedFlags.Z = true;
1835	[[fallthrough]];
1836	case AArch64CC::GE: // N and V the same
1837	case AArch64CC::LT: // N and V differ
1838	UsedFlags.N = true;
1839	UsedFlags.V = true;
1840	break;
1841	}
1842	return UsedFlags;
1843	}
1844
1845	/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1846	/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1847	/// \returns std::nullopt otherwise.
1848	///
1849	/// Collect instructions using that flags in \p CCUseInstrs if provided.
1850	std::optional<UsedNZCV>
1851	llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1852	const TargetRegisterInfo &TRI,
1853	SmallVectorImpl<MachineInstr > CCUseInstrs) {
1854	MachineBasicBlock *CmpParent = CmpInstr.getParent();
1855	if (MI.getParent() != CmpParent)
1856	return std::nullopt;
1857
1858	if (areCFlagsAliveInSuccessors(MBB: CmpParent))
1859	return std::nullopt;
1860
1861	UsedNZCV NZCVUsedAfterCmp;
1862	for (MachineInstr &Instr : instructionsWithoutDebug(
1863	It: std::next(x: CmpInstr.getIterator()), End: CmpParent->instr_end())) {
1864	if (Instr.readsRegister(Reg: AArch64::NZCV, TRI: &TRI)) {
1865	AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1866	if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1867	return std::nullopt;
1868	NZCVUsedAfterCmp \|= getUsedNZCV(CC);
1869	if (CCUseInstrs)
1870	CCUseInstrs->push_back(Elt: &Instr);
1871	}
1872	if (Instr.modifiesRegister(Reg: AArch64::NZCV, TRI: &TRI))
1873	break;
1874	}
1875	return NZCVUsedAfterCmp;
1876	}
1877
1878	static bool isADDSRegImm(unsigned Opcode) {
1879	return Opcode == AArch64::ADDSWri \|\| Opcode == AArch64::ADDSXri;
1880	}
1881
1882	static bool isSUBSRegImm(unsigned Opcode) {
1883	return Opcode == AArch64::SUBSWri \|\| Opcode == AArch64::SUBSXri;
1884	}
1885
1886	/// Check if CmpInstr can be substituted by MI.
1887	///
1888	/// CmpInstr can be substituted:
1889	/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1890	/// - and, MI and CmpInstr are from the same MachineBB
1891	/// - and, condition flags are not alive in successors of the CmpInstr parent
1892	/// - and, if MI opcode is the S form there must be no defs of flags between
1893	/// MI and CmpInstr
1894	/// or if MI opcode is not the S form there must be neither defs of flags
1895	/// nor uses of flags between MI and CmpInstr.
1896	/// - and, if C/V flags are not used after CmpInstr
1897	/// or if N flag is used but MI produces poison value if signed overflow
1898	/// occurs.
1899	static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1900	const TargetRegisterInfo &TRI) {
1901	// NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1902	// that may or may not set flags.
1903	assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1904
1905	const unsigned CmpOpcode = CmpInstr.getOpcode();
1906	if (!isADDSRegImm(Opcode: CmpOpcode) && !isSUBSRegImm(Opcode: CmpOpcode))
1907	return false;
1908
1909	assert((CmpInstr.getOperand(`2`).isImm() &&
1910	CmpInstr.getOperand(`2`).getImm() == `0`) &&
1911	"Caller guarantees that CmpInstr compares with constant 0");
1912
1913	std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1914	if (!NZVCUsed \|\| NZVCUsed ->C)
1915	return false;
1916
1917	// CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1918	// '%vreg = add ...' or '%vreg = sub ...'.
1919	// Condition flag V is used to indicate signed overflow.
1920	// 1) MI and CmpInstr set N and V to the same value.
1921	// 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1922	// signed overflow occurs, so CmpInstr could still be simplified away.
1923	if (NZVCUsed ->V && !MI.getFlag(Flag: MachineInstr::NoSWrap))
1924	return false;
1925
1926	AccessKind AccessToCheck = AK_Write;
1927	if (sForm(Instr&: MI) != MI.getOpcode())
1928	AccessToCheck = AK_All;
1929	return !areCFlagsAccessedBetweenInstrs(From: &MI, To: &CmpInstr, TRI: &TRI, AccessToCheck);
1930	}
1931
1932	/// Substitute an instruction comparing to zero with another instruction
1933	/// which produces needed condition flags.
1934	///
1935	/// Return true on success.
1936	bool AArch64InstrInfo::substituteCmpToZero(
1937	MachineInstr &CmpInstr, unsigned SrcReg,
1938	const MachineRegisterInfo &MRI) const {
1939	// Get the unique definition of SrcReg.
1940	MachineInstr *MI = MRI.getUniqueVRegDef(Reg: SrcReg);
1941	if (!MI)
1942	return false;
1943
1944	const TargetRegisterInfo &TRI = getRegisterInfo();
1945
1946	unsigned NewOpc = sForm(Instr&: *MI);
1947	if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1948	return false;
1949
1950	if (!canInstrSubstituteCmpInstr(MI&: *MI, CmpInstr, TRI))
1951	return false;
1952
1953	// Update the instruction to set NZCV.
1954	MI->setDesc(get(Opcode: NewOpc));
1955	CmpInstr.eraseFromParent();
1956	bool succeeded = UpdateOperandRegClass(Instr&: *MI);
1957	(void)succeeded;
1958	assert(succeeded && "Some operands reg class are incompatible!");
1959	MI->addRegisterDefined(Reg: AArch64::NZCV, RegInfo: &TRI);
1960	return true;
1961	}
1962
1963	/// \returns True if \p CmpInstr can be removed.
1964	///
1965	/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1966	/// codes used in \p CCUseInstrs must be inverted.
1967	static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1968	int CmpValue, const TargetRegisterInfo &TRI,
1969	SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1970	bool &IsInvertCC) {
1971	assert((CmpValue == `0` \|\| CmpValue == `1`) &&
1972	"Only comparisons to 0 or 1 considered for removal!");
1973
1974	// MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1975	unsigned MIOpc = MI.getOpcode();
1976	if (MIOpc == AArch64::CSINCWr) {
1977	if (MI.getOperand(i: `1`).getReg() != AArch64::WZR \|\|
1978	MI.getOperand(i: `2`).getReg() != AArch64::WZR)
1979	return false;
1980	} else if (MIOpc == AArch64::CSINCXr) {
1981	if (MI.getOperand(i: `1`).getReg() != AArch64::XZR \|\|
1982	MI.getOperand(i: `2`).getReg() != AArch64::XZR)
1983	return false;
1984	} else {
1985	return false;
1986	}
1987	AArch64CC::CondCode MICC = findCondCodeUsedByInstr(Instr: MI);
1988	if (MICC == AArch64CC::Invalid)
1989	return false;
1990
1991	// NZCV needs to be defined
1992	if (MI.findRegisterDefOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr, isDead: true) != -`1`)
1993	return false;
1994
1995	// CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1996	const unsigned CmpOpcode = CmpInstr.getOpcode();
1997	bool IsSubsRegImm = isSUBSRegImm(Opcode: CmpOpcode);
1998	if (CmpValue && !IsSubsRegImm)
1999	return false;
2000	if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(Opcode: CmpOpcode))
2001	return false;
2002
2003	// MI conditions allowed: eq, ne, mi, pl
2004	UsedNZCV MIUsedNZCV = getUsedNZCV(CC: MICC);
2005	if (MIUsedNZCV.C \|\| MIUsedNZCV.V)
2006	return false;
2007
2008	std::optional<UsedNZCV> NZCVUsedAfterCmp =
2009	examineCFlagsUse(MI, CmpInstr, TRI, CCUseInstrs: &CCUseInstrs);
2010	// Condition flags are not used in CmpInstr basic block successors and only
2011	// Z or N flags allowed to be used after CmpInstr within its basic block
2012	if (!NZCVUsedAfterCmp \|\| NZCVUsedAfterCmp ->C \|\| NZCVUsedAfterCmp ->V)
2013	return false;
2014	// Z or N flag used after CmpInstr must correspond to the flag used in MI
2015	if ((MIUsedNZCV.Z && NZCVUsedAfterCmp ->N) \|\|
2016	(MIUsedNZCV.N && NZCVUsedAfterCmp ->Z))
2017	return false;
2018	// If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2019	if (MIUsedNZCV.N && !CmpValue)
2020	return false;
2021
2022	// There must be no defs of flags between MI and CmpInstr
2023	if (areCFlagsAccessedBetweenInstrs(From: &MI, To: &CmpInstr, TRI: &TRI, AccessToCheck: AK_Write))
2024	return false;
2025
2026	// Condition code is inverted in the following cases:
2027	// 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2028	// 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2029	IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ \|\| MICC == AArch64CC::PL)) \|\|
2030	(!CmpValue && MICC == AArch64CC::NE);
2031	return true;
2032	}
2033
2034	/// Remove comparison in csinc-cmp sequence
2035	///
2036	/// Examples:
2037	/// 1. \code
2038	/// csinc w9, wzr, wzr, ne
2039	/// cmp w9, #0
2040	/// b.eq
2041	/// \endcode
2042	/// to
2043	/// \code
2044	/// csinc w9, wzr, wzr, ne
2045	/// b.ne
2046	/// \endcode
2047	///
2048	/// 2. \code
2049	/// csinc x2, xzr, xzr, mi
2050	/// cmp x2, #1
2051	/// b.pl
2052	/// \endcode
2053	/// to
2054	/// \code
2055	/// csinc x2, xzr, xzr, mi
2056	/// b.pl
2057	/// \endcode
2058	///
2059	/// \param CmpInstr comparison instruction
2060	/// \return True when comparison removed
2061	bool AArch64InstrInfo::removeCmpToZeroOrOne(
2062	MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2063	const MachineRegisterInfo &MRI) const {
2064	MachineInstr *MI = MRI.getUniqueVRegDef(Reg: SrcReg);
2065	if (!MI)
2066	return false;
2067	const TargetRegisterInfo &TRI = getRegisterInfo();
2068	SmallVector<MachineInstr *, `4`> CCUseInstrs;
2069	bool IsInvertCC = false;
2070	if (!canCmpInstrBeRemoved(MI&: *MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2071	IsInvertCC))
2072	return false;
2073	// Make transformation
2074	CmpInstr.eraseFromParent();
2075	if (IsInvertCC) {
2076	// Invert condition codes in CmpInstr CC users
2077	for (MachineInstr *CCUseInstr : CCUseInstrs) {
2078	int Idx = findCondCodeUseOperandIdxForBranchOrSelect(Instr: *CCUseInstr);
2079	assert(Idx >= `0` && "Unexpected instruction using CC.");
2080	MachineOperand &CCOperand = CCUseInstr->getOperand(i: Idx);
2081	AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2082	Code: static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2083	CCOperand.setImm(CCUse);
2084	}
2085	}
2086	return true;
2087	}
2088
2089	bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2090	if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2091	MI.getOpcode() != AArch64::CATCHRET)
2092	return false;
2093
2094	MachineBasicBlock &MBB = *MI.getParent();
2095	auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2096	auto TRI = Subtarget.getRegisterInfo();
2097	DebugLoc DL = MI.getDebugLoc();
2098
2099	if (MI.getOpcode() == AArch64::CATCHRET) {
2100	// Skip to the first instruction before the epilog.
2101	const TargetInstrInfo *TII =
2102	MBB.getParent()->getSubtarget().getInstrInfo();
2103	MachineBasicBlock *TargetMBB = MI.getOperand(i: `0`).getMBB();
2104	auto MBBI = MachineBasicBlock::iterator (MI);
2105	MachineBasicBlock::iterator FirstEpilogSEH = std::prev(x: MBBI);
2106	while (FirstEpilogSEH ->getFlag(Flag: MachineInstr::FrameDestroy) &&
2107	FirstEpilogSEH != MBB.begin())
2108	FirstEpilogSEH = std::prev(x: FirstEpilogSEH);
2109	if (FirstEpilogSEH != MBB.begin())
2110	FirstEpilogSEH = std::next(x: FirstEpilogSEH);
2111	BuildMI(BB&: MBB, I: FirstEpilogSEH, MIMD: DL, MCID: TII->get(Opcode: AArch64::ADRP))
2112	.addReg(RegNo: AArch64::X0, flags: RegState::Define)
2113	.addMBB(MBB: TargetMBB);
2114	BuildMI(BB&: MBB, I: FirstEpilogSEH, MIMD: DL, MCID: TII->get(Opcode: AArch64::ADDXri))
2115	.addReg(RegNo: AArch64::X0, flags: RegState::Define)
2116	.addReg(RegNo: AArch64::X0)
2117	.addMBB(MBB: TargetMBB)
2118	.addImm(Val: `0`);
2119	TargetMBB->setMachineBlockAddressTaken();
2120	return true;
2121	}
2122
2123	Register Reg = MI.getOperand(i: `0`).getReg();
2124	Module &M = *MBB.getParent()->getFunction().getParent();
2125	if (M.getStackProtectorGuard() == "sysreg") {
2126	const AArch64SysReg::SysReg *SrcReg =
2127	AArch64SysReg::lookupSysRegByName(Name: M.getStackProtectorGuardReg());
2128	if (!SrcReg)
2129	report_fatal_error(reason: "Unknown SysReg for Stack Protector Guard Register");
2130
2131	// mrs xN, sysreg
2132	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MRS))
2133	.addDef(RegNo: Reg, Flags: RegState::Renamable)
2134	.addImm(Val: SrcReg->Encoding);
2135	int Offset = M.getStackProtectorGuardOffset();
2136	if (Offset >= `0` && Offset <= `32760` && Offset % `8` == `0`) {
2137	// ldr xN, [xN, #offset]
2138	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui))
2139	.addDef(RegNo: Reg)
2140	.addUse(RegNo: Reg, Flags: RegState::Kill)
2141	.addImm(Val: Offset / `8`);
2142	} else if (Offset >= -`256` && Offset <= `255`) {
2143	// ldur xN, [xN, #offset]
2144	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDURXi))
2145	.addDef(RegNo: Reg)
2146	.addUse(RegNo: Reg, Flags: RegState::Kill)
2147	.addImm(Val: Offset);
2148	} else if (Offset >= -`4095` && Offset <= `4095`) {
2149	if (Offset > `0`) {
2150	// add xN, xN, #offset
2151	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::ADDXri))
2152	.addDef(RegNo: Reg)
2153	.addUse(RegNo: Reg, Flags: RegState::Kill)
2154	.addImm(Val: Offset)
2155	.addImm(Val: `0`);
2156	} else {
2157	// sub xN, xN, #offset
2158	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::SUBXri))
2159	.addDef(RegNo: Reg)
2160	.addUse(RegNo: Reg, Flags: RegState::Kill)
2161	.addImm(Val: -Offset)
2162	.addImm(Val: `0`);
2163	}
2164	// ldr xN, [xN]
2165	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui))
2166	.addDef(RegNo: Reg)
2167	.addUse(RegNo: Reg, Flags: RegState::Kill)
2168	.addImm(Val: `0`);
2169	} else {
2170	// Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2171	// than 23760.
2172	// It might be nice to use AArch64::MOVi32imm here, which would get
2173	// expanded in PreSched2 after PostRA, but our lone scratch Reg already
2174	// contains the MRS result. findScratchNonCalleeSaveRegister() in
2175	// AArch64FrameLowering might help us find such a scratch register
2176	// though. If we failed to find a scratch register, we could emit a
2177	// stream of add instructions to build up the immediate. Or, we could try
2178	// to insert a AArch64::MOVi32imm before register allocation so that we
2179	// didn't need to scavenge for a scratch register.
2180	report_fatal_error(reason: "Unable to encode Stack Protector Guard Offset");
2181	}
2182	MBB.erase(I: MI);
2183	return true;
2184	}
2185
2186	const GlobalValue *GV =
2187	cast<GlobalValue>(Val: (*MI.memoperands_begin())->getValue());
2188	const TargetMachine &TM = MBB.getParent()->getTarget();
2189	unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2190	const unsigned char MO_NC = AArch64II::MO_NC;
2191
2192	if ((OpFlags & AArch64II::MO_GOT) != `0`) {
2193	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LOADgot), DestReg: Reg)
2194	.addGlobalAddress(GV, Offset: `0`, TargetFlags: OpFlags);
2195	if (Subtarget.isTargetILP32()) {
2196	unsigned Reg32 = TRI->getSubReg(Reg, Idx: AArch64::sub_32);
2197	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRWui))
2198	.addDef(RegNo: Reg32, Flags: RegState::Dead)
2199	.addUse(RegNo: Reg, Flags: RegState::Kill)
2200	.addImm(Val: `0`)
2201	.addMemOperand(MMO: *MI.memoperands_begin())
2202	.addDef(RegNo: Reg, Flags: RegState::Implicit);
2203	} else {
2204	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui), DestReg: Reg)
2205	.addReg(RegNo: Reg, flags: RegState::Kill)
2206	.addImm(Val: `0`)
2207	.addMemOperand(MMO: *MI.memoperands_begin());
2208	}
2209	} else if (TM.getCodeModel() == CodeModel::Large) {
2210	assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2211	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MOVZXi), DestReg: Reg)
2212	.addGlobalAddress(GV, Offset: `0`, TargetFlags: AArch64II::MO_G0 \| MO_NC)
2213	.addImm(Val: `0`);
2214	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MOVKXi), DestReg: Reg)
2215	.addReg(RegNo: Reg, flags: RegState::Kill)
2216	.addGlobalAddress(GV, Offset: `0`, TargetFlags: AArch64II::MO_G1 \| MO_NC)
2217	.addImm(Val: `16`);
2218	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MOVKXi), DestReg: Reg)
2219	.addReg(RegNo: Reg, flags: RegState::Kill)
2220	.addGlobalAddress(GV, Offset: `0`, TargetFlags: AArch64II::MO_G2 \| MO_NC)
2221	.addImm(Val: `32`);
2222	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::MOVKXi), DestReg: Reg)
2223	.addReg(RegNo: Reg, flags: RegState::Kill)
2224	.addGlobalAddress(GV, Offset: `0`, TargetFlags: AArch64II::MO_G3)
2225	.addImm(Val: `48`);
2226	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui), DestReg: Reg)
2227	.addReg(RegNo: Reg, flags: RegState::Kill)
2228	.addImm(Val: `0`)
2229	.addMemOperand(MMO: *MI.memoperands_begin());
2230	} else if (TM.getCodeModel() == CodeModel::Tiny) {
2231	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::ADR), DestReg: Reg)
2232	.addGlobalAddress(GV, Offset: `0`, TargetFlags: OpFlags);
2233	} else {
2234	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::ADRP), DestReg: Reg)
2235	.addGlobalAddress(GV, Offset: `0`, TargetFlags: OpFlags \| AArch64II::MO_PAGE);
2236	unsigned char LoFlags = OpFlags \| AArch64II::MO_PAGEOFF \| MO_NC;
2237	if (Subtarget.isTargetILP32()) {
2238	unsigned Reg32 = TRI->getSubReg(Reg, Idx: AArch64::sub_32);
2239	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRWui))
2240	.addDef(RegNo: Reg32, Flags: RegState::Dead)
2241	.addUse(RegNo: Reg, Flags: RegState::Kill)
2242	.addGlobalAddress(GV, Offset: `0`, TargetFlags: LoFlags)
2243	.addMemOperand(MMO: *MI.memoperands_begin())
2244	.addDef(RegNo: Reg, Flags: RegState::Implicit);
2245	} else {
2246	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::LDRXui), DestReg: Reg)
2247	.addReg(RegNo: Reg, flags: RegState::Kill)
2248	.addGlobalAddress(GV, Offset: `0`, TargetFlags: LoFlags)
2249	.addMemOperand(MMO: *MI.memoperands_begin());
2250	}
2251	}
2252
2253	MBB.erase(I: MI);
2254
2255	return true;
2256	}
2257
2258	// Return true if this instruction simply sets its single destination register
2259	// to zero. This is equivalent to a register rename of the zero-register.
2260	bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2261	switch (MI.getOpcode()) {
2262	default:
2263	break;
2264	case AArch64::MOVZWi:
2265	case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2266	if (MI.getOperand(i: `1`).isImm() && MI.getOperand(i: `1`).getImm() == `0`) {
2267	assert(MI.getDesc().getNumOperands() == `3` &&
2268	MI.getOperand(`2`).getImm() == `0` && "invalid MOVZi operands");
2269	return true;
2270	}
2271	break;
2272	case AArch64::ANDWri: // and Rd, Rzr, #imm
2273	return MI.getOperand(i: `1`).getReg() == AArch64::WZR;
2274	case AArch64::ANDXri:
2275	return MI.getOperand(i: `1`).getReg() == AArch64::XZR;
2276	case TargetOpcode::COPY:
2277	return MI.getOperand(i: `1`).getReg() == AArch64::WZR;
2278	}
2279	return false;
2280	}
2281
2282	// Return true if this instruction simply renames a general register without
2283	// modifying bits.
2284	bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2285	switch (MI.getOpcode()) {
2286	default:
2287	break;
2288	case TargetOpcode::COPY: {
2289	// GPR32 copies will by lowered to ORRXrs
2290	Register DstReg = MI.getOperand(i: `0`).getReg();
2291	return (AArch64::GPR32RegClass.contains(Reg: DstReg) \|\|
2292	AArch64::GPR64RegClass.contains(Reg: DstReg));
2293	}
2294	case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2295	if (MI.getOperand(i: `1`).getReg() == AArch64::XZR) {
2296	assert(MI.getDesc().getNumOperands() == `4` &&
2297	MI.getOperand(`3`).getImm() == `0` && "invalid ORRrs operands");
2298	return true;
2299	}
2300	break;
2301	case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2302	if (MI.getOperand(i: `2`).getImm() == `0`) {
2303	assert(MI.getDesc().getNumOperands() == `4` &&
2304	MI.getOperand(`3`).getImm() == `0` && "invalid ADDXri operands");
2305	return true;
2306	}
2307	break;
2308	}
2309	return false;
2310	}
2311
2312	// Return true if this instruction simply renames a general register without
2313	// modifying bits.
2314	bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2315	switch (MI.getOpcode()) {
2316	default:
2317	break;
2318	case TargetOpcode::COPY: {
2319	Register DstReg = MI.getOperand(i: `0`).getReg();
2320	return AArch64::FPR128RegClass.contains(Reg: DstReg);
2321	}
2322	case AArch64::ORRv16i8:
2323	if (MI.getOperand(i: `1`).getReg() == MI.getOperand(i: `2`).getReg()) {
2324	assert(MI.getDesc().getNumOperands() == `3` && MI.getOperand(`0`).isReg() &&
2325	"invalid ORRv16i8 operands");
2326	return true;
2327	}
2328	break;
2329	}
2330	return false;
2331	}
2332
2333	Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2334	int &FrameIndex) const {
2335	switch (MI.getOpcode()) {
2336	default:
2337	break;
2338	case AArch64::LDRWui:
2339	case AArch64::LDRXui:
2340	case AArch64::LDRBui:
2341	case AArch64::LDRHui:
2342	case AArch64::LDRSui:
2343	case AArch64::LDRDui:
2344	case AArch64::LDRQui:
2345	case AArch64::LDR_PXI:
2346	if (MI.getOperand(i: `0`).getSubReg() == `0` && MI.getOperand(i: `1`).isFI() &&
2347	MI.getOperand(i: `2`).isImm() && MI.getOperand(i: `2`).getImm() == `0`) {
2348	FrameIndex = MI.getOperand(i: `1`).getIndex();
2349	return MI.getOperand(i: `0`).getReg();
2350	}
2351	break;
2352	}
2353
2354	return `0`;
2355	}
2356
2357	Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2358	int &FrameIndex) const {
2359	switch (MI.getOpcode()) {
2360	default:
2361	break;
2362	case AArch64::STRWui:
2363	case AArch64::STRXui:
2364	case AArch64::STRBui:
2365	case AArch64::STRHui:
2366	case AArch64::STRSui:
2367	case AArch64::STRDui:
2368	case AArch64::STRQui:
2369	case AArch64::STR_PXI:
2370	if (MI.getOperand(i: `0`).getSubReg() == `0` && MI.getOperand(i: `1`).isFI() &&
2371	MI.getOperand(i: `2`).isImm() && MI.getOperand(i: `2`).getImm() == `0`) {
2372	FrameIndex = MI.getOperand(i: `1`).getIndex();
2373	return MI.getOperand(i: `0`).getReg();
2374	}
2375	break;
2376	}
2377	return `0`;
2378	}
2379
2380	/// Check all MachineMemOperands for a hint to suppress pairing.
2381	bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2382	return llvm::any_of(Range: MI.memoperands(), P: [](MachineMemOperand *MMO) {
2383	return MMO->getFlags() & MOSuppressPair;
2384	});
2385	}
2386
2387	/// Set a flag on the first MachineMemOperand to suppress pairing.
2388	void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2389	if (MI.memoperands_empty())
2390	return;
2391	(*MI.memoperands_begin())->setFlags(MOSuppressPair);
2392	}
2393
2394	/// Check all MachineMemOperands for a hint that the load/store is strided.
2395	bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2396	return llvm::any_of(Range: MI.memoperands(), P: [](MachineMemOperand *MMO) {
2397	return MMO->getFlags() & MOStridedAccess;
2398	});
2399	}
2400
2401	bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2402	switch (Opc) {
2403	default:
2404	return false;
2405	case AArch64::STURSi:
2406	case AArch64::STRSpre:
2407	case AArch64::STURDi:
2408	case AArch64::STRDpre:
2409	case AArch64::STURQi:
2410	case AArch64::STRQpre:
2411	case AArch64::STURBBi:
2412	case AArch64::STURHHi:
2413	case AArch64::STURWi:
2414	case AArch64::STRWpre:
2415	case AArch64::STURXi:
2416	case AArch64::STRXpre:
2417	case AArch64::LDURSi:
2418	case AArch64::LDRSpre:
2419	case AArch64::LDURDi:
2420	case AArch64::LDRDpre:
2421	case AArch64::LDURQi:
2422	case AArch64::LDRQpre:
2423	case AArch64::LDURWi:
2424	case AArch64::LDRWpre:
2425	case AArch64::LDURXi:
2426	case AArch64::LDRXpre:
2427	case AArch64::LDRSWpre:
2428	case AArch64::LDURSWi:
2429	case AArch64::LDURHHi:
2430	case AArch64::LDURBBi:
2431	case AArch64::LDURSBWi:
2432	case AArch64::LDURSHWi:
2433	return true;
2434	}
2435	}
2436
2437	std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2438	switch (Opc) {
2439	default: return {};
2440	case AArch64::PRFMui: return AArch64::PRFUMi;
2441	case AArch64::LDRXui: return AArch64::LDURXi;
2442	case AArch64::LDRWui: return AArch64::LDURWi;
2443	case AArch64::LDRBui: return AArch64::LDURBi;
2444	case AArch64::LDRHui: return AArch64::LDURHi;
2445	case AArch64::LDRSui: return AArch64::LDURSi;
2446	case AArch64::LDRDui: return AArch64::LDURDi;
2447	case AArch64::LDRQui: return AArch64::LDURQi;
2448	case AArch64::LDRBBui: return AArch64::LDURBBi;
2449	case AArch64::LDRHHui: return AArch64::LDURHHi;
2450	case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2451	case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2452	case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2453	case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2454	case AArch64::LDRSWui: return AArch64::LDURSWi;
2455	case AArch64::STRXui: return AArch64::STURXi;
2456	case AArch64::STRWui: return AArch64::STURWi;
2457	case AArch64::STRBui: return AArch64::STURBi;
2458	case AArch64::STRHui: return AArch64::STURHi;
2459	case AArch64::STRSui: return AArch64::STURSi;
2460	case AArch64::STRDui: return AArch64::STURDi;
2461	case AArch64::STRQui: return AArch64::STURQi;
2462	case AArch64::STRBBui: return AArch64::STURBBi;
2463	case AArch64::STRHHui: return AArch64::STURHHi;
2464	}
2465	}
2466
2467	unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2468	switch (Opc) {
2469	default:
2470	llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2471	case AArch64::ADDG:
2472	case AArch64::LDAPURBi:
2473	case AArch64::LDAPURHi:
2474	case AArch64::LDAPURi:
2475	case AArch64::LDAPURSBWi:
2476	case AArch64::LDAPURSBXi:
2477	case AArch64::LDAPURSHWi:
2478	case AArch64::LDAPURSHXi:
2479	case AArch64::LDAPURSWi:
2480	case AArch64::LDAPURXi:
2481	case AArch64::LDR_PPXI:
2482	case AArch64::LDR_PXI:
2483	case AArch64::LDR_ZXI:
2484	case AArch64::LDR_ZZXI:
2485	case AArch64::LDR_ZZZXI:
2486	case AArch64::LDR_ZZZZXI:
2487	case AArch64::LDRBBui:
2488	case AArch64::LDRBui:
2489	case AArch64::LDRDui:
2490	case AArch64::LDRHHui:
2491	case AArch64::LDRHui:
2492	case AArch64::LDRQui:
2493	case AArch64::LDRSBWui:
2494	case AArch64::LDRSBXui:
2495	case AArch64::LDRSHWui:
2496	case AArch64::LDRSHXui:
2497	case AArch64::LDRSui:
2498	case AArch64::LDRSWui:
2499	case AArch64::LDRWui:
2500	case AArch64::LDRXui:
2501	case AArch64::LDURBBi:
2502	case AArch64::LDURBi:
2503	case AArch64::LDURDi:
2504	case AArch64::LDURHHi:
2505	case AArch64::LDURHi:
2506	case AArch64::LDURQi:
2507	case AArch64::LDURSBWi:
2508	case AArch64::LDURSBXi:
2509	case AArch64::LDURSHWi:
2510	case AArch64::LDURSHXi:
2511	case AArch64::LDURSi:
2512	case AArch64::LDURSWi:
2513	case AArch64::LDURWi:
2514	case AArch64::LDURXi:
2515	case AArch64::PRFMui:
2516	case AArch64::PRFUMi:
2517	case AArch64::ST2Gi:
2518	case AArch64::STGi:
2519	case AArch64::STLURBi:
2520	case AArch64::STLURHi:
2521	case AArch64::STLURWi:
2522	case AArch64::STLURXi:
2523	case AArch64::StoreSwiftAsyncContext:
2524	case AArch64::STR_PPXI:
2525	case AArch64::STR_PXI:
2526	case AArch64::STR_ZXI:
2527	case AArch64::STR_ZZXI:
2528	case AArch64::STR_ZZZXI:
2529	case AArch64::STR_ZZZZXI:
2530	case AArch64::STRBBui:
2531	case AArch64::STRBui:
2532	case AArch64::STRDui:
2533	case AArch64::STRHHui:
2534	case AArch64::STRHui:
2535	case AArch64::STRQui:
2536	case AArch64::STRSui:
2537	case AArch64::STRWui:
2538	case AArch64::STRXui:
2539	case AArch64::STURBBi:
2540	case AArch64::STURBi:
2541	case AArch64::STURDi:
2542	case AArch64::STURHHi:
2543	case AArch64::STURHi:
2544	case AArch64::STURQi:
2545	case AArch64::STURSi:
2546	case AArch64::STURWi:
2547	case AArch64::STURXi:
2548	case AArch64::STZ2Gi:
2549	case AArch64::STZGi:
2550	case AArch64::TAGPstack:
2551	case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
2552	case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
2553	return `2`;
2554	case AArch64::LD1B_D_IMM:
2555	case AArch64::LD1B_H_IMM:
2556	case AArch64::LD1B_IMM:
2557	case AArch64::LD1B_S_IMM:
2558	case AArch64::LD1D_IMM:
2559	case AArch64::LD1H_D_IMM:
2560	case AArch64::LD1H_IMM:
2561	case AArch64::LD1H_S_IMM:
2562	case AArch64::LD1RB_D_IMM:
2563	case AArch64::LD1RB_H_IMM:
2564	case AArch64::LD1RB_IMM:
2565	case AArch64::LD1RB_S_IMM:
2566	case AArch64::LD1RD_IMM:
2567	case AArch64::LD1RH_D_IMM:
2568	case AArch64::LD1RH_IMM:
2569	case AArch64::LD1RH_S_IMM:
2570	case AArch64::LD1RSB_D_IMM:
2571	case AArch64::LD1RSB_H_IMM:
2572	case AArch64::LD1RSB_S_IMM:
2573	case AArch64::LD1RSH_D_IMM:
2574	case AArch64::LD1RSH_S_IMM:
2575	case AArch64::LD1RSW_IMM:
2576	case AArch64::LD1RW_D_IMM:
2577	case AArch64::LD1RW_IMM:
2578	case AArch64::LD1SB_D_IMM:
2579	case AArch64::LD1SB_H_IMM:
2580	case AArch64::LD1SB_S_IMM:
2581	case AArch64::LD1SH_D_IMM:
2582	case AArch64::LD1SH_S_IMM:
2583	case AArch64::LD1SW_D_IMM:
2584	case AArch64::LD1W_D_IMM:
2585	case AArch64::LD1W_IMM:
2586	case AArch64::LD2B_IMM:
2587	case AArch64::LD2D_IMM:
2588	case AArch64::LD2H_IMM:
2589	case AArch64::LD2W_IMM:
2590	case AArch64::LD3B_IMM:
2591	case AArch64::LD3D_IMM:
2592	case AArch64::LD3H_IMM:
2593	case AArch64::LD3W_IMM:
2594	case AArch64::LD4B_IMM:
2595	case AArch64::LD4D_IMM:
2596	case AArch64::LD4H_IMM:
2597	case AArch64::LD4W_IMM:
2598	case AArch64::LDG:
2599	case AArch64::LDNF1B_D_IMM:
2600	case AArch64::LDNF1B_H_IMM:
2601	case AArch64::LDNF1B_IMM:
2602	case AArch64::LDNF1B_S_IMM:
2603	case AArch64::LDNF1D_IMM:
2604	case AArch64::LDNF1H_D_IMM:
2605	case AArch64::LDNF1H_IMM:
2606	case AArch64::LDNF1H_S_IMM:
2607	case AArch64::LDNF1SB_D_IMM:
2608	case AArch64::LDNF1SB_H_IMM:
2609	case AArch64::LDNF1SB_S_IMM:
2610	case AArch64::LDNF1SH_D_IMM:
2611	case AArch64::LDNF1SH_S_IMM:
2612	case AArch64::LDNF1SW_D_IMM:
2613	case AArch64::LDNF1W_D_IMM:
2614	case AArch64::LDNF1W_IMM:
2615	case AArch64::LDNPDi:
2616	case AArch64::LDNPQi:
2617	case AArch64::LDNPSi:
2618	case AArch64::LDNPWi:
2619	case AArch64::LDNPXi:
2620	case AArch64::LDNT1B_ZRI:
2621	case AArch64::LDNT1D_ZRI:
2622	case AArch64::LDNT1H_ZRI:
2623	case AArch64::LDNT1W_ZRI:
2624	case AArch64::LDPDi:
2625	case AArch64::LDPQi:
2626	case AArch64::LDPSi:
2627	case AArch64::LDPWi:
2628	case AArch64::LDPXi:
2629	case AArch64::LDRBBpost:
2630	case AArch64::LDRBBpre:
2631	case AArch64::LDRBpost:
2632	case AArch64::LDRBpre:
2633	case AArch64::LDRDpost:
2634	case AArch64::LDRDpre:
2635	case AArch64::LDRHHpost:
2636	case AArch64::LDRHHpre:
2637	case AArch64::LDRHpost:
2638	case AArch64::LDRHpre:
2639	case AArch64::LDRQpost:
2640	case AArch64::LDRQpre:
2641	case AArch64::LDRSpost:
2642	case AArch64::LDRSpre:
2643	case AArch64::LDRWpost:
2644	case AArch64::LDRWpre:
2645	case AArch64::LDRXpost:
2646	case AArch64::LDRXpre:
2647	case AArch64::ST1B_D_IMM:
2648	case AArch64::ST1B_H_IMM:
2649	case AArch64::ST1B_IMM:
2650	case AArch64::ST1B_S_IMM:
2651	case AArch64::ST1D_IMM:
2652	case AArch64::ST1H_D_IMM:
2653	case AArch64::ST1H_IMM:
2654	case AArch64::ST1H_S_IMM:
2655	case AArch64::ST1W_D_IMM:
2656	case AArch64::ST1W_IMM:
2657	case AArch64::ST2B_IMM:
2658	case AArch64::ST2D_IMM:
2659	case AArch64::ST2H_IMM:
2660	case AArch64::ST2W_IMM:
2661	case AArch64::ST3B_IMM:
2662	case AArch64::ST3D_IMM:
2663	case AArch64::ST3H_IMM:
2664	case AArch64::ST3W_IMM:
2665	case AArch64::ST4B_IMM:
2666	case AArch64::ST4D_IMM:
2667	case AArch64::ST4H_IMM:
2668	case AArch64::ST4W_IMM:
2669	case AArch64::STGPi:
2670	case AArch64::STGPreIndex:
2671	case AArch64::STZGPreIndex:
2672	case AArch64::ST2GPreIndex:
2673	case AArch64::STZ2GPreIndex:
2674	case AArch64::STGPostIndex:
2675	case AArch64::STZGPostIndex:
2676	case AArch64::ST2GPostIndex:
2677	case AArch64::STZ2GPostIndex:
2678	case AArch64::STNPDi:
2679	case AArch64::STNPQi:
2680	case AArch64::STNPSi:
2681	case AArch64::STNPWi:
2682	case AArch64::STNPXi:
2683	case AArch64::STNT1B_ZRI:
2684	case AArch64::STNT1D_ZRI:
2685	case AArch64::STNT1H_ZRI:
2686	case AArch64::STNT1W_ZRI:
2687	case AArch64::STPDi:
2688	case AArch64::STPQi:
2689	case AArch64::STPSi:
2690	case AArch64::STPWi:
2691	case AArch64::STPXi:
2692	case AArch64::STRBBpost:
2693	case AArch64::STRBBpre:
2694	case AArch64::STRBpost:
2695	case AArch64::STRBpre:
2696	case AArch64::STRDpost:
2697	case AArch64::STRDpre:
2698	case AArch64::STRHHpost:
2699	case AArch64::STRHHpre:
2700	case AArch64::STRHpost:
2701	case AArch64::STRHpre:
2702	case AArch64::STRQpost:
2703	case AArch64::STRQpre:
2704	case AArch64::STRSpost:
2705	case AArch64::STRSpre:
2706	case AArch64::STRWpost:
2707	case AArch64::STRWpre:
2708	case AArch64::STRXpost:
2709	case AArch64::STRXpre:
2710	return `3`;
2711	case AArch64::LDPDpost:
2712	case AArch64::LDPDpre:
2713	case AArch64::LDPQpost:
2714	case AArch64::LDPQpre:
2715	case AArch64::LDPSpost:
2716	case AArch64::LDPSpre:
2717	case AArch64::LDPWpost:
2718	case AArch64::LDPWpre:
2719	case AArch64::LDPXpost:
2720	case AArch64::LDPXpre:
2721	case AArch64::STGPpre:
2722	case AArch64::STGPpost:
2723	case AArch64::STPDpost:
2724	case AArch64::STPDpre:
2725	case AArch64::STPQpost:
2726	case AArch64::STPQpre:
2727	case AArch64::STPSpost:
2728	case AArch64::STPSpre:
2729	case AArch64::STPWpost:
2730	case AArch64::STPWpre:
2731	case AArch64::STPXpost:
2732	case AArch64::STPXpre:
2733	return `4`;
2734	}
2735	}
2736
2737	bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2738	switch (MI.getOpcode()) {
2739	default:
2740	return false;
2741	// Scaled instructions.
2742	case AArch64::STRSui:
2743	case AArch64::STRDui:
2744	case AArch64::STRQui:
2745	case AArch64::STRXui:
2746	case AArch64::STRWui:
2747	case AArch64::LDRSui:
2748	case AArch64::LDRDui:
2749	case AArch64::LDRQui:
2750	case AArch64::LDRXui:
2751	case AArch64::LDRWui:
2752	case AArch64::LDRSWui:
2753	// Unscaled instructions.
2754	case AArch64::STURSi:
2755	case AArch64::STRSpre:
2756	case AArch64::STURDi:
2757	case AArch64::STRDpre:
2758	case AArch64::STURQi:
2759	case AArch64::STRQpre:
2760	case AArch64::STURWi:
2761	case AArch64::STRWpre:
2762	case AArch64::STURXi:
2763	case AArch64::STRXpre:
2764	case AArch64::LDURSi:
2765	case AArch64::LDRSpre:
2766	case AArch64::LDURDi:
2767	case AArch64::LDRDpre:
2768	case AArch64::LDURQi:
2769	case AArch64::LDRQpre:
2770	case AArch64::LDURWi:
2771	case AArch64::LDRWpre:
2772	case AArch64::LDURXi:
2773	case AArch64::LDRXpre:
2774	case AArch64::LDURSWi:
2775	case AArch64::LDRSWpre:
2776	// SVE instructions.
2777	case AArch64::LDR_ZXI:
2778	case AArch64::STR_ZXI:
2779	return true;
2780	}
2781	}
2782
2783	bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2784	switch (MI.getOpcode()) {
2785	default:
2786	assert((!MI.isCall() \|\| !MI.isReturn()) &&
2787	"Unexpected instruction - was a new tail call opcode introduced?");
2788	return false;
2789	case AArch64::TCRETURNdi:
2790	case AArch64::TCRETURNri:
2791	case AArch64::TCRETURNrix16x17:
2792	case AArch64::TCRETURNrix17:
2793	case AArch64::TCRETURNrinotx16:
2794	case AArch64::TCRETURNriALL:
2795	case AArch64::AUTH_TCRETURN:
2796	case AArch64::AUTH_TCRETURN_BTI:
2797	return true;
2798	}
2799	}
2800
2801	unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2802	switch (Opc) {
2803	default:
2804	llvm_unreachable("Opcode has no flag setting equivalent!");
2805	// 32-bit cases:
2806	case AArch64::ADDWri:
2807	return AArch64::ADDSWri;
2808	case AArch64::ADDWrr:
2809	return AArch64::ADDSWrr;
2810	case AArch64::ADDWrs:
2811	return AArch64::ADDSWrs;
2812	case AArch64::ADDWrx:
2813	return AArch64::ADDSWrx;
2814	case AArch64::ANDWri:
2815	return AArch64::ANDSWri;
2816	case AArch64::ANDWrr:
2817	return AArch64::ANDSWrr;
2818	case AArch64::ANDWrs:
2819	return AArch64::ANDSWrs;
2820	case AArch64::BICWrr:
2821	return AArch64::BICSWrr;
2822	case AArch64::BICWrs:
2823	return AArch64::BICSWrs;
2824	case AArch64::SUBWri:
2825	return AArch64::SUBSWri;
2826	case AArch64::SUBWrr:
2827	return AArch64::SUBSWrr;
2828	case AArch64::SUBWrs:
2829	return AArch64::SUBSWrs;
2830	case AArch64::SUBWrx:
2831	return AArch64::SUBSWrx;
2832	// 64-bit cases:
2833	case AArch64::ADDXri:
2834	return AArch64::ADDSXri;
2835	case AArch64::ADDXrr:
2836	return AArch64::ADDSXrr;
2837	case AArch64::ADDXrs:
2838	return AArch64::ADDSXrs;
2839	case AArch64::ADDXrx:
2840	return AArch64::ADDSXrx;
2841	case AArch64::ANDXri:
2842	return AArch64::ANDSXri;
2843	case AArch64::ANDXrr:
2844	return AArch64::ANDSXrr;
2845	case AArch64::ANDXrs:
2846	return AArch64::ANDSXrs;
2847	case AArch64::BICXrr:
2848	return AArch64::BICSXrr;
2849	case AArch64::BICXrs:
2850	return AArch64::BICSXrs;
2851	case AArch64::SUBXri:
2852	return AArch64::SUBSXri;
2853	case AArch64::SUBXrr:
2854	return AArch64::SUBSXrr;
2855	case AArch64::SUBXrs:
2856	return AArch64::SUBSXrs;
2857	case AArch64::SUBXrx:
2858	return AArch64::SUBSXrx;
2859	// SVE instructions:
2860	case AArch64::AND_PPzPP:
2861	return AArch64::ANDS_PPzPP;
2862	case AArch64::BIC_PPzPP:
2863	return AArch64::BICS_PPzPP;
2864	case AArch64::EOR_PPzPP:
2865	return AArch64::EORS_PPzPP;
2866	case AArch64::NAND_PPzPP:
2867	return AArch64::NANDS_PPzPP;
2868	case AArch64::NOR_PPzPP:
2869	return AArch64::NORS_PPzPP;
2870	case AArch64::ORN_PPzPP:
2871	return AArch64::ORNS_PPzPP;
2872	case AArch64::ORR_PPzPP:
2873	return AArch64::ORRS_PPzPP;
2874	case AArch64::BRKA_PPzP:
2875	return AArch64::BRKAS_PPzP;
2876	case AArch64::BRKPA_PPzPP:
2877	return AArch64::BRKPAS_PPzPP;
2878	case AArch64::BRKB_PPzP:
2879	return AArch64::BRKBS_PPzP;
2880	case AArch64::BRKPB_PPzPP:
2881	return AArch64::BRKPBS_PPzPP;
2882	case AArch64::BRKN_PPzP:
2883	return AArch64::BRKNS_PPzP;
2884	case AArch64::RDFFR_PPz:
2885	return AArch64::RDFFRS_PPz;
2886	case AArch64::PTRUE_B:
2887	return AArch64::PTRUES_B;
2888	}
2889	}
2890
2891	// Is this a candidate for ld/st merging or pairing? For example, we don't
2892	// touch volatiles or load/stores that have a hint to avoid pair formation.
2893	bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2894
2895	bool IsPreLdSt = isPreLdSt(MI);
2896
2897	// If this is a volatile load/store, don't mess with it.
2898	if (MI.hasOrderedMemoryRef())
2899	return false;
2900
2901	// Make sure this is a reg/fi+imm (as opposed to an address reloc).
2902	// For Pre-inc LD/ST, the operand is shifted by one.
2903	assert((MI.getOperand(IsPreLdSt ? `2` : `1`).isReg() \|\|
2904	MI.getOperand(IsPreLdSt ? `2` : `1`).isFI()) &&
2905	"Expected a reg or frame index operand.");
2906
2907	// For Pre-indexed addressing quadword instructions, the third operand is the
2908	// immediate value.
2909	bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(i: `3`).isImm();
2910
2911	if (!MI.getOperand(i: `2`).isImm() && !IsImmPreLdSt)
2912	return false;
2913
2914	// Can't merge/pair if the instruction modifies the base register.
2915	// e.g., ldr x0, [x0]
2916	// This case will never occur with an FI base.
2917	// However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2918	// STR<S,D,Q,W,X>pre, it can be merged.
2919	// For example:
2920	// ldr q0, [x11, #32]!
2921	// ldr q1, [x11, #16]
2922	// to
2923	// ldp q0, q1, [x11, #32]!
2924	if (MI.getOperand(i: `1`).isReg() && !IsPreLdSt) {
2925	Register BaseReg = MI.getOperand(i: `1`).getReg();
2926	const TargetRegisterInfo *TRI = &getRegisterInfo();
2927	if (MI.modifiesRegister(Reg: BaseReg, TRI))
2928	return false;
2929	}
2930
2931	// Pairing SVE fills/spills is only valid for little-endian targets that
2932	// implement VLS 128.
2933	switch (MI.getOpcode()) {
2934	default:
2935	break;
2936	case AArch64::LDR_ZXI:
2937	case AArch64::STR_ZXI:
2938	if (!Subtarget.isLittleEndian() \|\|
2939	Subtarget.getSVEVectorSizeInBits() != `128`)
2940	return false;
2941	}
2942
2943	// Check if this load/store has a hint to avoid pair formation.
2944	// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2945	if (isLdStPairSuppressed(MI))
2946	return false;
2947
2948	// Do not pair any callee-save store/reload instructions in the
2949	// prologue/epilogue if the CFI information encoded the operations as separate
2950	// instructions, as that will cause the size of the actual prologue to mismatch
2951	// with the prologue size recorded in the Windows CFI.
2952	const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2953	bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2954	MI.getMF()->getFunction().needsUnwindTableEntry();
2955	if (NeedsWinCFI && (MI.getFlag(Flag: MachineInstr::FrameSetup) \|\|
2956	MI.getFlag(Flag: MachineInstr::FrameDestroy)))
2957	return false;
2958
2959	// On some CPUs quad load/store pairs are slower than two single load/stores.
2960	if (Subtarget.isPaired128Slow()) {
2961	switch (MI.getOpcode()) {
2962	default:
2963	break;
2964	case AArch64::LDURQi:
2965	case AArch64::STURQi:
2966	case AArch64::LDRQui:
2967	case AArch64::STRQui:
2968	return false;
2969	}
2970	}
2971
2972	return true;
2973	}
2974
2975	bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2976	const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2977	int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2978	const TargetRegisterInfo TRI) const* {
2979	if (!LdSt.mayLoadOrStore())
2980	return false;
2981
2982	const MachineOperand *BaseOp;
2983	TypeSize WidthN(`0`, false);
2984	if (!getMemOperandWithOffsetWidth(MI: LdSt, BaseOp, Offset, OffsetIsScalable,
2985	Width&: WidthN, TRI))
2986	return false;
2987	// The maximum vscale is 16 under AArch64, return the maximal extent for the
2988	// vector.
2989	Width = LocationSize::precise(Value: WidthN);
2990	BaseOps.push_back(Elt: BaseOp);
2991	return true;
2992	}
2993
2994	std::optional<ExtAddrMode>
2995	AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2996	const TargetRegisterInfo TRI) const* {
2997	const MachineOperand Base; // Filled with the base operand of MI.*
2998	int64_t Offset; // Filled with the offset of MI.
2999	bool OffsetIsScalable;
3000	if (!getMemOperandWithOffset(MI: MemI, BaseOp&: Base, Offset, OffsetIsScalable, TRI))
3001	return std::nullopt;
3002
3003	if (!Base->isReg())
3004	return std::nullopt;
3005	ExtAddrMode AM;
3006	AM.BaseReg = Base->getReg();
3007	AM.Displacement = Offset;
3008	AM.ScaledReg = `0`;
3009	AM.Scale = `0`;
3010	return AM;
3011	}
3012
3013	bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3014	Register Reg,
3015	const MachineInstr &AddrI,
3016	ExtAddrMode &AM) const {
3017	// Filter out instructions into which we cannot fold.
3018	unsigned NumBytes;
3019	int64_t OffsetScale = `1`;
3020	switch (MemI.getOpcode()) {
3021	default:
3022	return false;
3023
3024	case AArch64::LDURQi:
3025	case AArch64::STURQi:
3026	NumBytes = `16`;
3027	break;
3028
3029	case AArch64::LDURDi:
3030	case AArch64::STURDi:
3031	case AArch64::LDURXi:
3032	case AArch64::STURXi:
3033	NumBytes = `8`;
3034	break;
3035
3036	case AArch64::LDURWi:
3037	case AArch64::LDURSWi:
3038	case AArch64::STURWi:
3039	NumBytes = `4`;
3040	break;
3041
3042	case AArch64::LDURHi:
3043	case AArch64::STURHi:
3044	case AArch64::LDURHHi:
3045	case AArch64::STURHHi:
3046	case AArch64::LDURSHXi:
3047	case AArch64::LDURSHWi:
3048	NumBytes = `2`;
3049	break;
3050
3051	case AArch64::LDRBroX:
3052	case AArch64::LDRBBroX:
3053	case AArch64::LDRSBXroX:
3054	case AArch64::LDRSBWroX:
3055	case AArch64::STRBroX:
3056	case AArch64::STRBBroX:
3057	case AArch64::LDURBi:
3058	case AArch64::LDURBBi:
3059	case AArch64::LDURSBXi:
3060	case AArch64::LDURSBWi:
3061	case AArch64::STURBi:
3062	case AArch64::STURBBi:
3063	case AArch64::LDRBui:
3064	case AArch64::LDRBBui:
3065	case AArch64::LDRSBXui:
3066	case AArch64::LDRSBWui:
3067	case AArch64::STRBui:
3068	case AArch64::STRBBui:
3069	NumBytes = `1`;
3070	break;
3071
3072	case AArch64::LDRQroX:
3073	case AArch64::STRQroX:
3074	case AArch64::LDRQui:
3075	case AArch64::STRQui:
3076	NumBytes = `16`;
3077	OffsetScale = `16`;
3078	break;
3079
3080	case AArch64::LDRDroX:
3081	case AArch64::STRDroX:
3082	case AArch64::LDRXroX:
3083	case AArch64::STRXroX:
3084	case AArch64::LDRDui:
3085	case AArch64::STRDui:
3086	case AArch64::LDRXui:
3087	case AArch64::STRXui:
3088	NumBytes = `8`;
3089	OffsetScale = `8`;
3090	break;
3091
3092	case AArch64::LDRWroX:
3093	case AArch64::LDRSWroX:
3094	case AArch64::STRWroX:
3095	case AArch64::LDRWui:
3096	case AArch64::LDRSWui:
3097	case AArch64::STRWui:
3098	NumBytes = `4`;
3099	OffsetScale = `4`;
3100	break;
3101
3102	case AArch64::LDRHroX:
3103	case AArch64::STRHroX:
3104	case AArch64::LDRHHroX:
3105	case AArch64::STRHHroX:
3106	case AArch64::LDRSHXroX:
3107	case AArch64::LDRSHWroX:
3108	case AArch64::LDRHui:
3109	case AArch64::STRHui:
3110	case AArch64::LDRHHui:
3111	case AArch64::STRHHui:
3112	case AArch64::LDRSHXui:
3113	case AArch64::LDRSHWui:
3114	NumBytes = `2`;
3115	OffsetScale = `2`;
3116	break;
3117	}
3118
3119	// Check the fold operand is not the loaded/stored value.
3120	const MachineOperand &BaseRegOp = MemI.getOperand(i: `0`);
3121	if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3122	return false;
3123
3124	// Handle memory instructions with a [Reg, Reg] addressing mode.
3125	if (MemI.getOperand(i: `2`).isReg()) {
3126	// Bail if the addressing mode already includes extension of the offset
3127	// register.
3128	if (MemI.getOperand(i: `3`).getImm())
3129	return false;
3130
3131	// Check if we actually have a scaled offset.
3132	if (MemI.getOperand(i: `4`).getImm() == `0`)
3133	OffsetScale = `1`;
3134
3135	// If the address instructions is folded into the base register, then the
3136	// addressing mode must not have a scale. Then we can swap the base and the
3137	// scaled registers.
3138	if (MemI.getOperand(i: `1`).getReg() == Reg && OffsetScale != `1`)
3139	return false;
3140
3141	switch (AddrI.getOpcode()) {
3142	default:
3143	return false;
3144
3145	case AArch64::SBFMXri:
3146	// sxtw Xa, Wm
3147	// ldr Xd, [Xn, Xa, lsl #N]
3148	// ->
3149	// ldr Xd, [Xn, Wm, sxtw #N]
3150	if (AddrI.getOperand(i: `2`).getImm() != `0` \|\|
3151	AddrI.getOperand(i: `3`).getImm() != `31`)
3152	return false;
3153
3154	AM.BaseReg = MemI.getOperand(i: `1`).getReg();
3155	if (AM.BaseReg == Reg)
3156	AM.BaseReg = MemI.getOperand(i: `2`).getReg();
3157	AM.ScaledReg = AddrI.getOperand(i: `1`).getReg();
3158	AM.Scale = OffsetScale;
3159	AM.Displacement = `0`;
3160	AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3161	return true;
3162
3163	case TargetOpcode::SUBREG_TO_REG: {
3164	// mov Wa, Wm
3165	// ldr Xd, [Xn, Xa, lsl #N]
3166	// ->
3167	// ldr Xd, [Xn, Wm, uxtw #N]
3168
3169	// Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3170	if (AddrI.getOperand(i: `1`).getImm() != `0` \|\|
3171	AddrI.getOperand(i: `3`).getImm() != AArch64::sub_32)
3172	return false;
3173
3174	const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3175	Register OffsetReg = AddrI.getOperand(i: `2`).getReg();
3176	if (!OffsetReg.isVirtual() \|\| !MRI.hasOneNonDBGUse(RegNo: OffsetReg))
3177	return false;
3178
3179	const MachineInstr &DefMI = *MRI.getVRegDef(Reg: OffsetReg);
3180	if (DefMI.getOpcode() != AArch64::ORRWrs \|\|
3181	DefMI.getOperand(i: `1`).getReg() != AArch64::WZR \|\|
3182	DefMI.getOperand(i: `3`).getImm() != `0`)
3183	return false;
3184
3185	AM.BaseReg = MemI.getOperand(i: `1`).getReg();
3186	if (AM.BaseReg == Reg)
3187	AM.BaseReg = MemI.getOperand(i: `2`).getReg();
3188	AM.ScaledReg = DefMI.getOperand(i: `2`).getReg();
3189	AM.Scale = OffsetScale;
3190	AM.Displacement = `0`;
3191	AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3192	return true;
3193	}
3194	}
3195	}
3196
3197	// Handle memory instructions with a [Reg, #Imm] addressing mode.
3198
3199	// Check we are not breaking a potential conversion to an LDP.
3200	auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3201	int64_t NewOffset) -> bool {
3202	int64_t MinOffset, MaxOffset;
3203	switch (NumBytes) {
3204	default:
3205	return true;
3206	case `4`:
3207	MinOffset = -`256`;
3208	MaxOffset = `252`;
3209	break;
3210	case `8`:
3211	MinOffset = -`512`;
3212	MaxOffset = `504`;
3213	break;
3214	case `16`:
3215	MinOffset = -`1024`;
3216	MaxOffset = `1008`;
3217	break;
3218	}
3219	return OldOffset < MinOffset \|\| OldOffset > MaxOffset \|\|
3220	(NewOffset >= MinOffset && NewOffset <= MaxOffset);
3221	};
3222	auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3223	int64_t OldOffset = MemI.getOperand(i: `2`).getImm() * OffsetScale;
3224	int64_t NewOffset = OldOffset + Disp;
3225	if (!isLegalAddressingMode(NumBytes, Offset: NewOffset, / Scale / `0`))
3226	return false;
3227	// If the old offset would fit into an LDP, but the new offset wouldn't,
3228	// bail out.
3229	if (!validateOffsetForLDP (NumBytes, OldOffset, NewOffset))
3230	return false;
3231	AM.BaseReg = AddrI.getOperand(i: `1`).getReg();
3232	AM.ScaledReg = `0`;
3233	AM.Scale = `0`;
3234	AM.Displacement = NewOffset;
3235	AM.Form = ExtAddrMode::Formula::Basic;
3236	return true;
3237	};
3238
3239	auto canFoldAddRegIntoAddrMode =
3240	[&](int64_t Scale,
3241	ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3242	if (MemI.getOperand(i: `2`).getImm() != `0`)
3243	return false;
3244	if ((unsigned)Scale != Scale)
3245	return false;
3246	if (!isLegalAddressingMode(NumBytes, / Offset / `0`, Scale))
3247	return false;
3248	AM.BaseReg = AddrI.getOperand(i: `1`).getReg();
3249	AM.ScaledReg = AddrI.getOperand(i: `2`).getReg();
3250	AM.Scale = Scale;
3251	AM.Displacement = `0`;
3252	AM.Form = Form;
3253	return true;
3254	};
3255
3256	auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3257	unsigned Opcode = MemI.getOpcode();
3258	return (Opcode == AArch64::STURQi \|\| Opcode == AArch64::STRQui) &&
3259	Subtarget.isSTRQroSlow();
3260	};
3261
3262	int64_t Disp = `0`;
3263	const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3264	switch (AddrI.getOpcode()) {
3265	default:
3266	return false;
3267
3268	case AArch64::ADDXri:
3269	// add Xa, Xn, #N
3270	// ldr Xd, [Xa, #M]
3271	// ->
3272	// ldr Xd, [Xn, #N'+M]
3273	Disp = AddrI.getOperand(i: `2`).getImm() << AddrI.getOperand(i: `3`).getImm();
3274	return canFoldAddSubImmIntoAddrMode (Disp);
3275
3276	case AArch64::SUBXri:
3277	// sub Xa, Xn, #N
3278	// ldr Xd, [Xa, #M]
3279	// ->
3280	// ldr Xd, [Xn, #N'+M]
3281	Disp = AddrI.getOperand(i: `2`).getImm() << AddrI.getOperand(i: `3`).getImm();
3282	return canFoldAddSubImmIntoAddrMode (-Disp);
3283
3284	case AArch64::ADDXrs: {
3285	// add Xa, Xn, Xm, lsl #N
3286	// ldr Xd, [Xa]
3287	// ->
3288	// ldr Xd, [Xn, Xm, lsl #N]
3289
3290	// Don't fold the add if the result would be slower, unless optimising for
3291	// size.
3292	unsigned Shift = static_cast<unsigned>(AddrI.getOperand(i: `3`).getImm());
3293	if (AArch64_AM::getShiftType(Imm: Shift) != AArch64_AM::ShiftExtendType::LSL)
3294	return false;
3295	Shift = AArch64_AM::getShiftValue(Imm: Shift);
3296	if (!OptSize) {
3297	if (Shift != `2` && Shift != `3` && Subtarget.hasAddrLSLSlow14())
3298	return false;
3299	if (avoidSlowSTRQ (MemI))
3300	return false;
3301	}
3302	return canFoldAddRegIntoAddrMode (`1ULL` << Shift);
3303	}
3304
3305	case AArch64::ADDXrr:
3306	// add Xa, Xn, Xm
3307	// ldr Xd, [Xa]
3308	// ->
3309	// ldr Xd, [Xn, Xm, lsl #0]
3310
3311	// Don't fold the add if the result would be slower, unless optimising for
3312	// size.
3313	if (!OptSize && avoidSlowSTRQ (MemI))
3314	return false;
3315	return canFoldAddRegIntoAddrMode (`1`);
3316
3317	case AArch64::ADDXrx:
3318	// add Xa, Xn, Wm, {s,u}xtw #N
3319	// ldr Xd, [Xa]
3320	// ->
3321	// ldr Xd, [Xn, Wm, {s,u}xtw #N]
3322
3323	// Don't fold the add if the result would be slower, unless optimising for
3324	// size.
3325	if (!OptSize && avoidSlowSTRQ (MemI))
3326	return false;
3327
3328	// Can fold only sign-/zero-extend of a word.
3329	unsigned Imm = static_cast<unsigned>(AddrI.getOperand(i: `3`).getImm());
3330	AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3331	if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3332	return false;
3333
3334	return canFoldAddRegIntoAddrMode (
3335	`1ULL` << AArch64_AM::getArithShiftValue(Imm),
3336	(Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3337	: ExtAddrMode::Formula::ZExtScaledReg);
3338	}
3339	}
3340
3341	// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3342	// return the opcode of an instruction performing the same operation, but using
3343	// the [Reg, Reg] addressing mode.
3344	static unsigned regOffsetOpcode(unsigned Opcode) {
3345	switch (Opcode) {
3346	default:
3347	llvm_unreachable("Address folding not implemented for instruction");
3348
3349	case AArch64::LDURQi:
3350	case AArch64::LDRQui:
3351	return AArch64::LDRQroX;
3352	case AArch64::STURQi:
3353	case AArch64::STRQui:
3354	return AArch64::STRQroX;
3355	case AArch64::LDURDi:
3356	case AArch64::LDRDui:
3357	return AArch64::LDRDroX;
3358	case AArch64::STURDi:
3359	case AArch64::STRDui:
3360	return AArch64::STRDroX;
3361	case AArch64::LDURXi:
3362	case AArch64::LDRXui:
3363	return AArch64::LDRXroX;
3364	case AArch64::STURXi:
3365	case AArch64::STRXui:
3366	return AArch64::STRXroX;
3367	case AArch64::LDURWi:
3368	case AArch64::LDRWui:
3369	return AArch64::LDRWroX;
3370	case AArch64::LDURSWi:
3371	case AArch64::LDRSWui:
3372	return AArch64::LDRSWroX;
3373	case AArch64::STURWi:
3374	case AArch64::STRWui:
3375	return AArch64::STRWroX;
3376	case AArch64::LDURHi:
3377	case AArch64::LDRHui:
3378	return AArch64::LDRHroX;
3379	case AArch64::STURHi:
3380	case AArch64::STRHui:
3381	return AArch64::STRHroX;
3382	case AArch64::LDURHHi:
3383	case AArch64::LDRHHui:
3384	return AArch64::LDRHHroX;
3385	case AArch64::STURHHi:
3386	case AArch64::STRHHui:
3387	return AArch64::STRHHroX;
3388	case AArch64::LDURSHXi:
3389	case AArch64::LDRSHXui:
3390	return AArch64::LDRSHXroX;
3391	case AArch64::LDURSHWi:
3392	case AArch64::LDRSHWui:
3393	return AArch64::LDRSHWroX;
3394	case AArch64::LDURBi:
3395	case AArch64::LDRBui:
3396	return AArch64::LDRBroX;
3397	case AArch64::LDURBBi:
3398	case AArch64::LDRBBui:
3399	return AArch64::LDRBBroX;
3400	case AArch64::LDURSBXi:
3401	case AArch64::LDRSBXui:
3402	return AArch64::LDRSBXroX;
3403	case AArch64::LDURSBWi:
3404	case AArch64::LDRSBWui:
3405	return AArch64::LDRSBWroX;
3406	case AArch64::STURBi:
3407	case AArch64::STRBui:
3408	return AArch64::STRBroX;
3409	case AArch64::STURBBi:
3410	case AArch64::STRBBui:
3411	return AArch64::STRBBroX;
3412	}
3413	}
3414
3415	// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3416	// the opcode of an instruction performing the same operation, but using the
3417	// [Reg, #Imm] addressing mode with scaled offset.
3418	unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3419	switch (Opcode) {
3420	default:
3421	llvm_unreachable("Address folding not implemented for instruction");
3422
3423	case AArch64::LDURQi:
3424	Scale = `16`;
3425	return AArch64::LDRQui;
3426	case AArch64::STURQi:
3427	Scale = `16`;
3428	return AArch64::STRQui;
3429	case AArch64::LDURDi:
3430	Scale = `8`;
3431	return AArch64::LDRDui;
3432	case AArch64::STURDi:
3433	Scale = `8`;
3434	return AArch64::STRDui;
3435	case AArch64::LDURXi:
3436	Scale = `8`;
3437	return AArch64::LDRXui;
3438	case AArch64::STURXi:
3439	Scale = `8`;
3440	return AArch64::STRXui;
3441	case AArch64::LDURWi:
3442	Scale = `4`;
3443	return AArch64::LDRWui;
3444	case AArch64::LDURSWi:
3445	Scale = `4`;
3446	return AArch64::LDRSWui;
3447	case AArch64::STURWi:
3448	Scale = `4`;
3449	return AArch64::STRWui;
3450	case AArch64::LDURHi:
3451	Scale = `2`;
3452	return AArch64::LDRHui;
3453	case AArch64::STURHi:
3454	Scale = `2`;
3455	return AArch64::STRHui;
3456	case AArch64::LDURHHi:
3457	Scale = `2`;
3458	return AArch64::LDRHHui;
3459	case AArch64::STURHHi:
3460	Scale = `2`;
3461	return AArch64::STRHHui;
3462	case AArch64::LDURSHXi:
3463	Scale = `2`;
3464	return AArch64::LDRSHXui;
3465	case AArch64::LDURSHWi:
3466	Scale = `2`;
3467	return AArch64::LDRSHWui;
3468	case AArch64::LDURBi:
3469	Scale = `1`;
3470	return AArch64::LDRBui;
3471	case AArch64::LDURBBi:
3472	Scale = `1`;
3473	return AArch64::LDRBBui;
3474	case AArch64::LDURSBXi:
3475	Scale = `1`;
3476	return AArch64::LDRSBXui;
3477	case AArch64::LDURSBWi:
3478	Scale = `1`;
3479	return AArch64::LDRSBWui;
3480	case AArch64::STURBi:
3481	Scale = `1`;
3482	return AArch64::STRBui;
3483	case AArch64::STURBBi:
3484	Scale = `1`;
3485	return AArch64::STRBBui;
3486	case AArch64::LDRQui:
3487	case AArch64::STRQui:
3488	Scale = `16`;
3489	return Opcode;
3490	case AArch64::LDRDui:
3491	case AArch64::STRDui:
3492	case AArch64::LDRXui:
3493	case AArch64::STRXui:
3494	Scale = `8`;
3495	return Opcode;
3496	case AArch64::LDRWui:
3497	case AArch64::LDRSWui:
3498	case AArch64::STRWui:
3499	Scale = `4`;
3500	return Opcode;
3501	case AArch64::LDRHui:
3502	case AArch64::STRHui:
3503	case AArch64::LDRHHui:
3504	case AArch64::STRHHui:
3505	case AArch64::LDRSHXui:
3506	case AArch64::LDRSHWui:
3507	Scale = `2`;
3508	return Opcode;
3509	case AArch64::LDRBui:
3510	case AArch64::LDRBBui:
3511	case AArch64::LDRSBXui:
3512	case AArch64::LDRSBWui:
3513	case AArch64::STRBui:
3514	case AArch64::STRBBui:
3515	Scale = `1`;
3516	return Opcode;
3517	}
3518	}
3519
3520	// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3521	// the opcode of an instruction performing the same operation, but using the
3522	// [Reg, #Imm] addressing mode with unscaled offset.
3523	unsigned unscaledOffsetOpcode(unsigned Opcode) {
3524	switch (Opcode) {
3525	default:
3526	llvm_unreachable("Address folding not implemented for instruction");
3527
3528	case AArch64::LDURQi:
3529	case AArch64::STURQi:
3530	case AArch64::LDURDi:
3531	case AArch64::STURDi:
3532	case AArch64::LDURXi:
3533	case AArch64::STURXi:
3534	case AArch64::LDURWi:
3535	case AArch64::LDURSWi:
3536	case AArch64::STURWi:
3537	case AArch64::LDURHi:
3538	case AArch64::STURHi:
3539	case AArch64::LDURHHi:
3540	case AArch64::STURHHi:
3541	case AArch64::LDURSHXi:
3542	case AArch64::LDURSHWi:
3543	case AArch64::LDURBi:
3544	case AArch64::STURBi:
3545	case AArch64::LDURBBi:
3546	case AArch64::STURBBi:
3547	case AArch64::LDURSBWi:
3548	case AArch64::LDURSBXi:
3549	return Opcode;
3550	case AArch64::LDRQui:
3551	return AArch64::LDURQi;
3552	case AArch64::STRQui:
3553	return AArch64::STURQi;
3554	case AArch64::LDRDui:
3555	return AArch64::LDURDi;
3556	case AArch64::STRDui:
3557	return AArch64::STURDi;
3558	case AArch64::LDRXui:
3559	return AArch64::LDURXi;
3560	case AArch64::STRXui:
3561	return AArch64::STURXi;
3562	case AArch64::LDRWui:
3563	return AArch64::LDURWi;
3564	case AArch64::LDRSWui:
3565	return AArch64::LDURSWi;
3566	case AArch64::STRWui:
3567	return AArch64::STURWi;
3568	case AArch64::LDRHui:
3569	return AArch64::LDURHi;
3570	case AArch64::STRHui:
3571	return AArch64::STURHi;
3572	case AArch64::LDRHHui:
3573	return AArch64::LDURHHi;
3574	case AArch64::STRHHui:
3575	return AArch64::STURHHi;
3576	case AArch64::LDRSHXui:
3577	return AArch64::LDURSHXi;
3578	case AArch64::LDRSHWui:
3579	return AArch64::LDURSHWi;
3580	case AArch64::LDRBBui:
3581	return AArch64::LDURBBi;
3582	case AArch64::LDRBui:
3583	return AArch64::LDURBi;
3584	case AArch64::STRBBui:
3585	return AArch64::STURBBi;
3586	case AArch64::STRBui:
3587	return AArch64::STURBi;
3588	case AArch64::LDRSBWui:
3589	return AArch64::LDURSBWi;
3590	case AArch64::LDRSBXui:
3591	return AArch64::LDURSBXi;
3592	}
3593	}
3594
3595	// Given the opcode of a memory load/store instruction, return the opcode of an
3596	// instruction performing the same operation, but using
3597	// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3598	// offset register.
3599	static unsigned offsetExtendOpcode(unsigned Opcode) {
3600	switch (Opcode) {
3601	default:
3602	llvm_unreachable("Address folding not implemented for instruction");
3603
3604	case AArch64::LDRQroX:
3605	case AArch64::LDURQi:
3606	case AArch64::LDRQui:
3607	return AArch64::LDRQroW;
3608	case AArch64::STRQroX:
3609	case AArch64::STURQi:
3610	case AArch64::STRQui:
3611	return AArch64::STRQroW;
3612	case AArch64::LDRDroX:
3613	case AArch64::LDURDi:
3614	case AArch64::LDRDui:
3615	return AArch64::LDRDroW;
3616	case AArch64::STRDroX:
3617	case AArch64::STURDi:
3618	case AArch64::STRDui:
3619	return AArch64::STRDroW;
3620	case AArch64::LDRXroX:
3621	case AArch64::LDURXi:
3622	case AArch64::LDRXui:
3623	return AArch64::LDRXroW;
3624	case AArch64::STRXroX:
3625	case AArch64::STURXi:
3626	case AArch64::STRXui:
3627	return AArch64::STRXroW;
3628	case AArch64::LDRWroX:
3629	case AArch64::LDURWi:
3630	case AArch64::LDRWui:
3631	return AArch64::LDRWroW;
3632	case AArch64::LDRSWroX:
3633	case AArch64::LDURSWi:
3634	case AArch64::LDRSWui:
3635	return AArch64::LDRSWroW;
3636	case AArch64::STRWroX:
3637	case AArch64::STURWi:
3638	case AArch64::STRWui:
3639	return AArch64::STRWroW;
3640	case AArch64::LDRHroX:
3641	case AArch64::LDURHi:
3642	case AArch64::LDRHui:
3643	return AArch64::LDRHroW;
3644	case AArch64::STRHroX:
3645	case AArch64::STURHi:
3646	case AArch64::STRHui:
3647	return AArch64::STRHroW;
3648	case AArch64::LDRHHroX:
3649	case AArch64::LDURHHi:
3650	case AArch64::LDRHHui:
3651	return AArch64::LDRHHroW;
3652	case AArch64::STRHHroX:
3653	case AArch64::STURHHi:
3654	case AArch64::STRHHui:
3655	return AArch64::STRHHroW;
3656	case AArch64::LDRSHXroX:
3657	case AArch64::LDURSHXi:
3658	case AArch64::LDRSHXui:
3659	return AArch64::LDRSHXroW;
3660	case AArch64::LDRSHWroX:
3661	case AArch64::LDURSHWi:
3662	case AArch64::LDRSHWui:
3663	return AArch64::LDRSHWroW;
3664	case AArch64::LDRBroX:
3665	case AArch64::LDURBi:
3666	case AArch64::LDRBui:
3667	return AArch64::LDRBroW;
3668	case AArch64::LDRBBroX:
3669	case AArch64::LDURBBi:
3670	case AArch64::LDRBBui:
3671	return AArch64::LDRBBroW;
3672	case AArch64::LDRSBXroX:
3673	case AArch64::LDURSBXi:
3674	case AArch64::LDRSBXui:
3675	return AArch64::LDRSBXroW;
3676	case AArch64::LDRSBWroX:
3677	case AArch64::LDURSBWi:
3678	case AArch64::LDRSBWui:
3679	return AArch64::LDRSBWroW;
3680	case AArch64::STRBroX:
3681	case AArch64::STURBi:
3682	case AArch64::STRBui:
3683	return AArch64::STRBroW;
3684	case AArch64::STRBBroX:
3685	case AArch64::STURBBi:
3686	case AArch64::STRBBui:
3687	return AArch64::STRBBroW;
3688	}
3689	}
3690
3691	MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3692	const ExtAddrMode &AM) const {
3693
3694	const DebugLoc &DL = MemI.getDebugLoc();
3695	MachineBasicBlock &MBB = *MemI.getParent();
3696	MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3697
3698	if (AM.Form == ExtAddrMode::Formula::Basic) {
3699	if (AM.ScaledReg) {
3700	// The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3701	unsigned Opcode = regOffsetOpcode(Opcode: MemI.getOpcode());
3702	MRI.constrainRegClass(Reg: AM.BaseReg, RC: &AArch64::GPR64spRegClass);
3703	auto B = BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode))
3704	.addReg(RegNo: MemI.getOperand(i: `0`).getReg(),
3705	flags: MemI.mayLoad() ? RegState::Define : `0`)
3706	.addReg(RegNo: AM.BaseReg)
3707	.addReg(RegNo: AM.ScaledReg)
3708	.addImm(Val: `0`)
3709	.addImm(Val: AM.Scale > `1`)
3710	.setMemRefs(MemI.memoperands())
3711	.setMIFlags(MemI.getFlags());
3712	return B.getInstr();
3713	}
3714
3715	assert(AM.ScaledReg == `0` && AM.Scale == `0` &&
3716	"Addressing mode not supported for folding");
3717
3718	// The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3719	unsigned Scale = `1`;
3720	unsigned Opcode = MemI.getOpcode();
3721	if (isInt<`9`>(x: AM.Displacement))
3722	Opcode = unscaledOffsetOpcode(Opcode);
3723	else
3724	Opcode = scaledOffsetOpcode(Opcode, Scale);
3725
3726	auto B = BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode))
3727	.addReg(RegNo: MemI.getOperand(i: `0`).getReg(),
3728	flags: MemI.mayLoad() ? RegState::Define : `0`)
3729	.addReg(RegNo: AM.BaseReg)
3730	.addImm(Val: AM.Displacement / Scale)
3731	.setMemRefs(MemI.memoperands())
3732	.setMIFlags(MemI.getFlags());
3733	return B.getInstr();
3734	}
3735
3736	if (AM.Form == ExtAddrMode::Formula::SExtScaledReg \|\|
3737	AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3738	// The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3739	assert(AM.ScaledReg && !AM.Displacement &&
3740	"Address offset can be a register or an immediate, but not both");
3741	unsigned Opcode = offsetExtendOpcode(Opcode: MemI.getOpcode());
3742	MRI.constrainRegClass(Reg: AM.BaseReg, RC: &AArch64::GPR64spRegClass);
3743	// Make sure the offset register is in the correct register class.
3744	Register OffsetReg = AM.ScaledReg;
3745	const TargetRegisterClass *RC = MRI.getRegClass(Reg: OffsetReg);
3746	if (RC->hasSuperClassEq(RC: &AArch64::GPR64RegClass)) {
3747	OffsetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3748	BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode: TargetOpcode::COPY), DestReg: OffsetReg)
3749	.addReg(RegNo: AM.ScaledReg, flags: `0`, SubReg: AArch64::sub_32);
3750	}
3751	auto B = BuildMI(BB&: MBB, I&: MemI, MIMD: DL, MCID: get(Opcode))
3752	.addReg(RegNo: MemI.getOperand(i: `0`).getReg(),
3753	flags: MemI.mayLoad() ? RegState::Define : `0`)
3754	.addReg(RegNo: AM.BaseReg)
3755	.addReg(RegNo: OffsetReg)
3756	.addImm(Val: AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3757	.addImm(Val: AM.Scale != `1`)
3758	.setMemRefs(MemI.memoperands())
3759	.setMIFlags(MemI.getFlags());
3760
3761	return B.getInstr();
3762	}
3763
3764	llvm_unreachable(
3765	"Function must not be called with an addressing mode it can't handle");
3766	}
3767
3768	/// Return true if the opcode is a post-index ld/st instruction, which really
3769	/// loads from base+0.
3770	static bool isPostIndexLdStOpcode(unsigned Opcode) {
3771	switch (Opcode) {
3772	default:
3773	return false;
3774	case AArch64::LD1Fourv16b_POST:
3775	case AArch64::LD1Fourv1d_POST:
3776	case AArch64::LD1Fourv2d_POST:
3777	case AArch64::LD1Fourv2s_POST:
3778	case AArch64::LD1Fourv4h_POST:
3779	case AArch64::LD1Fourv4s_POST:
3780	case AArch64::LD1Fourv8b_POST:
3781	case AArch64::LD1Fourv8h_POST:
3782	case AArch64::LD1Onev16b_POST:
3783	case AArch64::LD1Onev1d_POST:
3784	case AArch64::LD1Onev2d_POST:
3785	case AArch64::LD1Onev2s_POST:
3786	case AArch64::LD1Onev4h_POST:
3787	case AArch64::LD1Onev4s_POST:
3788	case AArch64::LD1Onev8b_POST:
3789	case AArch64::LD1Onev8h_POST:
3790	case AArch64::LD1Rv16b_POST:
3791	case AArch64::LD1Rv1d_POST:
3792	case AArch64::LD1Rv2d_POST:
3793	case AArch64::LD1Rv2s_POST:
3794	case AArch64::LD1Rv4h_POST:
3795	case AArch64::LD1Rv4s_POST:
3796	case AArch64::LD1Rv8b_POST:
3797	case AArch64::LD1Rv8h_POST:
3798	case AArch64::LD1Threev16b_POST:
3799	case AArch64::LD1Threev1d_POST:
3800	case AArch64::LD1Threev2d_POST:
3801	case AArch64::LD1Threev2s_POST:
3802	case AArch64::LD1Threev4h_POST:
3803	case AArch64::LD1Threev4s_POST:
3804	case AArch64::LD1Threev8b_POST:
3805	case AArch64::LD1Threev8h_POST:
3806	case AArch64::LD1Twov16b_POST:
3807	case AArch64::LD1Twov1d_POST:
3808	case AArch64::LD1Twov2d_POST:
3809	case AArch64::LD1Twov2s_POST:
3810	case AArch64::LD1Twov4h_POST:
3811	case AArch64::LD1Twov4s_POST:
3812	case AArch64::LD1Twov8b_POST:
3813	case AArch64::LD1Twov8h_POST:
3814	case AArch64::LD1i16_POST:
3815	case AArch64::LD1i32_POST:
3816	case AArch64::LD1i64_POST:
3817	case AArch64::LD1i8_POST:
3818	case AArch64::LD2Rv16b_POST:
3819	case AArch64::LD2Rv1d_POST:
3820	case AArch64::LD2Rv2d_POST:
3821	case AArch64::LD2Rv2s_POST:
3822	case AArch64::LD2Rv4h_POST:
3823	case AArch64::LD2Rv4s_POST:
3824	case AArch64::LD2Rv8b_POST:
3825	case AArch64::LD2Rv8h_POST:
3826	case AArch64::LD2Twov16b_POST:
3827	case AArch64::LD2Twov2d_POST:
3828	case AArch64::LD2Twov2s_POST:
3829	case AArch64::LD2Twov4h_POST:
3830	case AArch64::LD2Twov4s_POST:
3831	case AArch64::LD2Twov8b_POST:
3832	case AArch64::LD2Twov8h_POST:
3833	case AArch64::LD2i16_POST:
3834	case AArch64::LD2i32_POST:
3835	case AArch64::LD2i64_POST:
3836	case AArch64::LD2i8_POST:
3837	case AArch64::LD3Rv16b_POST:
3838	case AArch64::LD3Rv1d_POST:
3839	case AArch64::LD3Rv2d_POST:
3840	case AArch64::LD3Rv2s_POST:
3841	case AArch64::LD3Rv4h_POST:
3842	case AArch64::LD3Rv4s_POST:
3843	case AArch64::LD3Rv8b_POST:
3844	case AArch64::LD3Rv8h_POST:
3845	case AArch64::LD3Threev16b_POST:
3846	case AArch64::LD3Threev2d_POST:
3847	case AArch64::LD3Threev2s_POST:
3848	case AArch64::LD3Threev4h_POST:
3849	case AArch64::LD3Threev4s_POST:
3850	case AArch64::LD3Threev8b_POST:
3851	case AArch64::LD3Threev8h_POST:
3852	case AArch64::LD3i16_POST:
3853	case AArch64::LD3i32_POST:
3854	case AArch64::LD3i64_POST:
3855	case AArch64::LD3i8_POST:
3856	case AArch64::LD4Fourv16b_POST:
3857	case AArch64::LD4Fourv2d_POST:
3858	case AArch64::LD4Fourv2s_POST:
3859	case AArch64::LD4Fourv4h_POST:
3860	case AArch64::LD4Fourv4s_POST:
3861	case AArch64::LD4Fourv8b_POST:
3862	case AArch64::LD4Fourv8h_POST:
3863	case AArch64::LD4Rv16b_POST:
3864	case AArch64::LD4Rv1d_POST:
3865	case AArch64::LD4Rv2d_POST:
3866	case AArch64::LD4Rv2s_POST:
3867	case AArch64::LD4Rv4h_POST:
3868	case AArch64::LD4Rv4s_POST:
3869	case AArch64::LD4Rv8b_POST:
3870	case AArch64::LD4Rv8h_POST:
3871	case AArch64::LD4i16_POST:
3872	case AArch64::LD4i32_POST:
3873	case AArch64::LD4i64_POST:
3874	case AArch64::LD4i8_POST:
3875	case AArch64::LDAPRWpost:
3876	case AArch64::LDAPRXpost:
3877	case AArch64::LDIAPPWpost:
3878	case AArch64::LDIAPPXpost:
3879	case AArch64::LDPDpost:
3880	case AArch64::LDPQpost:
3881	case AArch64::LDPSWpost:
3882	case AArch64::LDPSpost:
3883	case AArch64::LDPWpost:
3884	case AArch64::LDPXpost:
3885	case AArch64::LDRBBpost:
3886	case AArch64::LDRBpost:
3887	case AArch64::LDRDpost:
3888	case AArch64::LDRHHpost:
3889	case AArch64::LDRHpost:
3890	case AArch64::LDRQpost:
3891	case AArch64::LDRSBWpost:
3892	case AArch64::LDRSBXpost:
3893	case AArch64::LDRSHWpost:
3894	case AArch64::LDRSHXpost:
3895	case AArch64::LDRSWpost:
3896	case AArch64::LDRSpost:
3897	case AArch64::LDRWpost:
3898	case AArch64::LDRXpost:
3899	case AArch64::ST1Fourv16b_POST:
3900	case AArch64::ST1Fourv1d_POST:
3901	case AArch64::ST1Fourv2d_POST:
3902	case AArch64::ST1Fourv2s_POST:
3903	case AArch64::ST1Fourv4h_POST:
3904	case AArch64::ST1Fourv4s_POST:
3905	case AArch64::ST1Fourv8b_POST:
3906	case AArch64::ST1Fourv8h_POST:
3907	case AArch64::ST1Onev16b_POST:
3908	case AArch64::ST1Onev1d_POST:
3909	case AArch64::ST1Onev2d_POST:
3910	case AArch64::ST1Onev2s_POST:
3911	case AArch64::ST1Onev4h_POST:
3912	case AArch64::ST1Onev4s_POST:
3913	case AArch64::ST1Onev8b_POST:
3914	case AArch64::ST1Onev8h_POST:
3915	case AArch64::ST1Threev16b_POST:
3916	case AArch64::ST1Threev1d_POST:
3917	case AArch64::ST1Threev2d_POST:
3918	case AArch64::ST1Threev2s_POST:
3919	case AArch64::ST1Threev4h_POST:
3920	case AArch64::ST1Threev4s_POST:
3921	case AArch64::ST1Threev8b_POST:
3922	case AArch64::ST1Threev8h_POST:
3923	case AArch64::ST1Twov16b_POST:
3924	case AArch64::ST1Twov1d_POST:
3925	case AArch64::ST1Twov2d_POST:
3926	case AArch64::ST1Twov2s_POST:
3927	case AArch64::ST1Twov4h_POST:
3928	case AArch64::ST1Twov4s_POST:
3929	case AArch64::ST1Twov8b_POST:
3930	case AArch64::ST1Twov8h_POST:
3931	case AArch64::ST1i16_POST:
3932	case AArch64::ST1i32_POST:
3933	case AArch64::ST1i64_POST:
3934	case AArch64::ST1i8_POST:
3935	case AArch64::ST2GPostIndex:
3936	case AArch64::ST2Twov16b_POST:
3937	case AArch64::ST2Twov2d_POST:
3938	case AArch64::ST2Twov2s_POST:
3939	case AArch64::ST2Twov4h_POST:
3940	case AArch64::ST2Twov4s_POST:
3941	case AArch64::ST2Twov8b_POST:
3942	case AArch64::ST2Twov8h_POST:
3943	case AArch64::ST2i16_POST:
3944	case AArch64::ST2i32_POST:
3945	case AArch64::ST2i64_POST:
3946	case AArch64::ST2i8_POST:
3947	case AArch64::ST3Threev16b_POST:
3948	case AArch64::ST3Threev2d_POST:
3949	case AArch64::ST3Threev2s_POST:
3950	case AArch64::ST3Threev4h_POST:
3951	case AArch64::ST3Threev4s_POST:
3952	case AArch64::ST3Threev8b_POST:
3953	case AArch64::ST3Threev8h_POST:
3954	case AArch64::ST3i16_POST:
3955	case AArch64::ST3i32_POST:
3956	case AArch64::ST3i64_POST:
3957	case AArch64::ST3i8_POST:
3958	case AArch64::ST4Fourv16b_POST:
3959	case AArch64::ST4Fourv2d_POST:
3960	case AArch64::ST4Fourv2s_POST:
3961	case AArch64::ST4Fourv4h_POST:
3962	case AArch64::ST4Fourv4s_POST:
3963	case AArch64::ST4Fourv8b_POST:
3964	case AArch64::ST4Fourv8h_POST:
3965	case AArch64::ST4i16_POST:
3966	case AArch64::ST4i32_POST:
3967	case AArch64::ST4i64_POST:
3968	case AArch64::ST4i8_POST:
3969	case AArch64::STGPostIndex:
3970	case AArch64::STGPpost:
3971	case AArch64::STPDpost:
3972	case AArch64::STPQpost:
3973	case AArch64::STPSpost:
3974	case AArch64::STPWpost:
3975	case AArch64::STPXpost:
3976	case AArch64::STRBBpost:
3977	case AArch64::STRBpost:
3978	case AArch64::STRDpost:
3979	case AArch64::STRHHpost:
3980	case AArch64::STRHpost:
3981	case AArch64::STRQpost:
3982	case AArch64::STRSpost:
3983	case AArch64::STRWpost:
3984	case AArch64::STRXpost:
3985	case AArch64::STZ2GPostIndex:
3986	case AArch64::STZGPostIndex:
3987	return true;
3988	}
3989	}
3990
3991	bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3992	const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3993	bool &OffsetIsScalable, TypeSize &Width,
3994	const TargetRegisterInfo TRI) const* {
3995	assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3996	// Handle only loads/stores with base register followed by immediate offset.
3997	if (LdSt.getNumExplicitOperands() == `3`) {
3998	// Non-paired instruction (e.g., ldr x1, [x0, #8]).
3999	if ((!LdSt.getOperand(i: `1`).isReg() && !LdSt.getOperand(i: `1`).isFI()) \|\|
4000	!LdSt.getOperand(i: `2`).isImm())
4001	return false;
4002	} else if (LdSt.getNumExplicitOperands() == `4`) {
4003	// Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4004	if (!LdSt.getOperand(i: `1`).isReg() \|\|
4005	(!LdSt.getOperand(i: `2`).isReg() && !LdSt.getOperand(i: `2`).isFI()) \|\|
4006	!LdSt.getOperand(i: `3`).isImm())
4007	return false;
4008	} else
4009	return false;
4010
4011	// Get the scaling factor for the instruction and set the width for the
4012	// instruction.
4013	TypeSize Scale(`0U`, false);
4014	int64_t Dummy1, Dummy2;
4015
4016	// If this returns false, then it's an instruction we don't want to handle.
4017	if (!getMemOpInfo(Opcode: LdSt.getOpcode(), Scale, Width, MinOffset&: Dummy1, MaxOffset&: Dummy2))
4018	return false;
4019
4020	// Compute the offset. Offset is calculated as the immediate operand
4021	// multiplied by the scaling factor. Unscaled instructions have scaling factor
4022	// set to 1. Postindex are a special case which have an offset of 0.
4023	if (isPostIndexLdStOpcode(Opcode: LdSt.getOpcode())) {
4024	BaseOp = &LdSt.getOperand(i: `2`);
4025	Offset = `0`;
4026	} else if (LdSt.getNumExplicitOperands() == `3`) {
4027	BaseOp = &LdSt.getOperand(i: `1`);
4028	Offset = LdSt.getOperand(i: `2`).getImm() * Scale.getKnownMinValue();
4029	} else {
4030	assert(LdSt.getNumExplicitOperands() == `4` && "invalid number of operands");
4031	BaseOp = &LdSt.getOperand(i: `2`);
4032	Offset = LdSt.getOperand(i: `3`).getImm() * Scale.getKnownMinValue();
4033	}
4034	OffsetIsScalable = Scale.isScalable();
4035
4036	return BaseOp->isReg() \|\| BaseOp->isFI();
4037	}
4038
4039	MachineOperand &
4040	AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
4041	assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4042	MachineOperand &OfsOp = LdSt.getOperand(i: LdSt.getNumExplicitOperands() - `1`);
4043	assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4044	return OfsOp;
4045	}
4046
4047	bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4048	TypeSize &Width, int64_t &MinOffset,
4049	int64_t &MaxOffset) {
4050	switch (Opcode) {
4051	// Not a memory operation or something we want to handle.
4052	default:
4053	Scale = TypeSize::getFixed(ExactSize: `0`);
4054	Width = TypeSize::getFixed(ExactSize: `0`);
4055	MinOffset = MaxOffset = `0`;
4056	return false;
4057	// LDR / STR
4058	case AArch64::LDRQui:
4059	case AArch64::STRQui:
4060	Scale = TypeSize::getFixed(ExactSize: `16`);
4061	Width = TypeSize::getFixed(ExactSize: `16`);
4062	MinOffset = `0`;
4063	MaxOffset = `4095`;
4064	break;
4065	case AArch64::LDRXui:
4066	case AArch64::LDRDui:
4067	case AArch64::STRXui:
4068	case AArch64::STRDui:
4069	case AArch64::PRFMui:
4070	Scale = TypeSize::getFixed(ExactSize: `8`);
4071	Width = TypeSize::getFixed(ExactSize: `8`);
4072	MinOffset = `0`;
4073	MaxOffset = `4095`;
4074	break;
4075	case AArch64::LDRWui:
4076	case AArch64::LDRSui:
4077	case AArch64::LDRSWui:
4078	case AArch64::STRWui:
4079	case AArch64::STRSui:
4080	Scale = TypeSize::getFixed(ExactSize: `4`);
4081	Width = TypeSize::getFixed(ExactSize: `4`);
4082	MinOffset = `0`;
4083	MaxOffset = `4095`;
4084	break;
4085	case AArch64::LDRHui:
4086	case AArch64::LDRHHui:
4087	case AArch64::LDRSHWui:
4088	case AArch64::LDRSHXui:
4089	case AArch64::STRHui:
4090	case AArch64::STRHHui:
4091	Scale = TypeSize::getFixed(ExactSize: `2`);
4092	Width = TypeSize::getFixed(ExactSize: `2`);
4093	MinOffset = `0`;
4094	MaxOffset = `4095`;
4095	break;
4096	case AArch64::LDRBui:
4097	case AArch64::LDRBBui:
4098	case AArch64::LDRSBWui:
4099	case AArch64::LDRSBXui:
4100	case AArch64::STRBui:
4101	case AArch64::STRBBui:
4102	Scale = TypeSize::getFixed(ExactSize: `1`);
4103	Width = TypeSize::getFixed(ExactSize: `1`);
4104	MinOffset = `0`;
4105	MaxOffset = `4095`;
4106	break;
4107	// post/pre inc
4108	case AArch64::STRQpre:
4109	case AArch64::LDRQpost:
4110	Scale = TypeSize::getFixed(ExactSize: `1`);
4111	Width = TypeSize::getFixed(ExactSize: `16`);
4112	MinOffset = -`256`;
4113	MaxOffset = `255`;
4114	break;
4115	case AArch64::LDRDpost:
4116	case AArch64::LDRDpre:
4117	case AArch64::LDRXpost:
4118	case AArch64::LDRXpre:
4119	case AArch64::STRDpost:
4120	case AArch64::STRDpre:
4121	case AArch64::STRXpost:
4122	case AArch64::STRXpre:
4123	Scale = TypeSize::getFixed(ExactSize: `1`);
4124	Width = TypeSize::getFixed(ExactSize: `8`);
4125	MinOffset = -`256`;
4126	MaxOffset = `255`;
4127	break;
4128	case AArch64::STRWpost:
4129	case AArch64::STRWpre:
4130	case AArch64::LDRWpost:
4131	case AArch64::LDRWpre:
4132	case AArch64::STRSpost:
4133	case AArch64::STRSpre:
4134	case AArch64::LDRSpost:
4135	case AArch64::LDRSpre:
4136	Scale = TypeSize::getFixed(ExactSize: `1`);
4137	Width = TypeSize::getFixed(ExactSize: `4`);
4138	MinOffset = -`256`;
4139	MaxOffset = `255`;
4140	break;
4141	case AArch64::LDRHpost:
4142	case AArch64::LDRHpre:
4143	case AArch64::STRHpost:
4144	case AArch64::STRHpre:
4145	case AArch64::LDRHHpost:
4146	case AArch64::LDRHHpre:
4147	case AArch64::STRHHpost:
4148	case AArch64::STRHHpre:
4149	Scale = TypeSize::getFixed(ExactSize: `1`);
4150	Width = TypeSize::getFixed(ExactSize: `2`);
4151	MinOffset = -`256`;
4152	MaxOffset = `255`;
4153	break;
4154	case AArch64::LDRBpost:
4155	case AArch64::LDRBpre:
4156	case AArch64::STRBpost:
4157	case AArch64::STRBpre:
4158	case AArch64::LDRBBpost:
4159	case AArch64::LDRBBpre:
4160	case AArch64::STRBBpost:
4161	case AArch64::STRBBpre:
4162	Scale = TypeSize::getFixed(ExactSize: `1`);
4163	Width = TypeSize::getFixed(ExactSize: `1`);
4164	MinOffset = -`256`;
4165	MaxOffset = `255`;
4166	break;
4167	// Unscaled
4168	case AArch64::LDURQi:
4169	case AArch64::STURQi:
4170	Scale = TypeSize::getFixed(ExactSize: `1`);
4171	Width = TypeSize::getFixed(ExactSize: `16`);
4172	MinOffset = -`256`;
4173	MaxOffset = `255`;
4174	break;
4175	case AArch64::LDURXi:
4176	case AArch64::LDURDi:
4177	case AArch64::LDAPURXi:
4178	case AArch64::STURXi:
4179	case AArch64::STURDi:
4180	case AArch64::STLURXi:
4181	case AArch64::PRFUMi:
4182	Scale = TypeSize::getFixed(ExactSize: `1`);
4183	Width = TypeSize::getFixed(ExactSize: `8`);
4184	MinOffset = -`256`;
4185	MaxOffset = `255`;
4186	break;
4187	case AArch64::LDURWi:
4188	case AArch64::LDURSi:
4189	case AArch64::LDURSWi:
4190	case AArch64::LDAPURi:
4191	case AArch64::LDAPURSWi:
4192	case AArch64::STURWi:
4193	case AArch64::STURSi:
4194	case AArch64::STLURWi:
4195	Scale = TypeSize::getFixed(ExactSize: `1`);
4196	Width = TypeSize::getFixed(ExactSize: `4`);
4197	MinOffset = -`256`;
4198	MaxOffset = `255`;
4199	break;
4200	case AArch64::LDURHi:
4201	case AArch64::LDURHHi:
4202	case AArch64::LDURSHXi:
4203	case AArch64::LDURSHWi:
4204	case AArch64::LDAPURHi:
4205	case AArch64::LDAPURSHWi:
4206	case AArch64::LDAPURSHXi:
4207	case AArch64::STURHi:
4208	case AArch64::STURHHi:
4209	case AArch64::STLURHi:
4210	Scale = TypeSize::getFixed(ExactSize: `1`);
4211	Width = TypeSize::getFixed(ExactSize: `2`);
4212	MinOffset = -`256`;
4213	MaxOffset = `255`;
4214	break;
4215	case AArch64::LDURBi:
4216	case AArch64::LDURBBi:
4217	case AArch64::LDURSBXi:
4218	case AArch64::LDURSBWi:
4219	case AArch64::LDAPURBi:
4220	case AArch64::LDAPURSBWi:
4221	case AArch64::LDAPURSBXi:
4222	case AArch64::STURBi:
4223	case AArch64::STURBBi:
4224	case AArch64::STLURBi:
4225	Scale = TypeSize::getFixed(ExactSize: `1`);
4226	Width = TypeSize::getFixed(ExactSize: `1`);
4227	MinOffset = -`256`;
4228	MaxOffset = `255`;
4229	break;
4230	// LDP / STP (including pre/post inc)
4231	case AArch64::LDPQi:
4232	case AArch64::LDNPQi:
4233	case AArch64::STPQi:
4234	case AArch64::STNPQi:
4235	case AArch64::LDPQpost:
4236	case AArch64::LDPQpre:
4237	case AArch64::STPQpost:
4238	case AArch64::STPQpre:
4239	Scale = TypeSize::getFixed(ExactSize: `16`);
4240	Width = TypeSize::getFixed(ExactSize: `16` * `2`);
4241	MinOffset = -`64`;
4242	MaxOffset = `63`;
4243	break;
4244	case AArch64::LDPXi:
4245	case AArch64::LDPDi:
4246	case AArch64::LDNPXi:
4247	case AArch64::LDNPDi:
4248	case AArch64::STPXi:
4249	case AArch64::STPDi:
4250	case AArch64::STNPXi:
4251	case AArch64::STNPDi:
4252	case AArch64::LDPDpost:
4253	case AArch64::LDPDpre:
4254	case AArch64::LDPXpost:
4255	case AArch64::LDPXpre:
4256	case AArch64::STPDpost:
4257	case AArch64::STPDpre:
4258	case AArch64::STPXpost:
4259	case AArch64::STPXpre:
4260	Scale = TypeSize::getFixed(ExactSize: `8`);
4261	Width = TypeSize::getFixed(ExactSize: `8` * `2`);
4262	MinOffset = -`64`;
4263	MaxOffset = `63`;
4264	break;
4265	case AArch64::LDPWi:
4266	case AArch64::LDPSi:
4267	case AArch64::LDNPWi:
4268	case AArch64::LDNPSi:
4269	case AArch64::STPWi:
4270	case AArch64::STPSi:
4271	case AArch64::STNPWi:
4272	case AArch64::STNPSi:
4273	case AArch64::LDPSpost:
4274	case AArch64::LDPSpre:
4275	case AArch64::LDPWpost:
4276	case AArch64::LDPWpre:
4277	case AArch64::STPSpost:
4278	case AArch64::STPSpre:
4279	case AArch64::STPWpost:
4280	case AArch64::STPWpre:
4281	Scale = TypeSize::getFixed(ExactSize: `4`);
4282	Width = TypeSize::getFixed(ExactSize: `4` * `2`);
4283	MinOffset = -`64`;
4284	MaxOffset = `63`;
4285	break;
4286	case AArch64::StoreSwiftAsyncContext:
4287	// Store is an STRXui, but there might be an ADDXri in the expansion too.
4288	Scale = TypeSize::getFixed(ExactSize: `1`);
4289	Width = TypeSize::getFixed(ExactSize: `8`);
4290	MinOffset = `0`;
4291	MaxOffset = `4095`;
4292	break;
4293	case AArch64::ADDG:
4294	Scale = TypeSize::getFixed(ExactSize: `16`);
4295	Width = TypeSize::getFixed(ExactSize: `0`);
4296	MinOffset = `0`;
4297	MaxOffset = `63`;
4298	break;
4299	case AArch64::TAGPstack:
4300	Scale = TypeSize::getFixed(ExactSize: `16`);
4301	Width = TypeSize::getFixed(ExactSize: `0`);
4302	// TAGP with a negative offset turns into SUBP, which has a maximum offset
4303	// of 63 (not 64!).
4304	MinOffset = -`63`;
4305	MaxOffset = `63`;
4306	break;
4307	case AArch64::LDG:
4308	case AArch64::STGi:
4309	case AArch64::STGPreIndex:
4310	case AArch64::STGPostIndex:
4311	case AArch64::STZGi:
4312	case AArch64::STZGPreIndex:
4313	case AArch64::STZGPostIndex:
4314	Scale = TypeSize::getFixed(ExactSize: `16`);
4315	Width = TypeSize::getFixed(ExactSize: `16`);
4316	MinOffset = -`256`;
4317	MaxOffset = `255`;
4318	break;
4319	// SVE
4320	case AArch64::STR_ZZZZXI:
4321	case AArch64::LDR_ZZZZXI:
4322	Scale = TypeSize::getScalable(MinimumSize: `16`);
4323	Width = TypeSize::getScalable(MinimumSize: `16` * `4`);
4324	MinOffset = -`256`;
4325	MaxOffset = `252`;
4326	break;
4327	case AArch64::STR_ZZZXI:
4328	case AArch64::LDR_ZZZXI:
4329	Scale = TypeSize::getScalable(MinimumSize: `16`);
4330	Width = TypeSize::getScalable(MinimumSize: `16` * `3`);
4331	MinOffset = -`256`;
4332	MaxOffset = `253`;
4333	break;
4334	case AArch64::STR_ZZXI:
4335	case AArch64::LDR_ZZXI:
4336	Scale = TypeSize::getScalable(MinimumSize: `16`);
4337	Width = TypeSize::getScalable(MinimumSize: `16` * `2`);
4338	MinOffset = -`256`;
4339	MaxOffset = `254`;
4340	break;
4341	case AArch64::LDR_PXI:
4342	case AArch64::STR_PXI:
4343	Scale = TypeSize::getScalable(MinimumSize: `2`);
4344	Width = TypeSize::getScalable(MinimumSize: `2`);
4345	MinOffset = -`256`;
4346	MaxOffset = `255`;
4347	break;
4348	case AArch64::LDR_PPXI:
4349	case AArch64::STR_PPXI:
4350	Scale = TypeSize::getScalable(MinimumSize: `2`);
4351	Width = TypeSize::getScalable(MinimumSize: `2` * `2`);
4352	MinOffset = -`256`;
4353	MaxOffset = `254`;
4354	break;
4355	case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4356	case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4357	case AArch64::LDR_ZXI:
4358	case AArch64::STR_ZXI:
4359	Scale = TypeSize::getScalable(MinimumSize: `16`);
4360	Width = TypeSize::getScalable(MinimumSize: `16`);
4361	MinOffset = -`256`;
4362	MaxOffset = `255`;
4363	break;
4364	case AArch64::LD1B_IMM:
4365	case AArch64::LD1H_IMM:
4366	case AArch64::LD1W_IMM:
4367	case AArch64::LD1D_IMM:
4368	case AArch64::LDNT1B_ZRI:
4369	case AArch64::LDNT1H_ZRI:
4370	case AArch64::LDNT1W_ZRI:
4371	case AArch64::LDNT1D_ZRI:
4372	case AArch64::ST1B_IMM:
4373	case AArch64::ST1H_IMM:
4374	case AArch64::ST1W_IMM:
4375	case AArch64::ST1D_IMM:
4376	case AArch64::STNT1B_ZRI:
4377	case AArch64::STNT1H_ZRI:
4378	case AArch64::STNT1W_ZRI:
4379	case AArch64::STNT1D_ZRI:
4380	case AArch64::LDNF1B_IMM:
4381	case AArch64::LDNF1H_IMM:
4382	case AArch64::LDNF1W_IMM:
4383	case AArch64::LDNF1D_IMM:
4384	// A full vectors worth of data
4385	// Width = mbytes elements*
4386	Scale = TypeSize::getScalable(MinimumSize: `16`);
4387	Width = TypeSize::getScalable(MinimumSize: `16`);
4388	MinOffset = -`8`;
4389	MaxOffset = `7`;
4390	break;
4391	case AArch64::LD2B_IMM:
4392	case AArch64::LD2H_IMM:
4393	case AArch64::LD2W_IMM:
4394	case AArch64::LD2D_IMM:
4395	case AArch64::ST2B_IMM:
4396	case AArch64::ST2H_IMM:
4397	case AArch64::ST2W_IMM:
4398	case AArch64::ST2D_IMM:
4399	Scale = TypeSize::getScalable(MinimumSize: `32`);
4400	Width = TypeSize::getScalable(MinimumSize: `16` * `2`);
4401	MinOffset = -`8`;
4402	MaxOffset = `7`;
4403	break;
4404	case AArch64::LD3B_IMM:
4405	case AArch64::LD3H_IMM:
4406	case AArch64::LD3W_IMM:
4407	case AArch64::LD3D_IMM:
4408	case AArch64::ST3B_IMM:
4409	case AArch64::ST3H_IMM:
4410	case AArch64::ST3W_IMM:
4411	case AArch64::ST3D_IMM:
4412	Scale = TypeSize::getScalable(MinimumSize: `48`);
4413	Width = TypeSize::getScalable(MinimumSize: `16` * `3`);
4414	MinOffset = -`8`;
4415	MaxOffset = `7`;
4416	break;
4417	case AArch64::LD4B_IMM:
4418	case AArch64::LD4H_IMM:
4419	case AArch64::LD4W_IMM:
4420	case AArch64::LD4D_IMM:
4421	case AArch64::ST4B_IMM:
4422	case AArch64::ST4H_IMM:
4423	case AArch64::ST4W_IMM:
4424	case AArch64::ST4D_IMM:
4425	Scale = TypeSize::getScalable(MinimumSize: `64`);
4426	Width = TypeSize::getScalable(MinimumSize: `16` * `4`);
4427	MinOffset = -`8`;
4428	MaxOffset = `7`;
4429	break;
4430	case AArch64::LD1B_H_IMM:
4431	case AArch64::LD1SB_H_IMM:
4432	case AArch64::LD1H_S_IMM:
4433	case AArch64::LD1SH_S_IMM:
4434	case AArch64::LD1W_D_IMM:
4435	case AArch64::LD1SW_D_IMM:
4436	case AArch64::ST1B_H_IMM:
4437	case AArch64::ST1H_S_IMM:
4438	case AArch64::ST1W_D_IMM:
4439	case AArch64::LDNF1B_H_IMM:
4440	case AArch64::LDNF1SB_H_IMM:
4441	case AArch64::LDNF1H_S_IMM:
4442	case AArch64::LDNF1SH_S_IMM:
4443	case AArch64::LDNF1W_D_IMM:
4444	case AArch64::LDNF1SW_D_IMM:
4445	// A half vector worth of data
4446	// Width = mbytes elements*
4447	Scale = TypeSize::getScalable(MinimumSize: `8`);
4448	Width = TypeSize::getScalable(MinimumSize: `8`);
4449	MinOffset = -`8`;
4450	MaxOffset = `7`;
4451	break;
4452	case AArch64::LD1B_S_IMM:
4453	case AArch64::LD1SB_S_IMM:
4454	case AArch64::LD1H_D_IMM:
4455	case AArch64::LD1SH_D_IMM:
4456	case AArch64::ST1B_S_IMM:
4457	case AArch64::ST1H_D_IMM:
4458	case AArch64::LDNF1B_S_IMM:
4459	case AArch64::LDNF1SB_S_IMM:
4460	case AArch64::LDNF1H_D_IMM:
4461	case AArch64::LDNF1SH_D_IMM:
4462	// A quarter vector worth of data
4463	// Width = mbytes elements*
4464	Scale = TypeSize::getScalable(MinimumSize: `4`);
4465	Width = TypeSize::getScalable(MinimumSize: `4`);
4466	MinOffset = -`8`;
4467	MaxOffset = `7`;
4468	break;
4469	case AArch64::LD1B_D_IMM:
4470	case AArch64::LD1SB_D_IMM:
4471	case AArch64::ST1B_D_IMM:
4472	case AArch64::LDNF1B_D_IMM:
4473	case AArch64::LDNF1SB_D_IMM:
4474	// A eighth vector worth of data
4475	// Width = mbytes elements*
4476	Scale = TypeSize::getScalable(MinimumSize: `2`);
4477	Width = TypeSize::getScalable(MinimumSize: `2`);
4478	MinOffset = -`8`;
4479	MaxOffset = `7`;
4480	break;
4481	case AArch64::ST2Gi:
4482	case AArch64::ST2GPreIndex:
4483	case AArch64::ST2GPostIndex:
4484	case AArch64::STZ2Gi:
4485	case AArch64::STZ2GPreIndex:
4486	case AArch64::STZ2GPostIndex:
4487	Scale = TypeSize::getFixed(ExactSize: `16`);
4488	Width = TypeSize::getFixed(ExactSize: `32`);
4489	MinOffset = -`256`;
4490	MaxOffset = `255`;
4491	break;
4492	case AArch64::STGPi:
4493	case AArch64::STGPpost:
4494	case AArch64::STGPpre:
4495	Scale = TypeSize::getFixed(ExactSize: `16`);
4496	Width = TypeSize::getFixed(ExactSize: `16`);
4497	MinOffset = -`64`;
4498	MaxOffset = `63`;
4499	break;
4500	case AArch64::LD1RB_IMM:
4501	case AArch64::LD1RB_H_IMM:
4502	case AArch64::LD1RB_S_IMM:
4503	case AArch64::LD1RB_D_IMM:
4504	case AArch64::LD1RSB_H_IMM:
4505	case AArch64::LD1RSB_S_IMM:
4506	case AArch64::LD1RSB_D_IMM:
4507	Scale = TypeSize::getFixed(ExactSize: `1`);
4508	Width = TypeSize::getFixed(ExactSize: `1`);
4509	MinOffset = `0`;
4510	MaxOffset = `63`;
4511	break;
4512	case AArch64::LD1RH_IMM:
4513	case AArch64::LD1RH_S_IMM:
4514	case AArch64::LD1RH_D_IMM:
4515	case AArch64::LD1RSH_S_IMM:
4516	case AArch64::LD1RSH_D_IMM:
4517	Scale = TypeSize::getFixed(ExactSize: `2`);
4518	Width = TypeSize::getFixed(ExactSize: `2`);
4519	MinOffset = `0`;
4520	MaxOffset = `63`;
4521	break;
4522	case AArch64::LD1RW_IMM:
4523	case AArch64::LD1RW_D_IMM:
4524	case AArch64::LD1RSW_IMM:
4525	Scale = TypeSize::getFixed(ExactSize: `4`);
4526	Width = TypeSize::getFixed(ExactSize: `4`);
4527	MinOffset = `0`;
4528	MaxOffset = `63`;
4529	break;
4530	case AArch64::LD1RD_IMM:
4531	Scale = TypeSize::getFixed(ExactSize: `8`);
4532	Width = TypeSize::getFixed(ExactSize: `8`);
4533	MinOffset = `0`;
4534	MaxOffset = `63`;
4535	break;
4536	}
4537
4538	return true;
4539	}
4540
4541	// Scaling factor for unscaled load or store.
4542	int AArch64InstrInfo::getMemScale(unsigned Opc) {
4543	switch (Opc) {
4544	default:
4545	llvm_unreachable("Opcode has unknown scale!");
4546	case AArch64::LDRBBui:
4547	case AArch64::LDURBBi:
4548	case AArch64::LDRSBWui:
4549	case AArch64::LDURSBWi:
4550	case AArch64::STRBBui:
4551	case AArch64::STURBBi:
4552	return `1`;
4553	case AArch64::LDRHHui:
4554	case AArch64::LDURHHi:
4555	case AArch64::LDRSHWui:
4556	case AArch64::LDURSHWi:
4557	case AArch64::STRHHui:
4558	case AArch64::STURHHi:
4559	return `2`;
4560	case AArch64::LDRSui:
4561	case AArch64::LDURSi:
4562	case AArch64::LDRSpre:
4563	case AArch64::LDRSWui:
4564	case AArch64::LDURSWi:
4565	case AArch64::LDRSWpre:
4566	case AArch64::LDRWpre:
4567	case AArch64::LDRWui:
4568	case AArch64::LDURWi:
4569	case AArch64::STRSui:
4570	case AArch64::STURSi:
4571	case AArch64::STRSpre:
4572	case AArch64::STRWui:
4573	case AArch64::STURWi:
4574	case AArch64::STRWpre:
4575	case AArch64::LDPSi:
4576	case AArch64::LDPSWi:
4577	case AArch64::LDPWi:
4578	case AArch64::STPSi:
4579	case AArch64::STPWi:
4580	return `4`;
4581	case AArch64::LDRDui:
4582	case AArch64::LDURDi:
4583	case AArch64::LDRDpre:
4584	case AArch64::LDRXui:
4585	case AArch64::LDURXi:
4586	case AArch64::LDRXpre:
4587	case AArch64::STRDui:
4588	case AArch64::STURDi:
4589	case AArch64::STRDpre:
4590	case AArch64::STRXui:
4591	case AArch64::STURXi:
4592	case AArch64::STRXpre:
4593	case AArch64::LDPDi:
4594	case AArch64::LDPXi:
4595	case AArch64::STPDi:
4596	case AArch64::STPXi:
4597	return `8`;
4598	case AArch64::LDRQui:
4599	case AArch64::LDURQi:
4600	case AArch64::STRQui:
4601	case AArch64::STURQi:
4602	case AArch64::STRQpre:
4603	case AArch64::LDPQi:
4604	case AArch64::LDRQpre:
4605	case AArch64::STPQi:
4606	case AArch64::STGi:
4607	case AArch64::STZGi:
4608	case AArch64::ST2Gi:
4609	case AArch64::STZ2Gi:
4610	case AArch64::STGPi:
4611	return `16`;
4612	}
4613	}
4614
4615	bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4616	switch (MI.getOpcode()) {
4617	default:
4618	return false;
4619	case AArch64::LDRWpre:
4620	case AArch64::LDRXpre:
4621	case AArch64::LDRSWpre:
4622	case AArch64::LDRSpre:
4623	case AArch64::LDRDpre:
4624	case AArch64::LDRQpre:
4625	return true;
4626	}
4627	}
4628
4629	bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4630	switch (MI.getOpcode()) {
4631	default:
4632	return false;
4633	case AArch64::STRWpre:
4634	case AArch64::STRXpre:
4635	case AArch64::STRSpre:
4636	case AArch64::STRDpre:
4637	case AArch64::STRQpre:
4638	return true;
4639	}
4640	}
4641
4642	bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4643	return isPreLd(MI) \|\| isPreSt(MI);
4644	}
4645
4646	bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4647	switch (MI.getOpcode()) {
4648	default:
4649	return false;
4650	case AArch64::LDPSi:
4651	case AArch64::LDPSWi:
4652	case AArch64::LDPDi:
4653	case AArch64::LDPQi:
4654	case AArch64::LDPWi:
4655	case AArch64::LDPXi:
4656	case AArch64::STPSi:
4657	case AArch64::STPDi:
4658	case AArch64::STPQi:
4659	case AArch64::STPWi:
4660	case AArch64::STPXi:
4661	case AArch64::STGPi:
4662	return true;
4663	}
4664	}
4665
4666	const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4667	assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4668	unsigned Idx =
4669	AArch64InstrInfo::isPairedLdSt(MI) \|\| AArch64InstrInfo::isPreLdSt(MI) ? `2`
4670	: `1`;
4671	return MI.getOperand(i: Idx);
4672	}
4673
4674	const MachineOperand &
4675	AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4676	assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4677	unsigned Idx =
4678	AArch64InstrInfo::isPairedLdSt(MI) \|\| AArch64InstrInfo::isPreLdSt(MI) ? `3`
4679	: `2`;
4680	return MI.getOperand(i: Idx);
4681	}
4682
4683	const MachineOperand &
4684	AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4685	switch (MI.getOpcode()) {
4686	default:
4687	llvm_unreachable("Unexpected opcode");
4688	case AArch64::LDRBroX:
4689	case AArch64::LDRBBroX:
4690	case AArch64::LDRSBXroX:
4691	case AArch64::LDRSBWroX:
4692	case AArch64::LDRHroX:
4693	case AArch64::LDRHHroX:
4694	case AArch64::LDRSHXroX:
4695	case AArch64::LDRSHWroX:
4696	case AArch64::LDRWroX:
4697	case AArch64::LDRSroX:
4698	case AArch64::LDRSWroX:
4699	case AArch64::LDRDroX:
4700	case AArch64::LDRXroX:
4701	case AArch64::LDRQroX:
4702	return MI.getOperand(i: `4`);
4703	}
4704	}
4705
4706	static const TargetRegisterClass getRegClass(const* MachineInstr &MI,
4707	Register Reg) {
4708	if (MI.getParent() == nullptr)
4709	return nullptr;
4710	const MachineFunction *MF = MI.getParent()->getParent();
4711	return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4712	}
4713
4714	bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4715	auto IsHFPR = [&](const MachineOperand &Op) {
4716	if (!Op.isReg())
4717	return false;
4718	auto Reg = Op.getReg();
4719	if (Reg.isPhysical())
4720	return AArch64::FPR16RegClass.contains(Reg);
4721	const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4722	return TRC == &AArch64::FPR16RegClass \|\|
4723	TRC == &AArch64::FPR16_loRegClass;
4724	};
4725	return llvm::any_of(Range: MI.operands(), P: IsHFPR);
4726	}
4727
4728	bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4729	auto IsQFPR = [&](const MachineOperand &Op) {
4730	if (!Op.isReg())
4731	return false;
4732	auto Reg = Op.getReg();
4733	if (Reg.isPhysical())
4734	return AArch64::FPR128RegClass.contains(Reg);
4735	const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4736	return TRC == &AArch64::FPR128RegClass \|\|
4737	TRC == &AArch64::FPR128_loRegClass;
4738	};
4739	return llvm::any_of(Range: MI.operands(), P: IsQFPR);
4740	}
4741
4742	bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4743	switch (MI.getOpcode()) {
4744	case AArch64::BRK:
4745	case AArch64::HLT:
4746	case AArch64::PACIASP:
4747	case AArch64::PACIBSP:
4748	// Implicit BTI behavior.
4749	return true;
4750	case AArch64::PAUTH_PROLOGUE:
4751	// PAUTH_PROLOGUE expands to PACI(A\|B)SP.
4752	return true;
4753	case AArch64::HINT: {
4754	unsigned Imm = MI.getOperand(i: `0`).getImm();
4755	// Explicit BTI instruction.
4756	if (Imm == `32` \|\| Imm == `34` \|\| Imm == `36` \|\| Imm == `38`)
4757	return true;
4758	// PACI(A\|B)SP instructions.
4759	if (Imm == `25` \|\| Imm == `27`)
4760	return true;
4761	return false;
4762	}
4763	default:
4764	return false;
4765	}
4766	}
4767
4768	bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4769	if (Reg == `0`)
4770	return false;
4771	assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4772	return AArch64::FPR128RegClass.contains(Reg) \|\|
4773	AArch64::FPR64RegClass.contains(Reg) \|\|
4774	AArch64::FPR32RegClass.contains(Reg) \|\|
4775	AArch64::FPR16RegClass.contains(Reg) \|\|
4776	AArch64::FPR8RegClass.contains(Reg);
4777	}
4778
4779	bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4780	auto IsFPR = [&](const MachineOperand &Op) {
4781	if (!Op.isReg())
4782	return false;
4783	auto Reg = Op.getReg();
4784	if (Reg.isPhysical())
4785	return isFpOrNEON(Reg);
4786
4787	const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4788	return TRC == &AArch64::FPR128RegClass \|\|
4789	TRC == &AArch64::FPR128_loRegClass \|\|
4790	TRC == &AArch64::FPR64RegClass \|\|
4791	TRC == &AArch64::FPR64_loRegClass \|\|
4792	TRC == &AArch64::FPR32RegClass \|\| TRC == &AArch64::FPR16RegClass \|\|
4793	TRC == &AArch64::FPR8RegClass;
4794	};
4795	return llvm::any_of(Range: MI.operands(), P: IsFPR);
4796	}
4797
4798	// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4799	// scaled.
4800	static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4801	int Scale = AArch64InstrInfo::getMemScale(Opc);
4802
4803	// If the byte-offset isn't a multiple of the stride, we can't scale this
4804	// offset.
4805	if (Offset % Scale != `0`)
4806	return false;
4807
4808	// Convert the byte-offset used by unscaled into an "element" offset used
4809	// by the scaled pair load/store instructions.
4810	Offset /= Scale;
4811	return true;
4812	}
4813
4814	static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4815	if (FirstOpc == SecondOpc)
4816	return true;
4817	// We can also pair sign-ext and zero-ext instructions.
4818	switch (FirstOpc) {
4819	default:
4820	return false;
4821	case AArch64::STRSui:
4822	case AArch64::STURSi:
4823	return SecondOpc == AArch64::STRSui \|\| SecondOpc == AArch64::STURSi;
4824	case AArch64::STRDui:
4825	case AArch64::STURDi:
4826	return SecondOpc == AArch64::STRDui \|\| SecondOpc == AArch64::STURDi;
4827	case AArch64::STRQui:
4828	case AArch64::STURQi:
4829	return SecondOpc == AArch64::STRQui \|\| SecondOpc == AArch64::STURQi;
4830	case AArch64::STRWui:
4831	case AArch64::STURWi:
4832	return SecondOpc == AArch64::STRWui \|\| SecondOpc == AArch64::STURWi;
4833	case AArch64::STRXui:
4834	case AArch64::STURXi:
4835	return SecondOpc == AArch64::STRXui \|\| SecondOpc == AArch64::STURXi;
4836	case AArch64::LDRSui:
4837	case AArch64::LDURSi:
4838	return SecondOpc == AArch64::LDRSui \|\| SecondOpc == AArch64::LDURSi;
4839	case AArch64::LDRDui:
4840	case AArch64::LDURDi:
4841	return SecondOpc == AArch64::LDRDui \|\| SecondOpc == AArch64::LDURDi;
4842	case AArch64::LDRQui:
4843	case AArch64::LDURQi:
4844	return SecondOpc == AArch64::LDRQui \|\| SecondOpc == AArch64::LDURQi;
4845	case AArch64::LDRWui:
4846	case AArch64::LDURWi:
4847	return SecondOpc == AArch64::LDRSWui \|\| SecondOpc == AArch64::LDURSWi;
4848	case AArch64::LDRSWui:
4849	case AArch64::LDURSWi:
4850	return SecondOpc == AArch64::LDRWui \|\| SecondOpc == AArch64::LDURWi;
4851	case AArch64::LDRXui:
4852	case AArch64::LDURXi:
4853	return SecondOpc == AArch64::LDRXui \|\| SecondOpc == AArch64::LDURXi;
4854	}
4855	// These instructions can't be paired based on their opcodes.
4856	return false;
4857	}
4858
4859	static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4860	int64_t Offset1, unsigned Opcode1, int FI2,
4861	int64_t Offset2, unsigned Opcode2) {
4862	// Accesses through fixed stack object frame indices may access a different
4863	// fixed stack slot. Check that the object offsets + offsets match.
4864	if (MFI.isFixedObjectIndex(ObjectIdx: FI1) && MFI.isFixedObjectIndex(ObjectIdx: FI2)) {
4865	int64_t ObjectOffset1 = MFI.getObjectOffset(ObjectIdx: FI1);
4866	int64_t ObjectOffset2 = MFI.getObjectOffset(ObjectIdx: FI2);
4867	assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4868	// Convert to scaled object offsets.
4869	int Scale1 = AArch64InstrInfo::getMemScale(Opc: Opcode1);
4870	if (ObjectOffset1 % Scale1 != `0`)
4871	return false;
4872	ObjectOffset1 /= Scale1;
4873	int Scale2 = AArch64InstrInfo::getMemScale(Opc: Opcode2);
4874	if (ObjectOffset2 % Scale2 != `0`)
4875	return false;
4876	ObjectOffset2 /= Scale2;
4877	ObjectOffset1 += Offset1;
4878	ObjectOffset2 += Offset2;
4879	return ObjectOffset1 + `1` == ObjectOffset2;
4880	}
4881
4882	return FI1 == FI2;
4883	}
4884
4885	/// Detect opportunities for ldp/stp formation.
4886	///
4887	/// Only called for LdSt for which getMemOperandWithOffset returns true.
4888	bool AArch64InstrInfo::shouldClusterMemOps(
4889	ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4890	bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4891	int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4892	unsigned NumBytes) const {
4893	assert(BaseOps1.size() == `1` && BaseOps2.size() == `1`);
4894	const MachineOperand &BaseOp1 = *BaseOps1.front();
4895	const MachineOperand &BaseOp2 = *BaseOps2.front();
4896	const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4897	const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4898	if (BaseOp1.getType() != BaseOp2.getType())
4899	return false;
4900
4901	assert((BaseOp1.isReg() \|\| BaseOp1.isFI()) &&
4902	"Only base registers and frame indices are supported.");
4903
4904	// Check for both base regs and base FI.
4905	if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4906	return false;
4907
4908	// Only cluster up to a single pair.
4909	if (ClusterSize > `2`)
4910	return false;
4911
4912	if (!isPairableLdStInst(MI: FirstLdSt) \|\| !isPairableLdStInst(MI: SecondLdSt))
4913	return false;
4914
4915	// Can we pair these instructions based on their opcodes?
4916	unsigned FirstOpc = FirstLdSt.getOpcode();
4917	unsigned SecondOpc = SecondLdSt.getOpcode();
4918	if (!canPairLdStOpc(FirstOpc, SecondOpc))
4919	return false;
4920
4921	// Can't merge volatiles or load/stores that have a hint to avoid pair
4922	// formation, for example.
4923	if (!isCandidateToMergeOrPair(MI: FirstLdSt) \|\|
4924	!isCandidateToMergeOrPair(MI: SecondLdSt))
4925	return false;
4926
4927	// isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4928	int64_t Offset1 = FirstLdSt.getOperand(i: `2`).getImm();
4929	if (hasUnscaledLdStOffset(Opc: FirstOpc) && !scaleOffset(Opc: FirstOpc, Offset&: Offset1))
4930	return false;
4931
4932	int64_t Offset2 = SecondLdSt.getOperand(i: `2`).getImm();
4933	if (hasUnscaledLdStOffset(Opc: SecondOpc) && !scaleOffset(Opc: SecondOpc, Offset&: Offset2))
4934	return false;
4935
4936	// Pairwise instructions have a 7-bit signed offset field.
4937	if (Offset1 > `63` \|\| Offset1 < -`64`)
4938	return false;
4939
4940	// The caller should already have ordered First/SecondLdSt by offset.
4941	// Note: except for non-equal frame index bases
4942	if (BaseOp1.isFI()) {
4943	assert((!BaseOp1.isIdenticalTo(BaseOp2) \|\| Offset1 <= Offset2) &&
4944	"Caller should have ordered offsets.");
4945
4946	const MachineFrameInfo &MFI =
4947	FirstLdSt.getParent()->getParent()->getFrameInfo();
4948	return shouldClusterFI(MFI, FI1: BaseOp1.getIndex(), Offset1, Opcode1: FirstOpc,
4949	FI2: BaseOp2.getIndex(), Offset2, Opcode2: SecondOpc);
4950	}
4951
4952	assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4953
4954	return Offset1 + `1` == Offset2;
4955	}
4956
4957	static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4958	MCRegister Reg, unsigned SubIdx,
4959	unsigned State,
4960	const TargetRegisterInfo *TRI) {
4961	if (!SubIdx)
4962	return MIB.addReg(RegNo: Reg, flags: State);
4963
4964	if (Reg.isPhysical())
4965	return MIB.addReg(RegNo: TRI->getSubReg(Reg, Idx: SubIdx), flags: State);
4966	return MIB.addReg(RegNo: Reg, flags: State, SubReg: SubIdx);
4967	}
4968
/// Return true if the distance from \p SrcReg up to \p DestReg, taken modulo
/// 32, is smaller than \p NumRegs.
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  // Unsigned subtraction wraps, so reducing modulo 32 yields the positive
  // remainder even when DestReg < SrcReg.
  const unsigned Distance = (DestReg - SrcReg) % 32u;
  return Distance < NumRegs;
}
4975
4976	void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4977	MachineBasicBlock::iterator I,
4978	const DebugLoc &DL, MCRegister DestReg,
4979	MCRegister SrcReg, bool KillSrc,
4980	unsigned Opcode,
4981	ArrayRef<unsigned> Indices) const {
4982	assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4983	const TargetRegisterInfo *TRI = &getRegisterInfo();
4984	uint16_t DestEncoding = TRI->getEncodingValue(Reg: DestReg);
4985	uint16_t SrcEncoding = TRI->getEncodingValue(Reg: SrcReg);
4986	unsigned NumRegs = Indices.size();
4987
4988	int SubReg = `0`, End = NumRegs, Incr = `1`;
4989	if (forwardCopyWillClobberTuple(DestReg: DestEncoding, SrcReg: SrcEncoding, NumRegs)) {
4990	SubReg = NumRegs - `1`;
4991	End = -`1`;
4992	Incr = -`1`;
4993	}
4994
4995	for (; SubReg != End; SubReg += Incr) {
4996	const MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode));
4997	AddSubReg(MIB, Reg: DestReg, SubIdx: Indices [SubReg], State: RegState::Define, TRI);
4998	AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices [SubReg], State: `0`, TRI);
4999	AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices [SubReg], State: getKillRegState(B: KillSrc), TRI);
5000	}
5001	}
5002
5003	void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5004	MachineBasicBlock::iterator I,
5005	const DebugLoc &DL, MCRegister DestReg,
5006	MCRegister SrcReg, bool KillSrc,
5007	unsigned Opcode, unsigned ZeroReg,
5008	llvm::ArrayRef<unsigned> Indices) const {
5009	const TargetRegisterInfo *TRI = &getRegisterInfo();
5010	unsigned NumRegs = Indices.size();
5011
5012	#ifndef NDEBUG
5013	uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5014	uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5015	assert(DestEncoding % NumRegs == `0` && SrcEncoding % NumRegs == `0` &&
5016	"GPR reg sequences should not be able to overlap");
5017	#endif
5018
5019	for (unsigned SubReg = `0`; SubReg != NumRegs; ++SubReg) {
5020	const MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode));
5021	AddSubReg(MIB, Reg: DestReg, SubIdx: Indices [SubReg], State: RegState::Define, TRI);
5022	MIB.addReg(RegNo: ZeroReg);
5023	AddSubReg(MIB, Reg: SrcReg, SubIdx: Indices [SubReg], State: getKillRegState(B: KillSrc), TRI);
5024	MIB.addImm(Val: `0`);
5025	}
5026	}
5027
5028	void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5029	MachineBasicBlock::iterator I,
5030	const DebugLoc &DL, Register DestReg,
5031	Register SrcReg, bool KillSrc,
5032	bool RenamableDest,
5033	bool RenamableSrc) const {
5034	if (AArch64::GPR32spRegClass.contains(Reg: DestReg) &&
5035	(AArch64::GPR32spRegClass.contains(Reg: SrcReg) \|\| SrcReg == AArch64::WZR)) {
5036	const TargetRegisterInfo *TRI = &getRegisterInfo();
5037
5038	if (DestReg == AArch64::WSP \|\| SrcReg == AArch64::WSP) {
5039	// If either operand is WSP, expand to ADD #0.
5040	if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5041	!Subtarget.hasZeroCycleRegMoveGPR32()) {
5042	// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5043	MCRegister DestRegX = TRI->getMatchingSuperReg(
5044	Reg: DestReg, SubIdx: AArch64::sub_32, RC: &AArch64::GPR64spRegClass);
5045	MCRegister SrcRegX = TRI->getMatchingSuperReg(
5046	Reg: SrcReg, SubIdx: AArch64::sub_32, RC: &AArch64::GPR64spRegClass);
5047	// This instruction is reading and writing X registers. This may upset
5048	// the register scavenger and machine verifier, so we need to indicate
5049	// that we are reading an undefined value from SrcRegX, but a proper
5050	// value from SrcReg.
5051	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ADDXri), DestReg: DestRegX)
5052	.addReg(RegNo: SrcRegX, flags: RegState::Undef)
5053	.addImm(Val: `0`)
5054	.addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: `0`))
5055	.addReg(RegNo: SrcReg, flags: RegState::Implicit \| getKillRegState(B: KillSrc));
5056	} else {
5057	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ADDWri), DestReg)
5058	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
5059	.addImm(Val: `0`)
5060	.addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: `0`));
5061	}
5062	} else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
5063	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::MOVZWi), DestReg)
5064	.addImm(Val: `0`)
5065	.addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: `0`));
5066	} else {
5067	if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5068	!Subtarget.hasZeroCycleRegMoveGPR32()) {
5069	// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5070	MCRegister DestRegX = TRI->getMatchingSuperReg(
5071	Reg: DestReg, SubIdx: AArch64::sub_32, RC: &AArch64::GPR64spRegClass);
5072	MCRegister SrcRegX = TRI->getMatchingSuperReg(
5073	Reg: SrcReg, SubIdx: AArch64::sub_32, RC: &AArch64::GPR64spRegClass);
5074	// This instruction is reading and writing X registers. This may upset
5075	// the register scavenger and machine verifier, so we need to indicate
5076	// that we are reading an undefined value from SrcRegX, but a proper
5077	// value from SrcReg.
5078	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORRXrr), DestReg: DestRegX)
5079	.addReg(RegNo: AArch64::XZR)
5080	.addReg(RegNo: SrcRegX, flags: RegState::Undef)
5081	.addReg(RegNo: SrcReg, flags: RegState::Implicit \| getKillRegState(B: KillSrc));
5082	} else {
5083	// Otherwise, expand to ORR WZR.
5084	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORRWrr), DestReg)
5085	.addReg(RegNo: AArch64::WZR)
5086	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5087	}
5088	}
5089	return;
5090	}
5091
5092	// Copy a Predicate register by ORRing with itself.
5093	if (AArch64::PPRRegClass.contains(Reg: DestReg) &&
5094	AArch64::PPRRegClass.contains(Reg: SrcReg)) {
5095	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5096	"Unexpected SVE register.");
5097	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORR_PPzPP), DestReg)
5098	.addReg(RegNo: SrcReg) // Pg
5099	.addReg(RegNo: SrcReg)
5100	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5101	return;
5102	}
5103
5104	// Copy a predicate-as-counter register by ORRing with itself as if it
5105	// were a regular predicate (mask) register.
5106	bool DestIsPNR = AArch64::PNRRegClass.contains(Reg: DestReg);
5107	bool SrcIsPNR = AArch64::PNRRegClass.contains(Reg: SrcReg);
5108	if (DestIsPNR \|\| SrcIsPNR) {
5109	auto ToPPR = [](MCRegister R) -> MCRegister {
5110	return (R - AArch64::PN0) + AArch64::P0;
5111	};
5112	MCRegister PPRSrcReg = SrcIsPNR ? ToPPR (SrcReg) : SrcReg.asMCReg();
5113	MCRegister PPRDestReg = DestIsPNR ? ToPPR (DestReg) : DestReg.asMCReg();
5114
5115	if (PPRSrcReg != PPRDestReg) {
5116	auto NewMI = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORR_PPzPP), DestReg: PPRDestReg)
5117	.addReg(RegNo: PPRSrcReg) // Pg
5118	.addReg(RegNo: PPRSrcReg)
5119	.addReg(RegNo: PPRSrcReg, flags: getKillRegState(B: KillSrc));
5120	if (DestIsPNR)
5121	NewMI.addDef(RegNo: DestReg, Flags: RegState::Implicit);
5122	}
5123	return;
5124	}
5125
5126	// Copy a Z register by ORRing with itself.
5127	if (AArch64::ZPRRegClass.contains(Reg: DestReg) &&
5128	AArch64::ZPRRegClass.contains(Reg: SrcReg)) {
5129	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5130	"Unexpected SVE register.");
5131	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORR_ZZZ), DestReg)
5132	.addReg(RegNo: SrcReg)
5133	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5134	return;
5135	}
5136
5137	// Copy a Z register pair by copying the individual sub-registers.
5138	if ((AArch64::ZPR2RegClass.contains(Reg: DestReg) \|\|
5139	AArch64::ZPR2StridedOrContiguousRegClass.contains(Reg: DestReg)) &&
5140	(AArch64::ZPR2RegClass.contains(Reg: SrcReg) \|\|
5141	AArch64::ZPR2StridedOrContiguousRegClass.contains(Reg: SrcReg))) {
5142	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5143	"Unexpected SVE register.");
5144	static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5145	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORR_ZZZ,
5146	Indices);
5147	return;
5148	}
5149
5150	// Copy a Z register triple by copying the individual sub-registers.
5151	if (AArch64::ZPR3RegClass.contains(Reg: DestReg) &&
5152	AArch64::ZPR3RegClass.contains(Reg: SrcReg)) {
5153	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5154	"Unexpected SVE register.");
5155	static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5156	AArch64::zsub2};
5157	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORR_ZZZ,
5158	Indices);
5159	return;
5160	}
5161
5162	// Copy a Z register quad by copying the individual sub-registers.
5163	if ((AArch64::ZPR4RegClass.contains(Reg: DestReg) \|\|
5164	AArch64::ZPR4StridedOrContiguousRegClass.contains(Reg: DestReg)) &&
5165	(AArch64::ZPR4RegClass.contains(Reg: SrcReg) \|\|
5166	AArch64::ZPR4StridedOrContiguousRegClass.contains(Reg: SrcReg))) {
5167	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5168	"Unexpected SVE register.");
5169	static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5170	AArch64::zsub2, AArch64::zsub3};
5171	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORR_ZZZ,
5172	Indices);
5173	return;
5174	}
5175
5176	if (AArch64::GPR64spRegClass.contains(Reg: DestReg) &&
5177	(AArch64::GPR64spRegClass.contains(Reg: SrcReg) \|\| SrcReg == AArch64::XZR)) {
5178	if (DestReg == AArch64::SP \|\| SrcReg == AArch64::SP) {
5179	// If either operand is SP, expand to ADD #0.
5180	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ADDXri), DestReg)
5181	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
5182	.addImm(Val: `0`)
5183	.addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: `0`));
5184	} else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
5185	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::MOVZXi), DestReg)
5186	.addImm(Val: `0`)
5187	.addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: `0`));
5188	} else {
5189	// Otherwise, expand to ORR XZR.
5190	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORRXrr), DestReg)
5191	.addReg(RegNo: AArch64::XZR)
5192	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5193	}
5194	return;
5195	}
5196
5197	// Copy a DDDD register quad by copying the individual sub-registers.
5198	if (AArch64::DDDDRegClass.contains(Reg: DestReg) &&
5199	AArch64::DDDDRegClass.contains(Reg: SrcReg)) {
5200	static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5201	AArch64::dsub2, AArch64::dsub3};
5202	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv8i8,
5203	Indices);
5204	return;
5205	}
5206
5207	// Copy a DDD register triple by copying the individual sub-registers.
5208	if (AArch64::DDDRegClass.contains(Reg: DestReg) &&
5209	AArch64::DDDRegClass.contains(Reg: SrcReg)) {
5210	static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5211	AArch64::dsub2};
5212	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv8i8,
5213	Indices);
5214	return;
5215	}
5216
5217	// Copy a DD register pair by copying the individual sub-registers.
5218	if (AArch64::DDRegClass.contains(Reg: DestReg) &&
5219	AArch64::DDRegClass.contains(Reg: SrcReg)) {
5220	static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5221	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv8i8,
5222	Indices);
5223	return;
5224	}
5225
5226	// Copy a QQQQ register quad by copying the individual sub-registers.
5227	if (AArch64::QQQQRegClass.contains(Reg: DestReg) &&
5228	AArch64::QQQQRegClass.contains(Reg: SrcReg)) {
5229	static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5230	AArch64::qsub2, AArch64::qsub3};
5231	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv16i8,
5232	Indices);
5233	return;
5234	}
5235
5236	// Copy a QQQ register triple by copying the individual sub-registers.
5237	if (AArch64::QQQRegClass.contains(Reg: DestReg) &&
5238	AArch64::QQQRegClass.contains(Reg: SrcReg)) {
5239	static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5240	AArch64::qsub2};
5241	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv16i8,
5242	Indices);
5243	return;
5244	}
5245
5246	// Copy a QQ register pair by copying the individual sub-registers.
5247	if (AArch64::QQRegClass.contains(Reg: DestReg) &&
5248	AArch64::QQRegClass.contains(Reg: SrcReg)) {
5249	static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5250	copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRv16i8,
5251	Indices);
5252	return;
5253	}
5254
5255	if (AArch64::XSeqPairsClassRegClass.contains(Reg: DestReg) &&
5256	AArch64::XSeqPairsClassRegClass.contains(Reg: SrcReg)) {
5257	static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5258	copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRXrs,
5259	ZeroReg: AArch64::XZR, Indices);
5260	return;
5261	}
5262
5263	if (AArch64::WSeqPairsClassRegClass.contains(Reg: DestReg) &&
5264	AArch64::WSeqPairsClassRegClass.contains(Reg: SrcReg)) {
5265	static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5266	copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, Opcode: AArch64::ORRWrs,
5267	ZeroReg: AArch64::WZR, Indices);
5268	return;
5269	}
5270
5271	if (AArch64::FPR128RegClass.contains(Reg: DestReg) &&
5272	AArch64::FPR128RegClass.contains(Reg: SrcReg)) {
5273	if (Subtarget.isSVEorStreamingSVEAvailable() &&
5274	!Subtarget.isNeonAvailable())
5275	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORR_ZZZ))
5276	.addReg(RegNo: AArch64::Z0 + (DestReg - AArch64::Q0), flags: RegState::Define)
5277	.addReg(RegNo: AArch64::Z0 + (SrcReg - AArch64::Q0))
5278	.addReg(RegNo: AArch64::Z0 + (SrcReg - AArch64::Q0));
5279	else if (Subtarget.isNeonAvailable())
5280	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::ORRv16i8), DestReg)
5281	.addReg(RegNo: SrcReg)
5282	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5283	else {
5284	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::STRQpre))
5285	.addReg(RegNo: AArch64::SP, flags: RegState::Define)
5286	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
5287	.addReg(RegNo: AArch64::SP)
5288	.addImm(Val: -`16`);
5289	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::LDRQpost))
5290	.addReg(RegNo: AArch64::SP, flags: RegState::Define)
5291	.addReg(RegNo: DestReg, flags: RegState::Define)
5292	.addReg(RegNo: AArch64::SP)
5293	.addImm(Val: `16`);
5294	}
5295	return;
5296	}
5297
5298	if (AArch64::FPR64RegClass.contains(Reg: DestReg) &&
5299	AArch64::FPR64RegClass.contains(Reg: SrcReg)) {
5300	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDr), DestReg)
5301	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5302	return;
5303	}
5304
5305	if (AArch64::FPR32RegClass.contains(Reg: DestReg) &&
5306	AArch64::FPR32RegClass.contains(Reg: SrcReg)) {
5307	if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5308	!Subtarget.hasZeroCycleRegMoveFPR32()) {
5309	const TargetRegisterInfo *TRI = &getRegisterInfo();
5310	MCRegister DestRegD = TRI->getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::ssub,
5311	RC: &AArch64::FPR64RegClass);
5312	MCRegister SrcRegD = TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::ssub,
5313	RC: &AArch64::FPR64RegClass);
5314	// This instruction is reading and writing D registers. This may upset
5315	// the register scavenger and machine verifier, so we need to indicate
5316	// that we are reading an undefined value from SrcRegD, but a proper
5317	// value from SrcReg.
5318	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDr), DestReg: DestRegD)
5319	.addReg(RegNo: SrcRegD, flags: RegState::Undef)
5320	.addReg(RegNo: SrcReg, flags: RegState::Implicit \| getKillRegState(B: KillSrc));
5321	} else {
5322	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVSr), DestReg)
5323	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5324	}
5325	return;
5326	}
5327
5328	if (AArch64::FPR16RegClass.contains(Reg: DestReg) &&
5329	AArch64::FPR16RegClass.contains(Reg: SrcReg)) {
5330	if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5331	!Subtarget.hasZeroCycleRegMoveFPR32()) {
5332	const TargetRegisterInfo *TRI = &getRegisterInfo();
5333	MCRegister DestRegD = TRI->getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::hsub,
5334	RC: &AArch64::FPR64RegClass);
5335	MCRegister SrcRegD = TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::hsub,
5336	RC: &AArch64::FPR64RegClass);
5337	// This instruction is reading and writing D registers. This may upset
5338	// the register scavenger and machine verifier, so we need to indicate
5339	// that we are reading an undefined value from SrcRegD, but a proper
5340	// value from SrcReg.
5341	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDr), DestReg: DestRegD)
5342	.addReg(RegNo: SrcRegD, flags: RegState::Undef)
5343	.addReg(RegNo: SrcReg, flags: RegState::Implicit \| getKillRegState(B: KillSrc));
5344	} else {
5345	DestReg = RI.getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::hsub,
5346	RC: &AArch64::FPR32RegClass);
5347	SrcReg = RI.getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::hsub,
5348	RC: &AArch64::FPR32RegClass);
5349	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVSr), DestReg)
5350	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5351	}
5352	return;
5353	}
5354
5355	if (AArch64::FPR8RegClass.contains(Reg: DestReg) &&
5356	AArch64::FPR8RegClass.contains(Reg: SrcReg)) {
5357	if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5358	!Subtarget.hasZeroCycleRegMoveFPR32()) {
5359	const TargetRegisterInfo *TRI = &getRegisterInfo();
5360	MCRegister DestRegD = TRI->getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::bsub,
5361	RC: &AArch64::FPR64RegClass);
5362	MCRegister SrcRegD = TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::bsub,
5363	RC: &AArch64::FPR64RegClass);
5364	// This instruction is reading and writing D registers. This may upset
5365	// the register scavenger and machine verifier, so we need to indicate
5366	// that we are reading an undefined value from SrcRegD, but a proper
5367	// value from SrcReg.
5368	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDr), DestReg: DestRegD)
5369	.addReg(RegNo: SrcRegD, flags: RegState::Undef)
5370	.addReg(RegNo: SrcReg, flags: RegState::Implicit \| getKillRegState(B: KillSrc));
5371	} else {
5372	DestReg = RI.getMatchingSuperReg(Reg: DestReg, SubIdx: AArch64::bsub,
5373	RC: &AArch64::FPR32RegClass);
5374	SrcReg = RI.getMatchingSuperReg(Reg: SrcReg, SubIdx: AArch64::bsub,
5375	RC: &AArch64::FPR32RegClass);
5376	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVSr), DestReg)
5377	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5378	}
5379	return;
5380	}
5381
5382	// Copies between GPR64 and FPR64.
5383	if (AArch64::FPR64RegClass.contains(Reg: DestReg) &&
5384	AArch64::GPR64RegClass.contains(Reg: SrcReg)) {
5385	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVXDr), DestReg)
5386	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5387	return;
5388	}
5389	if (AArch64::GPR64RegClass.contains(Reg: DestReg) &&
5390	AArch64::FPR64RegClass.contains(Reg: SrcReg)) {
5391	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVDXr), DestReg)
5392	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5393	return;
5394	}
5395	// Copies between GPR32 and FPR32.
5396	if (AArch64::FPR32RegClass.contains(Reg: DestReg) &&
5397	AArch64::GPR32RegClass.contains(Reg: SrcReg)) {
5398	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVWSr), DestReg)
5399	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5400	return;
5401	}
5402	if (AArch64::GPR32RegClass.contains(Reg: DestReg) &&
5403	AArch64::FPR32RegClass.contains(Reg: SrcReg)) {
5404	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::FMOVSWr), DestReg)
5405	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
5406	return;
5407	}
5408
5409	if (DestReg == AArch64::NZCV) {
5410	assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5411	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::MSR))
5412	.addImm(Val: AArch64SysReg::NZCV)
5413	.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
5414	.addReg(RegNo: AArch64::NZCV, flags: RegState::Implicit \| RegState::Define);
5415	return;
5416	}
5417
5418	if (SrcReg == AArch64::NZCV) {
5419	assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5420	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: AArch64::MRS), DestReg)
5421	.addImm(Val: AArch64SysReg::NZCV)
5422	.addReg(RegNo: AArch64::NZCV, flags: RegState::Implicit \| getKillRegState(B: KillSrc));
5423	return;
5424	}
5425
5426	#ifndef NDEBUG
5427	const TargetRegisterInfo &TRI = getRegisterInfo();
5428	errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5429	<< TRI.getRegAsmName(SrcReg) << "\n";
5430	#endif
5431	llvm_unreachable("unimplemented reg-to-reg copy");
5432	}
5433
5434	static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
5435	MachineBasicBlock &MBB,
5436	MachineBasicBlock::iterator InsertBefore,
5437	const MCInstrDesc &MCID,
5438	Register SrcReg, bool IsKill,
5439	unsigned SubIdx0, unsigned SubIdx1, int FI,
5440	MachineMemOperand *MMO) {
5441	Register SrcReg0 = SrcReg;
5442	Register SrcReg1 = SrcReg;
5443	if (SrcReg.isPhysical()) {
5444	SrcReg0 = TRI.getSubReg(Reg: SrcReg, Idx: SubIdx0);
5445	SubIdx0 = `0`;
5446	SrcReg1 = TRI.getSubReg(Reg: SrcReg, Idx: SubIdx1);
5447	SubIdx1 = `0`;
5448	}
5449	BuildMI(BB&: MBB, I: InsertBefore, MIMD: DebugLoc (), MCID)
5450	.addReg(RegNo: SrcReg0, flags: getKillRegState(B: IsKill), SubReg: SubIdx0)
5451	.addReg(RegNo: SrcReg1, flags: getKillRegState(B: IsKill), SubReg: SubIdx1)
5452	.addFrameIndex(Idx: FI)
5453	.addImm(Val: `0`)
5454	.addMemOperand(MMO);
5455	}
5456
5457	void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
5458	MachineBasicBlock::iterator MBBI,
5459	Register SrcReg, bool isKill, int FI,
5460	const TargetRegisterClass *RC,
5461	const TargetRegisterInfo *TRI,
5462	Register VReg,
5463	MachineInstr::MIFlag Flags) const {
5464	MachineFunction &MF = *MBB.getParent();
5465	MachineFrameInfo &MFI = MF.getFrameInfo();
5466
5467	MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5468	MachineMemOperand *MMO =
5469	MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOStore,
5470	Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
5471	unsigned Opc = `0`;
5472	bool Offset = true;
5473	MCRegister PNRReg = MCRegister::NoRegister;
5474	unsigned StackID = TargetStackID::Default;
5475	switch (TRI->getSpillSize(RC: *RC)) {
5476	case `1`:
5477	if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5478	Opc = AArch64::STRBui;
5479	break;
5480	case `2`: {
5481	if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5482	Opc = AArch64::STRHui;
5483	else if (AArch64::PNRRegClass.hasSubClassEq(RC) \|\|
5484	AArch64::PPRRegClass.hasSubClassEq(RC)) {
5485	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5486	"Unexpected register store without SVE store instructions");
5487	Opc = AArch64::STR_PXI;
5488	StackID = TargetStackID::ScalableVector;
5489	}
5490	break;
5491	}
5492	case `4`:
5493	if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5494	Opc = AArch64::STRWui;
5495	if (SrcReg.isVirtual())
5496	MF.getRegInfo().constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR32RegClass);
5497	else
5498	assert(SrcReg != AArch64::WSP);
5499	} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5500	Opc = AArch64::STRSui;
5501	else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5502	Opc = AArch64::STR_PPXI;
5503	StackID = TargetStackID::ScalableVector;
5504	}
5505	break;
5506	case `8`:
5507	if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5508	Opc = AArch64::STRXui;
5509	if (SrcReg.isVirtual())
5510	MF.getRegInfo().constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR64RegClass);
5511	else
5512	assert(SrcReg != AArch64::SP);
5513	} else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5514	Opc = AArch64::STRDui;
5515	} else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5516	storeRegPairToStackSlot(TRI: getRegisterInfo(), MBB, InsertBefore: MBBI,
5517	MCID: get(Opcode: AArch64::STPWi), SrcReg, IsKill: isKill,
5518	SubIdx0: AArch64::sube32, SubIdx1: AArch64::subo32, FI, MMO);
5519	return;
5520	}
5521	break;
5522	case `16`:
5523	if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5524	Opc = AArch64::STRQui;
5525	else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5526	assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5527	Opc = AArch64::ST1Twov1d;
5528	Offset = false;
5529	} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5530	storeRegPairToStackSlot(TRI: getRegisterInfo(), MBB, InsertBefore: MBBI,
5531	MCID: get(Opcode: AArch64::STPXi), SrcReg, IsKill: isKill,
5532	SubIdx0: AArch64::sube64, SubIdx1: AArch64::subo64, FI, MMO);
5533	return;
5534	} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5535	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5536	"Unexpected register store without SVE store instructions");
5537	Opc = AArch64::STR_ZXI;
5538	StackID = TargetStackID::ScalableVector;
5539	} else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5540	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5541	"Unexpected predicate store without SVE store instructions");
5542	Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
5543	StackID = TargetStackID::ScalableVector;
5544	}
5545	break;
5546	case `24`:
5547	if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5548	assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5549	Opc = AArch64::ST1Threev1d;
5550	Offset = false;
5551	}
5552	break;
5553	case `32`:
5554	if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5555	assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5556	Opc = AArch64::ST1Fourv1d;
5557	Offset = false;
5558	} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5559	assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5560	Opc = AArch64::ST1Twov2d;
5561	Offset = false;
5562	} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) \|\|
5563	AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5564	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5565	"Unexpected register store without SVE store instructions");
5566	Opc = AArch64::STR_ZZXI;
5567	StackID = TargetStackID::ScalableVector;
5568	}
5569	break;
5570	case `48`:
5571	if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5572	assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5573	Opc = AArch64::ST1Threev2d;
5574	Offset = false;
5575	} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5576	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5577	"Unexpected register store without SVE store instructions");
5578	Opc = AArch64::STR_ZZZXI;
5579	StackID = TargetStackID::ScalableVector;
5580	}
5581	break;
5582	case `64`:
5583	if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5584	assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5585	Opc = AArch64::ST1Fourv2d;
5586	Offset = false;
5587	} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) \|\|
5588	AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5589	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5590	"Unexpected register store without SVE store instructions");
5591	Opc = AArch64::STR_ZZZZXI;
5592	StackID = TargetStackID::ScalableVector;
5593	}
5594	break;
5595	}
5596	assert(Opc && "Unknown register class");
5597	MFI.setStackID(ObjectIdx: FI, ID: StackID);
5598
5599	const MachineInstrBuilder MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc (), MCID: get(Opcode: Opc))
5600	.addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
5601	.addFrameIndex(Idx: FI);
5602
5603	if (Offset)
5604	MI.addImm(Val: `0`);
5605	if (PNRReg.isValid())
5606	MI.addDef(RegNo: PNRReg, Flags: RegState::Implicit);
5607	MI.addMemOperand(MMO);
5608	}
5609
5610	static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
5611	MachineBasicBlock &MBB,
5612	MachineBasicBlock::iterator InsertBefore,
5613	const MCInstrDesc &MCID,
5614	Register DestReg, unsigned SubIdx0,
5615	unsigned SubIdx1, int FI,
5616	MachineMemOperand *MMO) {
5617	Register DestReg0 = DestReg;
5618	Register DestReg1 = DestReg;
5619	bool IsUndef = true;
5620	if (DestReg.isPhysical()) {
5621	DestReg0 = TRI.getSubReg(Reg: DestReg, Idx: SubIdx0);
5622	SubIdx0 = `0`;
5623	DestReg1 = TRI.getSubReg(Reg: DestReg, Idx: SubIdx1);
5624	SubIdx1 = `0`;
5625	IsUndef = false;
5626	}
5627	BuildMI(BB&: MBB, I: InsertBefore, MIMD: DebugLoc (), MCID)
5628	.addReg(RegNo: DestReg0, flags: RegState::Define \| getUndefRegState(B: IsUndef), SubReg: SubIdx0)
5629	.addReg(RegNo: DestReg1, flags: RegState::Define \| getUndefRegState(B: IsUndef), SubReg: SubIdx1)
5630	.addFrameIndex(Idx: FI)
5631	.addImm(Val: `0`)
5632	.addMemOperand(MMO);
5633	}
5634
5635	void AArch64InstrInfo::loadRegFromStackSlot(
5636	MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
5637	int FI, const TargetRegisterClass RC, const* TargetRegisterInfo *TRI,
5638	Register VReg, MachineInstr::MIFlag Flags) const {
5639	MachineFunction &MF = *MBB.getParent();
5640	MachineFrameInfo &MFI = MF.getFrameInfo();
5641	MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5642	MachineMemOperand *MMO =
5643	MF.getMachineMemOperand(PtrInfo, F: MachineMemOperand::MOLoad,
5644	Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
5645
5646	unsigned Opc = `0`;
5647	bool Offset = true;
5648	unsigned StackID = TargetStackID::Default;
5649	Register PNRReg = MCRegister::NoRegister;
5650	switch (TRI->getSpillSize(RC: *RC)) {
5651	case `1`:
5652	if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5653	Opc = AArch64::LDRBui;
5654	break;
5655	case `2`: {
5656	bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5657	if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5658	Opc = AArch64::LDRHui;
5659	else if (IsPNR \|\| AArch64::PPRRegClass.hasSubClassEq(RC)) {
5660	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5661	"Unexpected register load without SVE load instructions");
5662	if (IsPNR)
5663	PNRReg = DestReg;
5664	Opc = AArch64::LDR_PXI;
5665	StackID = TargetStackID::ScalableVector;
5666	}
5667	break;
5668	}
5669	case `4`:
5670	if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5671	Opc = AArch64::LDRWui;
5672	if (DestReg.isVirtual())
5673	MF.getRegInfo().constrainRegClass(Reg: DestReg, RC: &AArch64::GPR32RegClass);
5674	else
5675	assert(DestReg != AArch64::WSP);
5676	} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5677	Opc = AArch64::LDRSui;
5678	else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5679	Opc = AArch64::LDR_PPXI;
5680	StackID = TargetStackID::ScalableVector;
5681	}
5682	break;
5683	case `8`:
5684	if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5685	Opc = AArch64::LDRXui;
5686	if (DestReg.isVirtual())
5687	MF.getRegInfo().constrainRegClass(Reg: DestReg, RC: &AArch64::GPR64RegClass);
5688	else
5689	assert(DestReg != AArch64::SP);
5690	} else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5691	Opc = AArch64::LDRDui;
5692	} else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5693	loadRegPairFromStackSlot(TRI: getRegisterInfo(), MBB, InsertBefore: MBBI,
5694	MCID: get(Opcode: AArch64::LDPWi), DestReg, SubIdx0: AArch64::sube32,
5695	SubIdx1: AArch64::subo32, FI, MMO);
5696	return;
5697	}
5698	break;
5699	case `16`:
5700	if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5701	Opc = AArch64::LDRQui;
5702	else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5703	assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5704	Opc = AArch64::LD1Twov1d;
5705	Offset = false;
5706	} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5707	loadRegPairFromStackSlot(TRI: getRegisterInfo(), MBB, InsertBefore: MBBI,
5708	MCID: get(Opcode: AArch64::LDPXi), DestReg, SubIdx0: AArch64::sube64,
5709	SubIdx1: AArch64::subo64, FI, MMO);
5710	return;
5711	} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5712	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5713	"Unexpected register load without SVE load instructions");
5714	Opc = AArch64::LDR_ZXI;
5715	StackID = TargetStackID::ScalableVector;
5716	} else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5717	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5718	"Unexpected predicate load without SVE load instructions");
5719	Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
5720	StackID = TargetStackID::ScalableVector;
5721	}
5722	break;
5723	case `24`:
5724	if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5725	assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5726	Opc = AArch64::LD1Threev1d;
5727	Offset = false;
5728	}
5729	break;
5730	case `32`:
5731	if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5732	assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5733	Opc = AArch64::LD1Fourv1d;
5734	Offset = false;
5735	} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5736	assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5737	Opc = AArch64::LD1Twov2d;
5738	Offset = false;
5739	} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) \|\|
5740	AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5741	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5742	"Unexpected register load without SVE load instructions");
5743	Opc = AArch64::LDR_ZZXI;
5744	StackID = TargetStackID::ScalableVector;
5745	}
5746	break;
5747	case `48`:
5748	if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5749	assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5750	Opc = AArch64::LD1Threev2d;
5751	Offset = false;
5752	} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5753	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5754	"Unexpected register load without SVE load instructions");
5755	Opc = AArch64::LDR_ZZZXI;
5756	StackID = TargetStackID::ScalableVector;
5757	}
5758	break;
5759	case `64`:
5760	if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5761	assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5762	Opc = AArch64::LD1Fourv2d;
5763	Offset = false;
5764	} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) \|\|
5765	AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5766	assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5767	"Unexpected register load without SVE load instructions");
5768	Opc = AArch64::LDR_ZZZZXI;
5769	StackID = TargetStackID::ScalableVector;
5770	}
5771	break;
5772	}
5773
5774	assert(Opc && "Unknown register class");
5775	MFI.setStackID(ObjectIdx: FI, ID: StackID);
5776
5777	const MachineInstrBuilder MI = BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc (), MCID: get(Opcode: Opc))
5778	.addReg(RegNo: DestReg, flags: getDefRegState(B: true))
5779	.addFrameIndex(Idx: FI);
5780	if (Offset)
5781	MI.addImm(Val: `0`);
5782	if (PNRReg.isValid() && !PNRReg.isVirtual())
5783	MI.addDef(RegNo: PNRReg, Flags: RegState::Implicit);
5784	MI.addMemOperand(MMO);
5785	}
5786
5787	bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
5788	const MachineInstr &UseMI,
5789	const TargetRegisterInfo *TRI) {
5790	return any_of(Range: instructionsWithoutDebug(It: std::next(x: DefMI.getIterator()),
5791	End: UseMI.getIterator()),
5792	P: [TRI](const MachineInstr &I) {
5793	return I.modifiesRegister(Reg: AArch64::NZCV, TRI) \|\|
5794	I.readsRegister(Reg: AArch64::NZCV, TRI);
5795	});
5796	}
5797
5798	void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5799	const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5800	// The smallest scalable element supported by scaled SVE addressing
5801	// modes are predicates, which are 2 scalable bytes in size. So the scalable
5802	// byte offset must always be a multiple of 2.
5803	assert(Offset.getScalable() % `2` == `0` && "Invalid frame offset");
5804
5805	// VGSized offsets are divided by '2', because the VG register is the
5806	// the number of 64bit granules as opposed to 128bit vector chunks,
5807	// which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5808	// So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5809	// VG = n 2 and the dwarf offset must be VG * 8 bytes.*
5810	ByteSized = Offset.getFixed();
5811	VGSized = Offset.getScalable() / `2`;
5812	}
5813
5814	/// Returns the offset in parts to which this frame offset can be
5815	/// decomposed for the purpose of describing a frame offset.
5816	/// For non-scalable offsets this is simply its byte size.
5817	void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5818	const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5819	int64_t &NumDataVectors) {
5820	// The smallest scalable element supported by scaled SVE addressing
5821	// modes are predicates, which are 2 scalable bytes in size. So the scalable
5822	// byte offset must always be a multiple of 2.
5823	assert(Offset.getScalable() % `2` == `0` && "Invalid frame offset");
5824
5825	NumBytes = Offset.getFixed();
5826	NumDataVectors = `0`;
5827	NumPredicateVectors = Offset.getScalable() / `2`;
5828	// This method is used to get the offsets to adjust the frame offset.
5829	// If the function requires ADDPL to be used and needs more than two ADDPL
5830	// instructions, part of the offset is folded into NumDataVectors so that it
5831	// uses ADDVL for part of it, reducing the number of ADDPL instructions.
5832	if (NumPredicateVectors % `8` == `0` \|\| NumPredicateVectors < -`64` \|\|
5833	NumPredicateVectors > `62`) {
5834	NumDataVectors = NumPredicateVectors / `8`;
5835	NumPredicateVectors -= NumDataVectors * `8`;
5836	}
5837	}
5838
5839	// Convenience function to create a DWARF expression for
5840	// Expr + NumBytes + NumVGScaledBytes AArch64::VG*
5841	static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5842	int NumVGScaledBytes, unsigned VG,
5843	llvm::raw_string_ostream &Comment) {
5844	uint8_t buffer[`16`];
5845
5846	if (NumBytes) {
5847	Expr.push_back(Elt: dwarf::DW_OP_consts);
5848	Expr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: NumBytes, p: buffer));
5849	Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
5850	Comment << (NumBytes < `0` ? " - " : " + ") << std::abs(x: NumBytes);
5851	}
5852
5853	if (NumVGScaledBytes) {
5854	Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_consts);
5855	Expr.append(in_start: buffer, in_end: buffer + encodeSLEB128(Value: NumVGScaledBytes, p: buffer));
5856
5857	Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_bregx);
5858	Expr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: VG, p: buffer));
5859	Expr.push_back(Elt: `0`);
5860
5861	Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_mul);
5862	Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
5863
5864	Comment << (NumVGScaledBytes < `0` ? " - " : " + ")
5865	<< std::abs(x: NumVGScaledBytes) << " * VG";
5866	}
5867	}
5868
5869	// Creates an MCCFIInstruction:
5870	// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5871	static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
5872	unsigned Reg,
5873	const StackOffset &Offset) {
5874	int64_t NumBytes, NumVGScaledBytes;
5875	AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, ByteSized&: NumBytes,
5876	VGSized&: NumVGScaledBytes);
5877	std::string CommentBuffer;
5878	llvm::raw_string_ostream Comment(CommentBuffer);
5879
5880	if (Reg == AArch64::SP)
5881	Comment << "sp";
5882	else if (Reg == AArch64::FP)
5883	Comment << "fp";
5884	else
5885	Comment << printReg(Reg, TRI: &TRI);
5886
5887	// Build up the expression (Reg + NumBytes + NumVGScaledBytes AArch64::VG)*
5888	SmallString<`64`> Expr;
5889	unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5890	Expr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5891	Expr.push_back(Elt: `0`);
5892	appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5893	VG: TRI.getDwarfRegNum(RegNum: AArch64::VG, isEH: true), Comment);
5894
5895	// Wrap this into DW_CFA_def_cfa.
5896	SmallString<`64`> DefCfaExpr;
5897	DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression);
5898	uint8_t buffer[`16`];
5899	DefCfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: Expr.size(), p: buffer));
5900	DefCfaExpr.append(RHS: Expr.str());
5901	return MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str(), Loc: SMLoc (),
5902	Comment: Comment.str());
5903	}
5904
5905	MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
5906	unsigned FrameReg, unsigned Reg,
5907	const StackOffset &Offset,
5908	bool LastAdjustmentWasScalable) {
5909	if (Offset.getScalable())
5910	return createDefCFAExpression(TRI, Reg, Offset);
5911
5912	if (FrameReg == Reg && !LastAdjustmentWasScalable)
5913	return MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: int(Offset.getFixed()));
5914
5915	unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5916	return MCCFIInstruction::cfiDefCfa(L: nullptr, Register: DwarfReg, Offset: (int)Offset.getFixed());
5917	}
5918
5919	MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
5920	unsigned Reg,
5921	const StackOffset &OffsetFromDefCFA) {
5922	int64_t NumBytes, NumVGScaledBytes;
5923	AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5924	Offset: OffsetFromDefCFA, ByteSized&: NumBytes, VGSized&: NumVGScaledBytes);
5925
5926	unsigned DwarfReg = TRI.getDwarfRegNum(RegNum: Reg, isEH: true);
5927
5928	// Non-scalable offsets can use DW_CFA_offset directly.
5929	if (!NumVGScaledBytes)
5930	return MCCFIInstruction::createOffset(L: nullptr, Register: DwarfReg, Offset: NumBytes);
5931
5932	std::string CommentBuffer;
5933	llvm::raw_string_ostream Comment(CommentBuffer);
5934	Comment << printReg(Reg, TRI: &TRI) << " @ cfa";
5935
5936	// Build up expression (NumBytes + NumVGScaledBytes AArch64::VG)*
5937	SmallString<`64`> OffsetExpr;
5938	appendVGScaledOffsetExpr(Expr&: OffsetExpr, NumBytes, NumVGScaledBytes,
5939	VG: TRI.getDwarfRegNum(RegNum: AArch64::VG, isEH: true), Comment);
5940
5941	// Wrap this into DW_CFA_expression
5942	SmallString<`64`> CfaExpr;
5943	CfaExpr.push_back(Elt: dwarf::DW_CFA_expression);
5944	uint8_t buffer[`16`];
5945	CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: DwarfReg, p: buffer));
5946	CfaExpr.append(in_start: buffer, in_end: buffer + encodeULEB128(Value: OffsetExpr.size(), p: buffer));
5947	CfaExpr.append(RHS: OffsetExpr.str());
5948
5949	return MCCFIInstruction::createEscape(L: nullptr, Vals: CfaExpr.str(), Loc: SMLoc (),
5950	Comment: Comment.str());
5951	}
5952
5953	// Helper function to emit a frame offset adjustment from a given
5954	// pointer (SrcReg), stored into DestReg. This function is explicit
5955	// in that it requires the opcode.
5956	static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
5957	MachineBasicBlock::iterator MBBI,
5958	const DebugLoc &DL, unsigned DestReg,
5959	unsigned SrcReg, int64_t Offset, unsigned Opc,
5960	const TargetInstrInfo *TII,
5961	MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5962	bool HasWinCFI, bool* EmitCFAOffset,
5963	StackOffset CFAOffset, unsigned FrameReg) {
5964	int Sign = `1`;
5965	unsigned MaxEncoding, ShiftSize;
5966	switch (Opc) {
5967	case AArch64::ADDXri:
5968	case AArch64::ADDSXri:
5969	case AArch64::SUBXri:
5970	case AArch64::SUBSXri:
5971	MaxEncoding = `0xfff`;
5972	ShiftSize = `12`;
5973	break;
5974	case AArch64::ADDVL_XXI:
5975	case AArch64::ADDPL_XXI:
5976	case AArch64::ADDSVL_XXI:
5977	case AArch64::ADDSPL_XXI:
5978	MaxEncoding = `31`;
5979	ShiftSize = `0`;
5980	if (Offset < `0`) {
5981	MaxEncoding = `32`;
5982	Sign = -`1`;
5983	Offset = -Offset;
5984	}
5985	break;
5986	default:
5987	llvm_unreachable("Unsupported opcode");
5988	}
5989
5990	// `Offset` can be in bytes or in "scalable bytes".
5991	int VScale = `1`;
5992	if (Opc == AArch64::ADDVL_XXI \|\| Opc == AArch64::ADDSVL_XXI)
5993	VScale = `16`;
5994	else if (Opc == AArch64::ADDPL_XXI \|\| Opc == AArch64::ADDSPL_XXI)
5995	VScale = `2`;
5996
5997	// FIXME: If the offset won't fit in 24-bits, compute the offset into a
5998	// scratch register. If DestReg is a virtual register, use it as the
5999	// scratch register; otherwise, create a new virtual register (to be
6000	// replaced by the scavenger at the end of PEI). That case can be optimized
6001	// slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6002	// register can be loaded with offset%8 and the add/sub can use an extending
6003	// instruction with LSL#3.
6004	// Currently the function handles any offsets but generates a poor sequence
6005	// of code.
6006	// assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6007
6008	const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6009	Register TmpReg = DestReg;
6010	if (TmpReg == AArch64::XZR)
6011	TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6012	RegClass: &AArch64::GPR64RegClass);
6013	do {
6014	uint64_t ThisVal = std::min<uint64_t>(a: Offset, b: MaxEncodableValue);
6015	unsigned LocalShiftSize = `0`;
6016	if (ThisVal > MaxEncoding) {
6017	ThisVal = ThisVal >> ShiftSize;
6018	LocalShiftSize = ShiftSize;
6019	}
6020	assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6021	"Encoding cannot handle value that big");
6022
6023	Offset -= ThisVal << LocalShiftSize;
6024	if (Offset == `0`)
6025	TmpReg = DestReg;
6026	auto MBI = BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc), DestReg: TmpReg)
6027	.addReg(RegNo: SrcReg)
6028	.addImm(Val: Sign * (int)ThisVal);
6029	if (ShiftSize)
6030	MBI = MBI.addImm(
6031	Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: LocalShiftSize));
6032	MBI = MBI.setMIFlag(Flag);
6033
6034	auto Change =
6035	VScale == `1`
6036	? StackOffset::getFixed(Fixed: ThisVal << LocalShiftSize)
6037	: StackOffset::getScalable(Scalable: VScale * (ThisVal << LocalShiftSize));
6038	if (Sign == -`1` \|\| Opc == AArch64::SUBXri \|\| Opc == AArch64::SUBSXri)
6039	CFAOffset += Change;
6040	else
6041	CFAOffset -= Change;
6042	if (EmitCFAOffset && DestReg == TmpReg) {
6043	MachineFunction &MF = *MBB.getParent();
6044	const TargetSubtargetInfo &STI = MF.getSubtarget();
6045	const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6046
6047	unsigned CFIIndex = MF.addFrameInst(
6048	Inst: createDefCFA(TRI, FrameReg, Reg: DestReg, Offset: CFAOffset, LastAdjustmentWasScalable: VScale != `1`));
6049	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::CFI_INSTRUCTION))
6050	.addCFIIndex(CFIIndex)
6051	.setMIFlags(Flag);
6052	}
6053
6054	if (NeedsWinCFI) {
6055	int Imm = (int)(ThisVal << LocalShiftSize);
6056	if (VScale != `1` && DestReg == AArch64::SP) {
6057	if (HasWinCFI)
6058	HasWinCFI = true*;
6059	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_AllocZ))
6060	.addImm(Val: ThisVal)
6061	.setMIFlag(Flag);
6062	} else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) \|\|
6063	(SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6064	assert(VScale == `1` && "Expected non-scalable operation");
6065	if (HasWinCFI)
6066	HasWinCFI = true*;
6067	if (Imm == `0`)
6068	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_SetFP)).setMIFlag(Flag);
6069	else
6070	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_AddFP))
6071	.addImm(Val: Imm)
6072	.setMIFlag(Flag);
6073	assert(Offset == `0` && "Expected remaining offset to be zero to "
6074	"emit a single SEH directive");
6075	} else if (DestReg == AArch64::SP) {
6076	assert(VScale == `1` && "Expected non-scalable operation");
6077	if (HasWinCFI)
6078	HasWinCFI = true*;
6079	assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6080	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::SEH_StackAlloc))
6081	.addImm(Val: Imm)
6082	.setMIFlag(Flag);
6083	}
6084	}
6085
6086	SrcReg = TmpReg;
6087	} while (Offset);
6088	}
6089
6090	void llvm::emitFrameOffset(MachineBasicBlock &MBB,
6091	MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
6092	unsigned DestReg, unsigned SrcReg,
6093	StackOffset Offset, const TargetInstrInfo *TII,
6094	MachineInstr::MIFlag Flag, bool SetNZCV,
6095	bool NeedsWinCFI, bool *HasWinCFI,
6096	bool EmitCFAOffset, StackOffset CFAOffset,
6097	unsigned FrameReg) {
6098	// If a function is marked as arm_locally_streaming, then the runtime value of
6099	// vscale in the prologue/epilogue is different the runtime value of vscale
6100	// in the function's body. To avoid having to consider multiple vscales,
6101	// we can use `addsvl` to allocate any scalable stack-slots, which under
6102	// most circumstances will be only locals, not callee-save slots.
6103	const Function &F = MBB.getParent()->getFunction();
6104	bool UseSVL = F.hasFnAttribute(Kind: "aarch64_pstate_sm_body");
6105
6106	int64_t Bytes, NumPredicateVectors, NumDataVectors;
6107	AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6108	Offset, NumBytes&: Bytes, NumPredicateVectors, NumDataVectors);
6109
6110	// First emit non-scalable frame offsets, or a simple 'mov'.
6111	if (Bytes \|\| (!Offset && SrcReg != DestReg)) {
6112	assert((DestReg != AArch64::SP \|\| Bytes % `8` == `0`) &&
6113	"SP increment/decrement not 8-byte aligned");
6114	unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6115	if (Bytes < `0`) {
6116	Bytes = -Bytes;
6117	Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6118	}
6119	emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Offset: Bytes, Opc, TII, Flag,
6120	NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6121	FrameReg);
6122	CFAOffset += (Opc == AArch64::ADDXri \|\| Opc == AArch64::ADDSXri)
6123	? StackOffset::getFixed(Fixed: -Bytes)
6124	: StackOffset::getFixed(Fixed: Bytes);
6125	SrcReg = DestReg;
6126	FrameReg = DestReg;
6127	}
6128
6129	assert(!(SetNZCV && (NumPredicateVectors \|\| NumDataVectors)) &&
6130	"SetNZCV not supported with SVE vectors");
6131	assert(!(NeedsWinCFI && NumPredicateVectors) &&
6132	"WinCFI can't allocate fractions of an SVE data vector");
6133
6134	if (NumDataVectors) {
6135	emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Offset: NumDataVectors,
6136	Opc: UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6137	Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6138	FrameReg);
6139	CFAOffset += StackOffset::getScalable(Scalable: -NumDataVectors * `16`);
6140	SrcReg = DestReg;
6141	}
6142
6143	if (NumPredicateVectors) {
6144	assert(DestReg != AArch64::SP && "Unaligned access to SP");
6145	emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Offset: NumPredicateVectors,
6146	Opc: UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6147	Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6148	FrameReg);
6149	}
6150	}
6151
6152	MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
6153	MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
6154	MachineBasicBlock::iterator InsertPt, int FrameIndex,
6155	LiveIntervals LIS, VirtRegMap VRM) const {
6156	// This is a bit of a hack. Consider this instruction:
6157	//
6158	// %0 = COPY %sp; GPR64all:%0
6159	//
6160	// We explicitly chose GPR64all for the virtual register so such a copy might
6161	// be eliminated by RegisterCoalescer. However, that may not be possible, and
6162	// %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6163	// register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6164	//
6165	// To prevent that, we are going to constrain the %0 register class here.
6166	if (MI.isFullCopy()) {
6167	Register DstReg = MI.getOperand(i: `0`).getReg();
6168	Register SrcReg = MI.getOperand(i: `1`).getReg();
6169	if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6170	MF.getRegInfo().constrainRegClass(Reg: DstReg, RC: &AArch64::GPR64RegClass);
6171	return nullptr;
6172	}
6173	if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6174	MF.getRegInfo().constrainRegClass(Reg: SrcReg, RC: &AArch64::GPR64RegClass);
6175	return nullptr;
6176	}
6177	// Nothing can folded with copy from/to NZCV.
6178	if (SrcReg == AArch64::NZCV \|\| DstReg == AArch64::NZCV)
6179	return nullptr;
6180	}
6181
6182	// Handle the case where a copy is being spilled or filled but the source
6183	// and destination register class don't match. For example:
6184	//
6185	// %0 = COPY %xzr; GPR64common:%0
6186	//
6187	// In this case we can still safely fold away the COPY and generate the
6188	// following spill code:
6189	//
6190	// STRXui %xzr, %stack.0
6191	//
6192	// This also eliminates spilled cross register class COPYs (e.g. between x and
6193	// d regs) of the same size. For example:
6194	//
6195	// %0 = COPY %1; GPR64:%0, FPR64:%1
6196	//
6197	// will be filled as
6198	//
6199	// LDRDui %0, fi<#0>
6200	//
6201	// instead of
6202	//
6203	// LDRXui %Temp, fi<#0>
6204	// %0 = FMOV %Temp
6205	//
6206	if (MI.isCopy() && Ops.size() == `1` &&
6207	// Make sure we're only folding the explicit COPY defs/uses.
6208	(Ops [`0`] == `0` \|\| Ops [`0`] == `1`)) {
6209	bool IsSpill = Ops [`0`] == `0`;
6210	bool IsFill = !IsSpill;
6211	const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
6212	const MachineRegisterInfo &MRI = MF.getRegInfo();
6213	MachineBasicBlock &MBB = *MI.getParent();
6214	const MachineOperand &DstMO = MI.getOperand(i: `0`);
6215	const MachineOperand &SrcMO = MI.getOperand(i: `1`);
6216	Register DstReg = DstMO.getReg();
6217	Register SrcReg = SrcMO.getReg();
6218	// This is slightly expensive to compute for physical regs since
6219	// getMinimalPhysRegClass is slow.
6220	auto getRegClass = [&](unsigned Reg) {
6221	return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6222	: TRI.getMinimalPhysRegClass(Reg);
6223	};
6224
6225	if (DstMO.getSubReg() == `0` && SrcMO.getSubReg() == `0`) {
6226	assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6227	TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6228	"Mismatched register size in non subreg COPY");
6229	if (IsSpill)
6230	storeRegToStackSlot(MBB, MBBI: InsertPt, SrcReg, isKill: SrcMO.isKill(), FI: FrameIndex,
6231	RC: getRegClass (SrcReg), TRI: &TRI, VReg: Register ());
6232	else
6233	loadRegFromStackSlot(MBB, MBBI: InsertPt, DestReg: DstReg, FI: FrameIndex,
6234	RC: getRegClass (DstReg), TRI: &TRI, VReg: Register ());
6235	return &*--InsertPt;
6236	}
6237
6238	// Handle cases like spilling def of:
6239	//
6240	// %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6241	//
6242	// where the physical register source can be widened and stored to the full
6243	// virtual reg destination stack slot, in this case producing:
6244	//
6245	// STRXui %xzr, %stack.0
6246	//
6247	if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6248	TRI.getRegSizeInBits(RC: *getRegClass (DstReg)) == `64`) {
6249	assert(SrcMO.getSubReg() == `0` &&
6250	"Unexpected subreg on physical register");
6251	storeRegToStackSlot(MBB, MBBI: InsertPt, SrcReg: AArch64::XZR, isKill: SrcMO.isKill(),
6252	FI: FrameIndex, RC: &AArch64::GPR64RegClass, TRI: &TRI,
6253	VReg: Register ());
6254	return &*--InsertPt;
6255	}
6256
6257	// Handle cases like filling use of:
6258	//
6259	// %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6260	//
6261	// where we can load the full virtual reg source stack slot, into the subreg
6262	// destination, in this case producing:
6263	//
6264	// LDRWui %0:sub_32<def,read-undef>, %stack.0
6265	//
6266	if (IsFill && SrcMO.getSubReg() == `0` && DstMO.isUndef()) {
6267	const TargetRegisterClass *FillRC;
6268	switch (DstMO.getSubReg()) {
6269	default:
6270	FillRC = nullptr;
6271	break;
6272	case AArch64::sub_32:
6273	FillRC = &AArch64::GPR32RegClass;
6274	break;
6275	case AArch64::ssub:
6276	FillRC = &AArch64::FPR32RegClass;
6277	break;
6278	case AArch64::dsub:
6279	FillRC = &AArch64::FPR64RegClass;
6280	break;
6281	}
6282
6283	if (FillRC) {
6284	assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6285	TRI.getRegSizeInBits(*FillRC) &&
6286	"Mismatched regclass size on folded subreg COPY");
6287	loadRegFromStackSlot(MBB, MBBI: InsertPt, DestReg: DstReg, FI: FrameIndex, RC: FillRC, TRI: &TRI,
6288	VReg: Register ());
6289	MachineInstr &LoadMI = *--InsertPt;
6290	MachineOperand &LoadDst = LoadMI.getOperand(i: `0`);
6291	assert(LoadDst.getSubReg() == `0` && "unexpected subreg on fill load");
6292	LoadDst.setSubReg(DstMO.getSubReg());
6293	LoadDst.setIsUndef();
6294	return &LoadMI;
6295	}
6296	}
6297	}
6298
6299	// Cannot fold.
6300	return nullptr;
6301	}
6302
6303	int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
6304	StackOffset &SOffset,
6305	bool *OutUseUnscaledOp,
6306	unsigned *OutUnscaledOp,
6307	int64_t *EmittableOffset) {
6308	// Set output values in case of early exit.
6309	if (EmittableOffset)
6310	*EmittableOffset = `0`;
6311	if (OutUseUnscaledOp)
6312	OutUseUnscaledOp = false*;
6313	if (OutUnscaledOp)
6314	*OutUnscaledOp = `0`;
6315
6316	// Exit early for structured vector spills/fills as they can't take an
6317	// immediate offset.
6318	switch (MI.getOpcode()) {
6319	default:
6320	break;
6321	case AArch64::LD1Rv1d:
6322	case AArch64::LD1Rv2s:
6323	case AArch64::LD1Rv2d:
6324	case AArch64::LD1Rv4h:
6325	case AArch64::LD1Rv4s:
6326	case AArch64::LD1Rv8b:
6327	case AArch64::LD1Rv8h:
6328	case AArch64::LD1Rv16b:
6329	case AArch64::LD1Twov2d:
6330	case AArch64::LD1Threev2d:
6331	case AArch64::LD1Fourv2d:
6332	case AArch64::LD1Twov1d:
6333	case AArch64::LD1Threev1d:
6334	case AArch64::LD1Fourv1d:
6335	case AArch64::ST1Twov2d:
6336	case AArch64::ST1Threev2d:
6337	case AArch64::ST1Fourv2d:
6338	case AArch64::ST1Twov1d:
6339	case AArch64::ST1Threev1d:
6340	case AArch64::ST1Fourv1d:
6341	case AArch64::ST1i8:
6342	case AArch64::ST1i16:
6343	case AArch64::ST1i32:
6344	case AArch64::ST1i64:
6345	case AArch64::IRG:
6346	case AArch64::IRGstack:
6347	case AArch64::STGloop:
6348	case AArch64::STZGloop:
6349	return AArch64FrameOffsetCannotUpdate;
6350	}
6351
6352	// Get the min/max offset and the scale.
6353	TypeSize ScaleValue(`0U`, false), Width(`0U`, false);
6354	int64_t MinOff, MaxOff;
6355	if (!AArch64InstrInfo::getMemOpInfo(Opcode: MI.getOpcode(), Scale&: ScaleValue, Width, MinOffset&: MinOff,
6356	MaxOffset&: MaxOff))
6357	llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6358
6359	// Construct the complete offset.
6360	bool IsMulVL = ScaleValue.isScalable();
6361	unsigned Scale = ScaleValue.getKnownMinValue();
6362	int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6363
6364	const MachineOperand &ImmOpnd =
6365	MI.getOperand(i: AArch64InstrInfo::getLoadStoreImmIdx(Opc: MI.getOpcode()));
6366	Offset += ImmOpnd.getImm() * Scale;
6367
6368	// If the offset doesn't match the scale, we rewrite the instruction to
6369	// use the unscaled instruction instead. Likewise, if we have a negative
6370	// offset and there is an unscaled op to use.
6371	std::optional<unsigned> UnscaledOp =
6372	AArch64InstrInfo::getUnscaledLdSt(Opc: MI.getOpcode());
6373	bool useUnscaledOp = UnscaledOp && (Offset % Scale \|\| Offset < `0`);
6374	if (useUnscaledOp &&
6375	!AArch64InstrInfo::getMemOpInfo(Opcode: *UnscaledOp, Scale&: ScaleValue, Width, MinOffset&: MinOff,
6376	MaxOffset&: MaxOff))
6377	llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6378
6379	Scale = ScaleValue.getKnownMinValue();
6380	assert(IsMulVL == ScaleValue.isScalable() &&
6381	"Unscaled opcode has different value for scalable");
6382
6383	int64_t Remainder = Offset % Scale;
6384	assert(!(Remainder && useUnscaledOp) &&
6385	"Cannot have remainder when using unscaled op");
6386
6387	assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6388	int64_t NewOffset = Offset / Scale;
6389	if (MinOff <= NewOffset && NewOffset <= MaxOff)
6390	Offset = Remainder;
6391	else {
6392	NewOffset = NewOffset < `0` ? MinOff : MaxOff;
6393	Offset = Offset - (NewOffset * Scale);
6394	}
6395
6396	if (EmittableOffset)
6397	*EmittableOffset = NewOffset;
6398	if (OutUseUnscaledOp)
6399	*OutUseUnscaledOp = useUnscaledOp;
6400	if (OutUnscaledOp && UnscaledOp)
6401	OutUnscaledOp = UnscaledOp;
6402
6403	if (IsMulVL)
6404	SOffset = StackOffset::get(Fixed: SOffset.getFixed(), Scalable: Offset);
6405	else
6406	SOffset = StackOffset::get(Fixed: Offset, Scalable: SOffset.getScalable());
6407	return AArch64FrameOffsetCanUpdate \|
6408	(SOffset ? `0` : AArch64FrameOffsetIsLegal);
6409	}
6410
6411	bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
6412	unsigned FrameReg, StackOffset &Offset,
6413	const AArch64InstrInfo *TII) {
6414	unsigned Opcode = MI.getOpcode();
6415	unsigned ImmIdx = FrameRegIdx + `1`;
6416
6417	if (Opcode == AArch64::ADDSXri \|\| Opcode == AArch64::ADDXri) {
6418	Offset += StackOffset::getFixed(Fixed: MI.getOperand(i: ImmIdx).getImm());
6419	emitFrameOffset(MBB&: *MI.getParent(), MBBI: MI, DL: MI.getDebugLoc(),
6420	DestReg: MI.getOperand(i: `0`).getReg(), SrcReg: FrameReg, Offset, TII,
6421	Flag: MachineInstr::NoFlags, SetNZCV: (Opcode == AArch64::ADDSXri));
6422	MI.eraseFromParent();
6423	Offset = StackOffset ();
6424	return true;
6425	}
6426
6427	int64_t NewOffset;
6428	unsigned UnscaledOp;
6429	bool UseUnscaledOp;
6430	int Status = isAArch64FrameOffsetLegal(MI, SOffset&: Offset, OutUseUnscaledOp: &UseUnscaledOp,
6431	OutUnscaledOp: &UnscaledOp, EmittableOffset: &NewOffset);
6432	if (Status & AArch64FrameOffsetCanUpdate) {
6433	if (Status & AArch64FrameOffsetIsLegal)
6434	// Replace the FrameIndex with FrameReg.
6435	MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
6436	if (UseUnscaledOp)
6437	MI.setDesc(TII->get(Opcode: UnscaledOp));
6438
6439	MI.getOperand(i: ImmIdx).ChangeToImmediate(ImmVal: NewOffset);
6440	return !Offset;
6441	}
6442
6443	return false;
6444	}
6445
6446	void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
6447	MachineBasicBlock::iterator MI) const {
6448	DebugLoc DL;
6449	BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode: AArch64::HINT)).addImm(Val: `0`);
6450	}
6451
6452	MCInst AArch64InstrInfo::getNop() const {
6453	return MCInstBuilder (AArch64::HINT).addImm(Val: `0`);
6454	}
6455
6456	// AArch64 supports MachineCombiner.
6457	bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6458
6459	// True when Opc sets flag
6460	static bool isCombineInstrSettingFlag(unsigned Opc) {
6461	switch (Opc) {
6462	case AArch64::ADDSWrr:
6463	case AArch64::ADDSWri:
6464	case AArch64::ADDSXrr:
6465	case AArch64::ADDSXri:
6466	case AArch64::SUBSWrr:
6467	case AArch64::SUBSXrr:
6468	// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6469	case AArch64::SUBSWri:
6470	case AArch64::SUBSXri:
6471	return true;
6472	default:
6473	break;
6474	}
6475	return false;
6476	}
6477
6478	// 32b Opcodes that can be combined with a MUL
6479	static bool isCombineInstrCandidate32(unsigned Opc) {
6480	switch (Opc) {
6481	case AArch64::ADDWrr:
6482	case AArch64::ADDWri:
6483	case AArch64::SUBWrr:
6484	case AArch64::ADDSWrr:
6485	case AArch64::ADDSWri:
6486	case AArch64::SUBSWrr:
6487	// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6488	case AArch64::SUBWri:
6489	case AArch64::SUBSWri:
6490	return true;
6491	default:
6492	break;
6493	}
6494	return false;
6495	}
6496
6497	// 64b Opcodes that can be combined with a MUL
6498	static bool isCombineInstrCandidate64(unsigned Opc) {
6499	switch (Opc) {
6500	case AArch64::ADDXrr:
6501	case AArch64::ADDXri:
6502	case AArch64::SUBXrr:
6503	case AArch64::ADDSXrr:
6504	case AArch64::ADDSXri:
6505	case AArch64::SUBSXrr:
6506	// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6507	case AArch64::SUBXri:
6508	case AArch64::SUBSXri:
6509	case AArch64::ADDv8i8:
6510	case AArch64::ADDv16i8:
6511	case AArch64::ADDv4i16:
6512	case AArch64::ADDv8i16:
6513	case AArch64::ADDv2i32:
6514	case AArch64::ADDv4i32:
6515	case AArch64::SUBv8i8:
6516	case AArch64::SUBv16i8:
6517	case AArch64::SUBv4i16:
6518	case AArch64::SUBv8i16:
6519	case AArch64::SUBv2i32:
6520	case AArch64::SUBv4i32:
6521	return true;
6522	default:
6523	break;
6524	}
6525	return false;
6526	}
6527
6528	// FP Opcodes that can be combined with a FMUL.
6529	static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6530	switch (Inst.getOpcode()) {
6531	default:
6532	break;
6533	case AArch64::FADDHrr:
6534	case AArch64::FADDSrr:
6535	case AArch64::FADDDrr:
6536	case AArch64::FADDv4f16:
6537	case AArch64::FADDv8f16:
6538	case AArch64::FADDv2f32:
6539	case AArch64::FADDv2f64:
6540	case AArch64::FADDv4f32:
6541	case AArch64::FSUBHrr:
6542	case AArch64::FSUBSrr:
6543	case AArch64::FSUBDrr:
6544	case AArch64::FSUBv4f16:
6545	case AArch64::FSUBv8f16:
6546	case AArch64::FSUBv2f32:
6547	case AArch64::FSUBv2f64:
6548	case AArch64::FSUBv4f32:
6549	TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
6550	// We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6551	// the target options or if FADD/FSUB has the contract fast-math flag.
6552	return Options.UnsafeFPMath \|\|
6553	Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
6554	Inst.getFlag(Flag: MachineInstr::FmContract);
6555	return true;
6556	}
6557	return false;
6558	}
6559
6560	// Opcodes that can be combined with a MUL
6561	static bool isCombineInstrCandidate(unsigned Opc) {
6562	return (isCombineInstrCandidate32(Opc) \|\| isCombineInstrCandidate64(Opc));
6563	}
6564
6565	//
6566	// Utility routine that checks if \param MO is defined by an
6567	// \param CombineOpc instruction in the basic block \param MBB
6568	static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
6569	unsigned CombineOpc, unsigned ZeroReg = `0`,
6570	bool CheckZeroReg = false) {
6571	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6572	MachineInstr MI = nullptr*;
6573
6574	if (MO.isReg() && MO.getReg().isVirtual())
6575	MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
6576	// And it needs to be in the trace (otherwise, it won't have a depth).
6577	if (!MI \|\| MI->getParent() != &MBB \|\| (unsigned)MI->getOpcode() != CombineOpc)
6578	return false;
6579	// Must only used by the user we combine with.
6580	if (!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: `0`).getReg()))
6581	return false;
6582
6583	if (CheckZeroReg) {
6584	assert(MI->getNumOperands() >= `4` && MI->getOperand(`0`).isReg() &&
6585	MI->getOperand(`1`).isReg() && MI->getOperand(`2`).isReg() &&
6586	MI->getOperand(`3`).isReg() && "MAdd/MSub must have a least 4 regs");
6587	// The third input reg must be zero.
6588	if (MI->getOperand(i: `3`).getReg() != ZeroReg)
6589	return false;
6590	}
6591
6592	if (isCombineInstrSettingFlag(Opc: CombineOpc) &&
6593	MI->findRegisterDefOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr, isDead: true) == -`1`)
6594	return false;
6595
6596	return true;
6597	}
6598
6599	//
6600	// Is \param MO defined by an integer multiply and can be combined?
6601	static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
6602	unsigned MulOpc, unsigned ZeroReg) {
6603	return canCombine(MBB, MO, CombineOpc: MulOpc, ZeroReg, CheckZeroReg: true);
6604	}
6605
6606	//
6607	// Is \param MO defined by a floating-point multiply and can be combined?
6608	static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
6609	unsigned MulOpc) {
6610	return canCombine(MBB, MO, CombineOpc: MulOpc);
6611	}
6612
6613	// TODO: There are many more machine instruction opcodes to match:
6614	// 1. Other data types (integer, vectors)
6615	// 2. Other math / logic operations (xor, or)
6616	// 3. Other forms of the same operation (intrinsics and other variants)
6617	bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6618	bool Invert) const {
6619	if (Invert)
6620	return false;
6621	switch (Inst.getOpcode()) {
6622	// == Floating-point types ==
6623	// -- Floating-point instructions --
6624	case AArch64::FADDHrr:
6625	case AArch64::FADDSrr:
6626	case AArch64::FADDDrr:
6627	case AArch64::FMULHrr:
6628	case AArch64::FMULSrr:
6629	case AArch64::FMULDrr:
6630	case AArch64::FMULX16:
6631	case AArch64::FMULX32:
6632	case AArch64::FMULX64:
6633	// -- Advanced SIMD instructions --
6634	case AArch64::FADDv4f16:
6635	case AArch64::FADDv8f16:
6636	case AArch64::FADDv2f32:
6637	case AArch64::FADDv4f32:
6638	case AArch64::FADDv2f64:
6639	case AArch64::FMULv4f16:
6640	case AArch64::FMULv8f16:
6641	case AArch64::FMULv2f32:
6642	case AArch64::FMULv4f32:
6643	case AArch64::FMULv2f64:
6644	case AArch64::FMULXv4f16:
6645	case AArch64::FMULXv8f16:
6646	case AArch64::FMULXv2f32:
6647	case AArch64::FMULXv4f32:
6648	case AArch64::FMULXv2f64:
6649	// -- SVE instructions --
6650	// Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6651	// in the SVE instruction set (though there are predicated ones).
6652	case AArch64::FADD_ZZZ_H:
6653	case AArch64::FADD_ZZZ_S:
6654	case AArch64::FADD_ZZZ_D:
6655	case AArch64::FMUL_ZZZ_H:
6656	case AArch64::FMUL_ZZZ_S:
6657	case AArch64::FMUL_ZZZ_D:
6658	return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath \|\|
6659	(Inst.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
6660	Inst.getFlag(Flag: MachineInstr::MIFlag::FmNsz));
6661
6662	// == Integer types ==
6663	// -- Base instructions --
6664	// Opcodes MULWrr and MULXrr don't exist because
6665	// `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6666	// `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6667	// The machine-combiner does not support three-source-operands machine
6668	// instruction. So we cannot reassociate MULs.
6669	case AArch64::ADDWrr:
6670	case AArch64::ADDXrr:
6671	case AArch64::ANDWrr:
6672	case AArch64::ANDXrr:
6673	case AArch64::ORRWrr:
6674	case AArch64::ORRXrr:
6675	case AArch64::EORWrr:
6676	case AArch64::EORXrr:
6677	case AArch64::EONWrr:
6678	case AArch64::EONXrr:
6679	// -- Advanced SIMD instructions --
6680	// Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6681	// in the Advanced SIMD instruction set.
6682	case AArch64::ADDv8i8:
6683	case AArch64::ADDv16i8:
6684	case AArch64::ADDv4i16:
6685	case AArch64::ADDv8i16:
6686	case AArch64::ADDv2i32:
6687	case AArch64::ADDv4i32:
6688	case AArch64::ADDv1i64:
6689	case AArch64::ADDv2i64:
6690	case AArch64::MULv8i8:
6691	case AArch64::MULv16i8:
6692	case AArch64::MULv4i16:
6693	case AArch64::MULv8i16:
6694	case AArch64::MULv2i32:
6695	case AArch64::MULv4i32:
6696	case AArch64::ANDv8i8:
6697	case AArch64::ANDv16i8:
6698	case AArch64::ORRv8i8:
6699	case AArch64::ORRv16i8:
6700	case AArch64::EORv8i8:
6701	case AArch64::EORv16i8:
6702	// -- SVE instructions --
6703	case AArch64::ADD_ZZZ_B:
6704	case AArch64::ADD_ZZZ_H:
6705	case AArch64::ADD_ZZZ_S:
6706	case AArch64::ADD_ZZZ_D:
6707	case AArch64::MUL_ZZZ_B:
6708	case AArch64::MUL_ZZZ_H:
6709	case AArch64::MUL_ZZZ_S:
6710	case AArch64::MUL_ZZZ_D:
6711	case AArch64::AND_ZZZ:
6712	case AArch64::ORR_ZZZ:
6713	case AArch64::EOR_ZZZ:
6714	return true;
6715
6716	default:
6717	return false;
6718	}
6719	}
6720
6721	/// Find instructions that can be turned into madd.
6722	static bool getMaddPatterns(MachineInstr &Root,
6723	SmallVectorImpl<unsigned> &Patterns) {
6724	unsigned Opc = Root.getOpcode();
6725	MachineBasicBlock &MBB = *Root.getParent();
6726	bool Found = false;
6727
6728	if (!isCombineInstrCandidate(Opc))
6729	return false;
6730	if (isCombineInstrSettingFlag(Opc)) {
6731	int Cmp_NZCV =
6732	Root.findRegisterDefOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr, isDead: true);
6733	// When NZCV is live bail out.
6734	if (Cmp_NZCV == -`1`)
6735	return false;
6736	unsigned NewOpc = convertToNonFlagSettingOpc(MI: Root);
6737	// When opcode can't change bail out.
6738	// CHECKME: do we miss any cases for opcode conversion?
6739	if (NewOpc == Opc)
6740	return false;
6741	Opc = NewOpc;
6742	}
6743
6744	auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6745	unsigned Pattern) {
6746	if (canCombineWithMUL(MBB, MO&: Root.getOperand(i: Operand), MulOpc: Opcode, ZeroReg)) {
6747	Patterns.push_back(Elt: Pattern);
6748	Found = true;
6749	}
6750	};
6751
6752	auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6753	if (canCombine(MBB, MO&: Root.getOperand(i: Operand), CombineOpc: Opcode)) {
6754	Patterns.push_back(Elt: Pattern);
6755	Found = true;
6756	}
6757	};
6758
6759	typedef AArch64MachineCombinerPattern MCP;
6760
6761	switch (Opc) {
6762	default:
6763	break;
6764	case AArch64::ADDWrr:
6765	assert(Root.getOperand(`1`).isReg() && Root.getOperand(`2`).isReg() &&
6766	"ADDWrr does not have register operands");
6767	setFound (AArch64::MADDWrrr, `1`, AArch64::WZR, MCP::MULADDW_OP1);
6768	setFound (AArch64::MADDWrrr, `2`, AArch64::WZR, MCP::MULADDW_OP2);
6769	break;
6770	case AArch64::ADDXrr:
6771	setFound (AArch64::MADDXrrr, `1`, AArch64::XZR, MCP::MULADDX_OP1);
6772	setFound (AArch64::MADDXrrr, `2`, AArch64::XZR, MCP::MULADDX_OP2);
6773	break;
6774	case AArch64::SUBWrr:
6775	setFound (AArch64::MADDWrrr, `2`, AArch64::WZR, MCP::MULSUBW_OP2);
6776	setFound (AArch64::MADDWrrr, `1`, AArch64::WZR, MCP::MULSUBW_OP1);
6777	break;
6778	case AArch64::SUBXrr:
6779	setFound (AArch64::MADDXrrr, `2`, AArch64::XZR, MCP::MULSUBX_OP2);
6780	setFound (AArch64::MADDXrrr, `1`, AArch64::XZR, MCP::MULSUBX_OP1);
6781	break;
6782	case AArch64::ADDWri:
6783	setFound (AArch64::MADDWrrr, `1`, AArch64::WZR, MCP::MULADDWI_OP1);
6784	break;
6785	case AArch64::ADDXri:
6786	setFound (AArch64::MADDXrrr, `1`, AArch64::XZR, MCP::MULADDXI_OP1);
6787	break;
6788	case AArch64::SUBWri:
6789	setFound (AArch64::MADDWrrr, `1`, AArch64::WZR, MCP::MULSUBWI_OP1);
6790	break;
6791	case AArch64::SUBXri:
6792	setFound (AArch64::MADDXrrr, `1`, AArch64::XZR, MCP::MULSUBXI_OP1);
6793	break;
6794	case AArch64::ADDv8i8:
6795	setVFound (AArch64::MULv8i8, `1`, MCP::MULADDv8i8_OP1);
6796	setVFound (AArch64::MULv8i8, `2`, MCP::MULADDv8i8_OP2);
6797	break;
6798	case AArch64::ADDv16i8:
6799	setVFound (AArch64::MULv16i8, `1`, MCP::MULADDv16i8_OP1);
6800	setVFound (AArch64::MULv16i8, `2`, MCP::MULADDv16i8_OP2);
6801	break;
6802	case AArch64::ADDv4i16:
6803	setVFound (AArch64::MULv4i16, `1`, MCP::MULADDv4i16_OP1);
6804	setVFound (AArch64::MULv4i16, `2`, MCP::MULADDv4i16_OP2);
6805	setVFound (AArch64::MULv4i16_indexed, `1`, MCP::MULADDv4i16_indexed_OP1);
6806	setVFound (AArch64::MULv4i16_indexed, `2`, MCP::MULADDv4i16_indexed_OP2);
6807	break;
6808	case AArch64::ADDv8i16:
6809	setVFound (AArch64::MULv8i16, `1`, MCP::MULADDv8i16_OP1);
6810	setVFound (AArch64::MULv8i16, `2`, MCP::MULADDv8i16_OP2);
6811	setVFound (AArch64::MULv8i16_indexed, `1`, MCP::MULADDv8i16_indexed_OP1);
6812	setVFound (AArch64::MULv8i16_indexed, `2`, MCP::MULADDv8i16_indexed_OP2);
6813	break;
6814	case AArch64::ADDv2i32:
6815	setVFound (AArch64::MULv2i32, `1`, MCP::MULADDv2i32_OP1);
6816	setVFound (AArch64::MULv2i32, `2`, MCP::MULADDv2i32_OP2);
6817	setVFound (AArch64::MULv2i32_indexed, `1`, MCP::MULADDv2i32_indexed_OP1);
6818	setVFound (AArch64::MULv2i32_indexed, `2`, MCP::MULADDv2i32_indexed_OP2);
6819	break;
6820	case AArch64::ADDv4i32:
6821	setVFound (AArch64::MULv4i32, `1`, MCP::MULADDv4i32_OP1);
6822	setVFound (AArch64::MULv4i32, `2`, MCP::MULADDv4i32_OP2);
6823	setVFound (AArch64::MULv4i32_indexed, `1`, MCP::MULADDv4i32_indexed_OP1);
6824	setVFound (AArch64::MULv4i32_indexed, `2`, MCP::MULADDv4i32_indexed_OP2);
6825	break;
6826	case AArch64::SUBv8i8:
6827	setVFound (AArch64::MULv8i8, `1`, MCP::MULSUBv8i8_OP1);
6828	setVFound (AArch64::MULv8i8, `2`, MCP::MULSUBv8i8_OP2);
6829	break;
6830	case AArch64::SUBv16i8:
6831	setVFound (AArch64::MULv16i8, `1`, MCP::MULSUBv16i8_OP1);
6832	setVFound (AArch64::MULv16i8, `2`, MCP::MULSUBv16i8_OP2);
6833	break;
6834	case AArch64::SUBv4i16:
6835	setVFound (AArch64::MULv4i16, `1`, MCP::MULSUBv4i16_OP1);
6836	setVFound (AArch64::MULv4i16, `2`, MCP::MULSUBv4i16_OP2);
6837	setVFound (AArch64::MULv4i16_indexed, `1`, MCP::MULSUBv4i16_indexed_OP1);
6838	setVFound (AArch64::MULv4i16_indexed, `2`, MCP::MULSUBv4i16_indexed_OP2);
6839	break;
6840	case AArch64::SUBv8i16:
6841	setVFound (AArch64::MULv8i16, `1`, MCP::MULSUBv8i16_OP1);
6842	setVFound (AArch64::MULv8i16, `2`, MCP::MULSUBv8i16_OP2);
6843	setVFound (AArch64::MULv8i16_indexed, `1`, MCP::MULSUBv8i16_indexed_OP1);
6844	setVFound (AArch64::MULv8i16_indexed, `2`, MCP::MULSUBv8i16_indexed_OP2);
6845	break;
6846	case AArch64::SUBv2i32:
6847	setVFound (AArch64::MULv2i32, `1`, MCP::MULSUBv2i32_OP1);
6848	setVFound (AArch64::MULv2i32, `2`, MCP::MULSUBv2i32_OP2);
6849	setVFound (AArch64::MULv2i32_indexed, `1`, MCP::MULSUBv2i32_indexed_OP1);
6850	setVFound (AArch64::MULv2i32_indexed, `2`, MCP::MULSUBv2i32_indexed_OP2);
6851	break;
6852	case AArch64::SUBv4i32:
6853	setVFound (AArch64::MULv4i32, `1`, MCP::MULSUBv4i32_OP1);
6854	setVFound (AArch64::MULv4i32, `2`, MCP::MULSUBv4i32_OP2);
6855	setVFound (AArch64::MULv4i32_indexed, `1`, MCP::MULSUBv4i32_indexed_OP1);
6856	setVFound (AArch64::MULv4i32_indexed, `2`, MCP::MULSUBv4i32_indexed_OP2);
6857	break;
6858	}
6859	return Found;
6860	}
6861
6862	bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
6863	switch (Opcode) {
6864	default:
6865	break;
6866	case AArch64::UABALB_ZZZ_D:
6867	case AArch64::UABALB_ZZZ_H:
6868	case AArch64::UABALB_ZZZ_S:
6869	case AArch64::UABALT_ZZZ_D:
6870	case AArch64::UABALT_ZZZ_H:
6871	case AArch64::UABALT_ZZZ_S:
6872	case AArch64::SABALB_ZZZ_D:
6873	case AArch64::SABALB_ZZZ_S:
6874	case AArch64::SABALB_ZZZ_H:
6875	case AArch64::SABALT_ZZZ_D:
6876	case AArch64::SABALT_ZZZ_S:
6877	case AArch64::SABALT_ZZZ_H:
6878	case AArch64::UABALv16i8_v8i16:
6879	case AArch64::UABALv2i32_v2i64:
6880	case AArch64::UABALv4i16_v4i32:
6881	case AArch64::UABALv4i32_v2i64:
6882	case AArch64::UABALv8i16_v4i32:
6883	case AArch64::UABALv8i8_v8i16:
6884	case AArch64::UABAv16i8:
6885	case AArch64::UABAv2i32:
6886	case AArch64::UABAv4i16:
6887	case AArch64::UABAv4i32:
6888	case AArch64::UABAv8i16:
6889	case AArch64::UABAv8i8:
6890	case AArch64::SABALv16i8_v8i16:
6891	case AArch64::SABALv2i32_v2i64:
6892	case AArch64::SABALv4i16_v4i32:
6893	case AArch64::SABALv4i32_v2i64:
6894	case AArch64::SABALv8i16_v4i32:
6895	case AArch64::SABALv8i8_v8i16:
6896	case AArch64::SABAv16i8:
6897	case AArch64::SABAv2i32:
6898	case AArch64::SABAv4i16:
6899	case AArch64::SABAv4i32:
6900	case AArch64::SABAv8i16:
6901	case AArch64::SABAv8i8:
6902	return true;
6903	}
6904
6905	return false;
6906	}
6907
6908	unsigned AArch64InstrInfo::getAccumulationStartOpcode(
6909	unsigned AccumulationOpcode) const {
6910	switch (AccumulationOpcode) {
6911	default:
6912	llvm_unreachable("Unsupported accumulation Opcode!");
6913	case AArch64::UABALB_ZZZ_D:
6914	return AArch64::UABDLB_ZZZ_D;
6915	case AArch64::UABALB_ZZZ_H:
6916	return AArch64::UABDLB_ZZZ_H;
6917	case AArch64::UABALB_ZZZ_S:
6918	return AArch64::UABDLB_ZZZ_S;
6919	case AArch64::UABALT_ZZZ_D:
6920	return AArch64::UABDLT_ZZZ_D;
6921	case AArch64::UABALT_ZZZ_H:
6922	return AArch64::UABDLT_ZZZ_H;
6923	case AArch64::UABALT_ZZZ_S:
6924	return AArch64::UABDLT_ZZZ_S;
6925	case AArch64::UABALv16i8_v8i16:
6926	return AArch64::UABDLv16i8_v8i16;
6927	case AArch64::UABALv2i32_v2i64:
6928	return AArch64::UABDLv2i32_v2i64;
6929	case AArch64::UABALv4i16_v4i32:
6930	return AArch64::UABDLv4i16_v4i32;
6931	case AArch64::UABALv4i32_v2i64:
6932	return AArch64::UABDLv4i32_v2i64;
6933	case AArch64::UABALv8i16_v4i32:
6934	return AArch64::UABDLv8i16_v4i32;
6935	case AArch64::UABALv8i8_v8i16:
6936	return AArch64::UABDLv8i8_v8i16;
6937	case AArch64::UABAv16i8:
6938	return AArch64::UABDv16i8;
6939	case AArch64::UABAv2i32:
6940	return AArch64::UABDv2i32;
6941	case AArch64::UABAv4i16:
6942	return AArch64::UABDv4i16;
6943	case AArch64::UABAv4i32:
6944	return AArch64::UABDv4i32;
6945	case AArch64::UABAv8i16:
6946	return AArch64::UABDv8i16;
6947	case AArch64::UABAv8i8:
6948	return AArch64::UABDv8i8;
6949	case AArch64::SABALB_ZZZ_D:
6950	return AArch64::SABDLB_ZZZ_D;
6951	case AArch64::SABALB_ZZZ_S:
6952	return AArch64::SABDLB_ZZZ_S;
6953	case AArch64::SABALB_ZZZ_H:
6954	return AArch64::SABDLB_ZZZ_H;
6955	case AArch64::SABALT_ZZZ_D:
6956	return AArch64::SABDLT_ZZZ_D;
6957	case AArch64::SABALT_ZZZ_S:
6958	return AArch64::SABDLT_ZZZ_S;
6959	case AArch64::SABALT_ZZZ_H:
6960	return AArch64::SABDLT_ZZZ_H;
6961	case AArch64::SABALv16i8_v8i16:
6962	return AArch64::SABDLv16i8_v8i16;
6963	case AArch64::SABALv2i32_v2i64:
6964	return AArch64::SABDLv2i32_v2i64;
6965	case AArch64::SABALv4i16_v4i32:
6966	return AArch64::SABDLv4i16_v4i32;
6967	case AArch64::SABALv4i32_v2i64:
6968	return AArch64::SABDLv4i32_v2i64;
6969	case AArch64::SABALv8i16_v4i32:
6970	return AArch64::SABDLv8i16_v4i32;
6971	case AArch64::SABALv8i8_v8i16:
6972	return AArch64::SABDLv8i8_v8i16;
6973	case AArch64::SABAv16i8:
6974	return AArch64::SABDv16i8;
6975	case AArch64::SABAv2i32:
6976	return AArch64::SABAv2i32;
6977	case AArch64::SABAv4i16:
6978	return AArch64::SABDv4i16;
6979	case AArch64::SABAv4i32:
6980	return AArch64::SABDv4i32;
6981	case AArch64::SABAv8i16:
6982	return AArch64::SABDv8i16;
6983	case AArch64::SABAv8i8:
6984	return AArch64::SABDv8i8;
6985	}
6986	}
6987
6988	/// Floating-Point Support
6989
6990	/// Find instructions that can be turned into madd.
6991	static bool getFMAPatterns(MachineInstr &Root,
6992	SmallVectorImpl<unsigned> &Patterns) {
6993
6994	if (!isCombineInstrCandidateFP(Inst: Root))
6995	return false;
6996
6997	MachineBasicBlock &MBB = *Root.getParent();
6998	bool Found = false;
6999
7000	auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7001	if (canCombineWithFMUL(MBB, MO&: Root.getOperand(i: Operand), MulOpc: Opcode)) {
7002	Patterns.push_back(Elt: Pattern);
7003	return true;
7004	}
7005	return false;
7006	};
7007
7008	typedef AArch64MachineCombinerPattern MCP;
7009
7010	switch (Root.getOpcode()) {
7011	default:
7012	assert(false && "Unsupported FP instruction in combiner\n");
7013	break;
7014	case AArch64::FADDHrr:
7015	assert(Root.getOperand(`1`).isReg() && Root.getOperand(`2`).isReg() &&
7016	"FADDHrr does not have register operands");
7017
7018	Found = Match (AArch64::FMULHrr, `1`, MCP::FMULADDH_OP1);
7019	Found \|= Match (AArch64::FMULHrr, `2`, MCP::FMULADDH_OP2);
7020	break;
7021	case AArch64::FADDSrr:
7022	assert(Root.getOperand(`1`).isReg() && Root.getOperand(`2`).isReg() &&
7023	"FADDSrr does not have register operands");
7024
7025	Found \|= Match (AArch64::FMULSrr, `1`, MCP::FMULADDS_OP1) \|\|
7026	Match (AArch64::FMULv1i32_indexed, `1`, MCP::FMLAv1i32_indexed_OP1);
7027
7028	Found \|= Match (AArch64::FMULSrr, `2`, MCP::FMULADDS_OP2) \|\|
7029	Match (AArch64::FMULv1i32_indexed, `2`, MCP::FMLAv1i32_indexed_OP2);
7030	break;
7031	case AArch64::FADDDrr:
7032	Found \|= Match (AArch64::FMULDrr, `1`, MCP::FMULADDD_OP1) \|\|
7033	Match (AArch64::FMULv1i64_indexed, `1`, MCP::FMLAv1i64_indexed_OP1);
7034
7035	Found \|= Match (AArch64::FMULDrr, `2`, MCP::FMULADDD_OP2) \|\|
7036	Match (AArch64::FMULv1i64_indexed, `2`, MCP::FMLAv1i64_indexed_OP2);
7037	break;
7038	case AArch64::FADDv4f16:
7039	Found \|= Match (AArch64::FMULv4i16_indexed, `1`, MCP::FMLAv4i16_indexed_OP1) \|\|
7040	Match (AArch64::FMULv4f16, `1`, MCP::FMLAv4f16_OP1);
7041
7042	Found \|= Match (AArch64::FMULv4i16_indexed, `2`, MCP::FMLAv4i16_indexed_OP2) \|\|
7043	Match (AArch64::FMULv4f16, `2`, MCP::FMLAv4f16_OP2);
7044	break;
7045	case AArch64::FADDv8f16:
7046	Found \|= Match (AArch64::FMULv8i16_indexed, `1`, MCP::FMLAv8i16_indexed_OP1) \|\|
7047	Match (AArch64::FMULv8f16, `1`, MCP::FMLAv8f16_OP1);
7048
7049	Found \|= Match (AArch64::FMULv8i16_indexed, `2`, MCP::FMLAv8i16_indexed_OP2) \|\|
7050	Match (AArch64::FMULv8f16, `2`, MCP::FMLAv8f16_OP2);
7051	break;
7052	case AArch64::FADDv2f32:
7053	Found \|= Match (AArch64::FMULv2i32_indexed, `1`, MCP::FMLAv2i32_indexed_OP1) \|\|
7054	Match (AArch64::FMULv2f32, `1`, MCP::FMLAv2f32_OP1);
7055
7056	Found \|= Match (AArch64::FMULv2i32_indexed, `2`, MCP::FMLAv2i32_indexed_OP2) \|\|
7057	Match (AArch64::FMULv2f32, `2`, MCP::FMLAv2f32_OP2);
7058	break;
7059	case AArch64::FADDv2f64:
7060	Found \|= Match (AArch64::FMULv2i64_indexed, `1`, MCP::FMLAv2i64_indexed_OP1) \|\|
7061	Match (AArch64::FMULv2f64, `1`, MCP::FMLAv2f64_OP1);
7062
7063	Found \|= Match (AArch64::FMULv2i64_indexed, `2`, MCP::FMLAv2i64_indexed_OP2) \|\|
7064	Match (AArch64::FMULv2f64, `2`, MCP::FMLAv2f64_OP2);
7065	break;
7066	case AArch64::FADDv4f32:
7067	Found \|= Match (AArch64::FMULv4i32_indexed, `1`, MCP::FMLAv4i32_indexed_OP1) \|\|
7068	Match (AArch64::FMULv4f32, `1`, MCP::FMLAv4f32_OP1);
7069
7070	Found \|= Match (AArch64::FMULv4i32_indexed, `2`, MCP::FMLAv4i32_indexed_OP2) \|\|
7071	Match (AArch64::FMULv4f32, `2`, MCP::FMLAv4f32_OP2);
7072	break;
7073	case AArch64::FSUBHrr:
7074	Found = Match (AArch64::FMULHrr, `1`, MCP::FMULSUBH_OP1);
7075	Found \|= Match (AArch64::FMULHrr, `2`, MCP::FMULSUBH_OP2);
7076	Found \|= Match (AArch64::FNMULHrr, `1`, MCP::FNMULSUBH_OP1);
7077	break;
7078	case AArch64::FSUBSrr:
7079	Found = Match (AArch64::FMULSrr, `1`, MCP::FMULSUBS_OP1);
7080
7081	Found \|= Match (AArch64::FMULSrr, `2`, MCP::FMULSUBS_OP2) \|\|
7082	Match (AArch64::FMULv1i32_indexed, `2`, MCP::FMLSv1i32_indexed_OP2);
7083
7084	Found \|= Match (AArch64::FNMULSrr, `1`, MCP::FNMULSUBS_OP1);
7085	break;
7086	case AArch64::FSUBDrr:
7087	Found = Match (AArch64::FMULDrr, `1`, MCP::FMULSUBD_OP1);
7088
7089	Found \|= Match (AArch64::FMULDrr, `2`, MCP::FMULSUBD_OP2) \|\|
7090	Match (AArch64::FMULv1i64_indexed, `2`, MCP::FMLSv1i64_indexed_OP2);
7091
7092	Found \|= Match (AArch64::FNMULDrr, `1`, MCP::FNMULSUBD_OP1);
7093	break;
7094	case AArch64::FSUBv4f16:
7095	Found \|= Match (AArch64::FMULv4i16_indexed, `2`, MCP::FMLSv4i16_indexed_OP2) \|\|
7096	Match (AArch64::FMULv4f16, `2`, MCP::FMLSv4f16_OP2);
7097
7098	Found \|= Match (AArch64::FMULv4i16_indexed, `1`, MCP::FMLSv4i16_indexed_OP1) \|\|
7099	Match (AArch64::FMULv4f16, `1`, MCP::FMLSv4f16_OP1);
7100	break;
7101	case AArch64::FSUBv8f16:
7102	Found \|= Match (AArch64::FMULv8i16_indexed, `2`, MCP::FMLSv8i16_indexed_OP2) \|\|
7103	Match (AArch64::FMULv8f16, `2`, MCP::FMLSv8f16_OP2);
7104
7105	Found \|= Match (AArch64::FMULv8i16_indexed, `1`, MCP::FMLSv8i16_indexed_OP1) \|\|
7106	Match (AArch64::FMULv8f16, `1`, MCP::FMLSv8f16_OP1);
7107	break;
7108	case AArch64::FSUBv2f32:
7109	Found \|= Match (AArch64::FMULv2i32_indexed, `2`, MCP::FMLSv2i32_indexed_OP2) \|\|
7110	Match (AArch64::FMULv2f32, `2`, MCP::FMLSv2f32_OP2);
7111
7112	Found \|= Match (AArch64::FMULv2i32_indexed, `1`, MCP::FMLSv2i32_indexed_OP1) \|\|
7113	Match (AArch64::FMULv2f32, `1`, MCP::FMLSv2f32_OP1);
7114	break;
7115	case AArch64::FSUBv2f64:
7116	Found \|= Match (AArch64::FMULv2i64_indexed, `2`, MCP::FMLSv2i64_indexed_OP2) \|\|
7117	Match (AArch64::FMULv2f64, `2`, MCP::FMLSv2f64_OP2);
7118
7119	Found \|= Match (AArch64::FMULv2i64_indexed, `1`, MCP::FMLSv2i64_indexed_OP1) \|\|
7120	Match (AArch64::FMULv2f64, `1`, MCP::FMLSv2f64_OP1);
7121	break;
7122	case AArch64::FSUBv4f32:
7123	Found \|= Match (AArch64::FMULv4i32_indexed, `2`, MCP::FMLSv4i32_indexed_OP2) \|\|
7124	Match (AArch64::FMULv4f32, `2`, MCP::FMLSv4f32_OP2);
7125
7126	Found \|= Match (AArch64::FMULv4i32_indexed, `1`, MCP::FMLSv4i32_indexed_OP1) \|\|
7127	Match (AArch64::FMULv4f32, `1`, MCP::FMLSv4f32_OP1);
7128	break;
7129	}
7130	return Found;
7131	}
7132
7133	static bool getFMULPatterns(MachineInstr &Root,
7134	SmallVectorImpl<unsigned> &Patterns) {
7135	MachineBasicBlock &MBB = *Root.getParent();
7136	bool Found = false;
7137
7138	auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7139	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7140	MachineOperand &MO = Root.getOperand(i: Operand);
7141	MachineInstr MI = nullptr*;
7142	if (MO.isReg() && MO.getReg().isVirtual())
7143	MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
7144	// Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7145	if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7146	MI->getOperand(i: `1`).getReg().isVirtual())
7147	MI = MRI.getUniqueVRegDef(Reg: MI->getOperand(i: `1`).getReg());
7148	if (MI && MI->getOpcode() == Opcode) {
7149	Patterns.push_back(Elt: Pattern);
7150	return true;
7151	}
7152	return false;
7153	};
7154
7155	typedef AArch64MachineCombinerPattern MCP;
7156
7157	switch (Root.getOpcode()) {
7158	default:
7159	return false;
7160	case AArch64::FMULv2f32:
7161	Found = Match (AArch64::DUPv2i32lane, `1`, MCP::FMULv2i32_indexed_OP1);
7162	Found \|= Match (AArch64::DUPv2i32lane, `2`, MCP::FMULv2i32_indexed_OP2);
7163	break;
7164	case AArch64::FMULv2f64:
7165	Found = Match (AArch64::DUPv2i64lane, `1`, MCP::FMULv2i64_indexed_OP1);
7166	Found \|= Match (AArch64::DUPv2i64lane, `2`, MCP::FMULv2i64_indexed_OP2);
7167	break;
7168	case AArch64::FMULv4f16:
7169	Found = Match (AArch64::DUPv4i16lane, `1`, MCP::FMULv4i16_indexed_OP1);
7170	Found \|= Match (AArch64::DUPv4i16lane, `2`, MCP::FMULv4i16_indexed_OP2);
7171	break;
7172	case AArch64::FMULv4f32:
7173	Found = Match (AArch64::DUPv4i32lane, `1`, MCP::FMULv4i32_indexed_OP1);
7174	Found \|= Match (AArch64::DUPv4i32lane, `2`, MCP::FMULv4i32_indexed_OP2);
7175	break;
7176	case AArch64::FMULv8f16:
7177	Found = Match (AArch64::DUPv8i16lane, `1`, MCP::FMULv8i16_indexed_OP1);
7178	Found \|= Match (AArch64::DUPv8i16lane, `2`, MCP::FMULv8i16_indexed_OP2);
7179	break;
7180	}
7181
7182	return Found;
7183	}
7184
7185	static bool getFNEGPatterns(MachineInstr &Root,
7186	SmallVectorImpl<unsigned> &Patterns) {
7187	unsigned Opc = Root.getOpcode();
7188	MachineBasicBlock &MBB = *Root.getParent();
7189	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7190
7191	auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7192	MachineOperand &MO = Root.getOperand(i: `1`);
7193	MachineInstr *MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
7194	if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7195	MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: `0`).getReg()) &&
7196	Root.getFlag(Flag: MachineInstr::MIFlag::FmContract) &&
7197	Root.getFlag(Flag: MachineInstr::MIFlag::FmNsz) &&
7198	MI->getFlag(Flag: MachineInstr::MIFlag::FmContract) &&
7199	MI->getFlag(Flag: MachineInstr::MIFlag::FmNsz)) {
7200	Patterns.push_back(Elt: Pattern);
7201	return true;
7202	}
7203	return false;
7204	};
7205
7206	switch (Opc) {
7207	default:
7208	break;
7209	case AArch64::FNEGDr:
7210	return Match (AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7211	case AArch64::FNEGSr:
7212	return Match (AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7213	}
7214
7215	return false;
7216	}
7217
7218	/// Return true when a code sequence can improve throughput. It
7219	/// should be called only for instructions in loops.
7220	/// \param Pattern - combiner pattern
7221	bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7222	switch (Pattern) {
7223	default:
7224	break;
7225	case AArch64MachineCombinerPattern::FMULADDH_OP1:
7226	case AArch64MachineCombinerPattern::FMULADDH_OP2:
7227	case AArch64MachineCombinerPattern::FMULSUBH_OP1:
7228	case AArch64MachineCombinerPattern::FMULSUBH_OP2:
7229	case AArch64MachineCombinerPattern::FMULADDS_OP1:
7230	case AArch64MachineCombinerPattern::FMULADDS_OP2:
7231	case AArch64MachineCombinerPattern::FMULSUBS_OP1:
7232	case AArch64MachineCombinerPattern::FMULSUBS_OP2:
7233	case AArch64MachineCombinerPattern::FMULADDD_OP1:
7234	case AArch64MachineCombinerPattern::FMULADDD_OP2:
7235	case AArch64MachineCombinerPattern::FMULSUBD_OP1:
7236	case AArch64MachineCombinerPattern::FMULSUBD_OP2:
7237	case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
7238	case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
7239	case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
7240	case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
7241	case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
7242	case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
7243	case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
7244	case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
7245	case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
7246	case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
7247	case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
7248	case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
7249	case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
7250	case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
7251	case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
7252	case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
7253	case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
7254	case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
7255	case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
7256	case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
7257	case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
7258	case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
7259	case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
7260	case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
7261	case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
7262	case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
7263	case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
7264	case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1:
7265	case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
7266	case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1:
7267	case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
7268	case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
7269	case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
7270	case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
7271	case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
7272	case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
7273	case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
7274	case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
7275	case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
7276	case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
7277	case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
7278	case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
7279	case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
7280	case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
7281	case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2:
7282	case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
7283	case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2:
7284	case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
7285	case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2:
7286	case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
7287	case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2:
7288	case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
7289	case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2:
7290	case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
7291	case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
7292	case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
7293	case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
7294	case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
7295	case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
7296	case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
7297	case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
7298	case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
7299	case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
7300	case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
7301	case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
7302	case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
7303	case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
7304	case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
7305	case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
7306	case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
7307	case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
7308	case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
7309	case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
7310	case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
7311	case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
7312	case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
7313	case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
7314	case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
7315	case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
7316	case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
7317	case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
7318	case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
7319	case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
7320	case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
7321	case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
7322	case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
7323	case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
7324	case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
7325	case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
7326	case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
7327	case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
7328	case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
7329	case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
7330	return true;
7331	} // end switch (Pattern)
7332	return false;
7333	}
7334
7335	/// Find other MI combine patterns.
7336	static bool getMiscPatterns(MachineInstr &Root,
7337	SmallVectorImpl<unsigned> &Patterns) {
7338	// A - (B + C) ==> (A - B) - C or (A - C) - B
7339	unsigned Opc = Root.getOpcode();
7340	MachineBasicBlock &MBB = *Root.getParent();
7341
7342	switch (Opc) {
7343	case AArch64::SUBWrr:
7344	case AArch64::SUBSWrr:
7345	case AArch64::SUBXrr:
7346	case AArch64::SUBSXrr:
7347	// Found candidate root.
7348	break;
7349	default:
7350	return false;
7351	}
7352
7353	if (isCombineInstrSettingFlag(Opc) &&
7354	Root.findRegisterDefOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr, isDead: true) ==
7355	-`1`)
7356	return false;
7357
7358	if (canCombine(MBB, MO&: Root.getOperand(i: `2`), CombineOpc: AArch64::ADDWrr) \|\|
7359	canCombine(MBB, MO&: Root.getOperand(i: `2`), CombineOpc: AArch64::ADDSWrr) \|\|
7360	canCombine(MBB, MO&: Root.getOperand(i: `2`), CombineOpc: AArch64::ADDXrr) \|\|
7361	canCombine(MBB, MO&: Root.getOperand(i: `2`), CombineOpc: AArch64::ADDSXrr)) {
7362	Patterns.push_back(Elt: AArch64MachineCombinerPattern::SUBADD_OP1);
7363	Patterns.push_back(Elt: AArch64MachineCombinerPattern::SUBADD_OP2);
7364	return true;
7365	}
7366
7367	return false;
7368	}
7369
7370	CombinerObjective
7371	AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
7372	switch (Pattern) {
7373	case AArch64MachineCombinerPattern::SUBADD_OP1:
7374	case AArch64MachineCombinerPattern::SUBADD_OP2:
7375	return CombinerObjective::MustReduceDepth;
7376	default:
7377	return TargetInstrInfo::getCombinerObjective(Pattern);
7378	}
7379	}
7380
7381	/// Return true when there is potentially a faster code sequence for an
7382	/// instruction chain ending in \p Root. All potential patterns are listed in
7383	/// the \p Pattern vector. Pattern should be sorted in priority order since the
7384	/// pattern evaluator stops checking as soon as it finds a faster sequence.
7385
7386	bool AArch64InstrInfo::getMachineCombinerPatterns(
7387	MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7388	bool DoRegPressureReduce) const {
7389	// Integer patterns
7390	if (getMaddPatterns(Root, Patterns))
7391	return true;
7392	// Floating point patterns
7393	if (getFMULPatterns(Root, Patterns))
7394	return true;
7395	if (getFMAPatterns(Root, Patterns))
7396	return true;
7397	if (getFNEGPatterns(Root, Patterns))
7398	return true;
7399
7400	// Other patterns
7401	if (getMiscPatterns(Root, Patterns))
7402	return true;
7403
7404	return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7405	DoRegPressureReduce);
7406	}
7407
7408	enum class FMAInstKind { Default, Indexed, Accumulator };
7409	/// genFusedMultiply - Generate fused multiply instructions.
7410	/// This function supports both integer and floating point instructions.
7411	/// A typical example:
7412	/// F\|MUL I=A,B,0
7413	/// F\|ADD R,I,C
7414	/// ==> F\|MADD R,A,B,C
7415	/// \param MF Containing MachineFunction
7416	/// \param MRI Register information
7417	/// \param TII Target information
7418	/// \param Root is the F\|ADD instruction
7419	/// \param [out] InsInstrs is a vector of machine instructions and will
7420	/// contain the generated madd instruction
7421	/// \param IdxMulOpd is index of operand in Root that is the result of
7422	/// the F\|MUL. In the example above IdxMulOpd is 1.
7423	/// \param MaddOpc the opcode fo the f\|madd instruction
7424	/// \param RC Register class of operands
7425	/// \param kind of fma instruction (addressing mode) to be generated
7426	/// \param ReplacedAddend is the result register from the instruction
7427	/// replacing the non-combined operand, if any.
7428	static MachineInstr *
7429	genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
7430	const TargetInstrInfo *TII, MachineInstr &Root,
7431	SmallVectorImpl<MachineInstr > &InsInstrs, unsigned* IdxMulOpd,
7432	unsigned MaddOpc, const TargetRegisterClass *RC,
7433	FMAInstKind kind = FMAInstKind::Default,
7434	const Register ReplacedAddend = nullptr*) {
7435	assert(IdxMulOpd == `1` \|\| IdxMulOpd == `2`);
7436
7437	unsigned IdxOtherOpd = IdxMulOpd == `1` ? `2` : `1`;
7438	MachineInstr *MUL = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: IdxMulOpd).getReg());
7439	Register ResultReg = Root.getOperand(i: `0`).getReg();
7440	Register SrcReg0 = MUL->getOperand(i: `1`).getReg();
7441	bool Src0IsKill = MUL->getOperand(i: `1`).isKill();
7442	Register SrcReg1 = MUL->getOperand(i: `2`).getReg();
7443	bool Src1IsKill = MUL->getOperand(i: `2`).isKill();
7444
7445	Register SrcReg2;
7446	bool Src2IsKill;
7447	if (ReplacedAddend) {
7448	// If we just generated a new addend, we must be it's only use.
7449	SrcReg2 = *ReplacedAddend;
7450	Src2IsKill = true;
7451	} else {
7452	SrcReg2 = Root.getOperand(i: IdxOtherOpd).getReg();
7453	Src2IsKill = Root.getOperand(i: IdxOtherOpd).isKill();
7454	}
7455
7456	if (ResultReg.isVirtual())
7457	MRI.constrainRegClass(Reg: ResultReg, RC);
7458	if (SrcReg0.isVirtual())
7459	MRI.constrainRegClass(Reg: SrcReg0, RC);
7460	if (SrcReg1.isVirtual())
7461	MRI.constrainRegClass(Reg: SrcReg1, RC);
7462	if (SrcReg2.isVirtual())
7463	MRI.constrainRegClass(Reg: SrcReg2, RC);
7464
7465	MachineInstrBuilder MIB;
7466	if (kind == FMAInstKind::Default)
7467	MIB = BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
7468	.addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7469	.addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
7470	.addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill));
7471	else if (kind == FMAInstKind::Indexed)
7472	MIB = BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
7473	.addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill))
7474	.addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7475	.addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
7476	.addImm(Val: MUL->getOperand(i: `3`).getImm());
7477	else if (kind == FMAInstKind::Accumulator)
7478	MIB = BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
7479	.addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill))
7480	.addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7481	.addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill));
7482	else
7483	assert(false && "Invalid FMA instruction kind \n");
7484	// Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
7485	InsInstrs.push_back(Elt: MIB);
7486	return MUL;
7487	}
7488
7489	static MachineInstr *
7490	genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
7491	const TargetInstrInfo *TII, MachineInstr &Root,
7492	SmallVectorImpl<MachineInstr *> &InsInstrs) {
7493	MachineInstr *MAD = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: `1`).getReg());
7494
7495	unsigned Opc = `0`;
7496	const TargetRegisterClass *RC = MRI.getRegClass(Reg: MAD->getOperand(i: `0`).getReg());
7497	if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7498	Opc = AArch64::FNMADDSrrr;
7499	else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7500	Opc = AArch64::FNMADDDrrr;
7501	else
7502	return nullptr;
7503
7504	Register ResultReg = Root.getOperand(i: `0`).getReg();
7505	Register SrcReg0 = MAD->getOperand(i: `1`).getReg();
7506	Register SrcReg1 = MAD->getOperand(i: `2`).getReg();
7507	Register SrcReg2 = MAD->getOperand(i: `3`).getReg();
7508	bool Src0IsKill = MAD->getOperand(i: `1`).isKill();
7509	bool Src1IsKill = MAD->getOperand(i: `2`).isKill();
7510	bool Src2IsKill = MAD->getOperand(i: `3`).isKill();
7511	if (ResultReg.isVirtual())
7512	MRI.constrainRegClass(Reg: ResultReg, RC);
7513	if (SrcReg0.isVirtual())
7514	MRI.constrainRegClass(Reg: SrcReg0, RC);
7515	if (SrcReg1.isVirtual())
7516	MRI.constrainRegClass(Reg: SrcReg1, RC);
7517	if (SrcReg2.isVirtual())
7518	MRI.constrainRegClass(Reg: SrcReg2, RC);
7519
7520	MachineInstrBuilder MIB =
7521	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: Opc), DestReg: ResultReg)
7522	.addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7523	.addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
7524	.addReg(RegNo: SrcReg2, flags: getKillRegState(B: Src2IsKill));
7525	InsInstrs.push_back(Elt: MIB);
7526
7527	return MAD;
7528	}
7529
7530	/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
7531	static MachineInstr *
7532	genIndexedMultiply(MachineInstr &Root,
7533	SmallVectorImpl<MachineInstr *> &InsInstrs,
7534	unsigned IdxDupOp, unsigned MulOpc,
7535	const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
7536	assert(((IdxDupOp == `1`) \|\| (IdxDupOp == `2`)) &&
7537	"Invalid index of FMUL operand");
7538
7539	MachineFunction &MF = *Root.getMF();
7540	const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7541
7542	MachineInstr *Dup =
7543	MF.getRegInfo().getUniqueVRegDef(Reg: Root.getOperand(i: IdxDupOp).getReg());
7544
7545	if (Dup->getOpcode() == TargetOpcode::COPY)
7546	Dup = MRI.getUniqueVRegDef(Reg: Dup->getOperand(i: `1`).getReg());
7547
7548	Register DupSrcReg = Dup->getOperand(i: `1`).getReg();
7549	MRI.clearKillFlags(Reg: DupSrcReg);
7550	MRI.constrainRegClass(Reg: DupSrcReg, RC);
7551
7552	unsigned DupSrcLane = Dup->getOperand(i: `2`).getImm();
7553
7554	unsigned IdxMulOp = IdxDupOp == `1` ? `2` : `1`;
7555	MachineOperand &MulOp = Root.getOperand(i: IdxMulOp);
7556
7557	Register ResultReg = Root.getOperand(i: `0`).getReg();
7558
7559	MachineInstrBuilder MIB;
7560	MIB = BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: MulOpc), DestReg: ResultReg)
7561	.add(MO: MulOp)
7562	.addReg(RegNo: DupSrcReg)
7563	.addImm(Val: DupSrcLane);
7564
7565	InsInstrs.push_back(Elt: MIB);
7566	return &Root;
7567	}
7568
7569	/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
7570	/// instructions.
7571	///
7572	/// \see genFusedMultiply
7573	static MachineInstr *genFusedMultiplyAcc(
7574	MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7575	MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7576	unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7577	return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7578	kind: FMAInstKind::Accumulator);
7579	}
7580
7581	/// genNeg - Helper to generate an intermediate negation of the second operand
7582	/// of Root
7583	static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
7584	const TargetInstrInfo *TII, MachineInstr &Root,
7585	SmallVectorImpl<MachineInstr *> &InsInstrs,
7586	DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7587	unsigned MnegOpc, const TargetRegisterClass *RC) {
7588	Register NewVR = MRI.createVirtualRegister(RegClass: RC);
7589	MachineInstrBuilder MIB =
7590	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: MnegOpc), DestReg: NewVR)
7591	.add(MO: Root.getOperand(i: `2`));
7592	InsInstrs.push_back(Elt: MIB);
7593
7594	assert(InstrIdxForVirtReg.empty());
7595	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
7596
7597	return NewVR;
7598	}
7599
7600	/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
7601	/// instructions with an additional negation of the accumulator
7602	static MachineInstr *genFusedMultiplyAccNeg(
7603	MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7604	MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7605	DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7606	unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7607	assert(IdxMulOpd == `1`);
7608
7609	Register NewVR =
7610	genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7611	return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7612	kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
7613	}
7614
7615	/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
7616	/// instructions.
7617	///
7618	/// \see genFusedMultiply
7619	static MachineInstr *genFusedMultiplyIdx(
7620	MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7621	MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7622	unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7623	return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7624	kind: FMAInstKind::Indexed);
7625	}
7626
7627	/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
7628	/// instructions with an additional negation of the accumulator
7629	static MachineInstr *genFusedMultiplyIdxNeg(
7630	MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7631	MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7632	DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7633	unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7634	assert(IdxMulOpd == `1`);
7635
7636	Register NewVR =
7637	genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7638
7639	return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7640	kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
7641	}
7642
7643	/// genMaddR - Generate madd instruction and combine mul and add using
7644	/// an extra virtual register
7645	/// Example - an ADD intermediate needs to be stored in a register:
7646	/// MUL I=A,B,0
7647	/// ADD R,I,Imm
7648	/// ==> ORR V, ZR, Imm
7649	/// ==> MADD R,A,B,V
7650	/// \param MF Containing MachineFunction
7651	/// \param MRI Register information
7652	/// \param TII Target information
7653	/// \param Root is the ADD instruction
7654	/// \param [out] InsInstrs is a vector of machine instructions and will
7655	/// contain the generated madd instruction
7656	/// \param IdxMulOpd is index of operand in Root that is the result of
7657	/// the MUL. In the example above IdxMulOpd is 1.
7658	/// \param MaddOpc the opcode fo the madd instruction
7659	/// \param VR is a virtual register that holds the value of an ADD operand
7660	/// (V in the example above).
7661	/// \param RC Register class of operands
7662	static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
7663	const TargetInstrInfo *TII, MachineInstr &Root,
7664	SmallVectorImpl<MachineInstr *> &InsInstrs,
7665	unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
7666	const TargetRegisterClass *RC) {
7667	assert(IdxMulOpd == `1` \|\| IdxMulOpd == `2`);
7668
7669	MachineInstr *MUL = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: IdxMulOpd).getReg());
7670	Register ResultReg = Root.getOperand(i: `0`).getReg();
7671	Register SrcReg0 = MUL->getOperand(i: `1`).getReg();
7672	bool Src0IsKill = MUL->getOperand(i: `1`).isKill();
7673	Register SrcReg1 = MUL->getOperand(i: `2`).getReg();
7674	bool Src1IsKill = MUL->getOperand(i: `2`).isKill();
7675
7676	if (ResultReg.isVirtual())
7677	MRI.constrainRegClass(Reg: ResultReg, RC);
7678	if (SrcReg0.isVirtual())
7679	MRI.constrainRegClass(Reg: SrcReg0, RC);
7680	if (SrcReg1.isVirtual())
7681	MRI.constrainRegClass(Reg: SrcReg1, RC);
7682	if (Register::isVirtualRegister(Reg: VR))
7683	MRI.constrainRegClass(Reg: VR, RC);
7684
7685	MachineInstrBuilder MIB =
7686	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: MaddOpc), DestReg: ResultReg)
7687	.addReg(RegNo: SrcReg0, flags: getKillRegState(B: Src0IsKill))
7688	.addReg(RegNo: SrcReg1, flags: getKillRegState(B: Src1IsKill))
7689	.addReg(RegNo: VR);
7690	// Insert the MADD
7691	InsInstrs.push_back(Elt: MIB);
7692	return MUL;
7693	}
7694
7695	/// Do the following transformation
7696	/// A - (B + C) ==> (A - B) - C
7697	/// A - (B + C) ==> (A - C) - B
7698	static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
7699	const TargetInstrInfo *TII, MachineInstr &Root,
7700	SmallVectorImpl<MachineInstr *> &InsInstrs,
7701	SmallVectorImpl<MachineInstr *> &DelInstrs,
7702	unsigned IdxOpd1,
7703	DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
7704	assert(IdxOpd1 == `1` \|\| IdxOpd1 == `2`);
7705	unsigned IdxOtherOpd = IdxOpd1 == `1` ? `2` : `1`;
7706	MachineInstr *AddMI = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: `2`).getReg());
7707
7708	Register ResultReg = Root.getOperand(i: `0`).getReg();
7709	Register RegA = Root.getOperand(i: `1`).getReg();
7710	bool RegAIsKill = Root.getOperand(i: `1`).isKill();
7711	Register RegB = AddMI->getOperand(i: IdxOpd1).getReg();
7712	bool RegBIsKill = AddMI->getOperand(i: IdxOpd1).isKill();
7713	Register RegC = AddMI->getOperand(i: IdxOtherOpd).getReg();
7714	bool RegCIsKill = AddMI->getOperand(i: IdxOtherOpd).isKill();
7715	Register NewVR =
7716	MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: Root.getOperand(i: `2`).getReg()));
7717
7718	unsigned Opcode = Root.getOpcode();
7719	if (Opcode == AArch64::SUBSWrr)
7720	Opcode = AArch64::SUBWrr;
7721	else if (Opcode == AArch64::SUBSXrr)
7722	Opcode = AArch64::SUBXrr;
7723	else
7724	assert((Opcode == AArch64::SUBWrr \|\| Opcode == AArch64::SUBXrr) &&
7725	"Unexpected instruction opcode.");
7726
7727	uint32_t Flags = Root.mergeFlagsWith(Other: *AddMI);
7728	Flags &= ~MachineInstr::NoSWrap;
7729	Flags &= ~MachineInstr::NoUWrap;
7730
7731	MachineInstrBuilder MIB1 =
7732	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode), DestReg: NewVR)
7733	.addReg(RegNo: RegA, flags: getKillRegState(B: RegAIsKill))
7734	.addReg(RegNo: RegB, flags: getKillRegState(B: RegBIsKill))
7735	.setMIFlags(Flags);
7736	MachineInstrBuilder MIB2 =
7737	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode), DestReg: ResultReg)
7738	.addReg(RegNo: NewVR, flags: getKillRegState(B: true))
7739	.addReg(RegNo: RegC, flags: getKillRegState(B: RegCIsKill))
7740	.setMIFlags(Flags);
7741
7742	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
7743	InsInstrs.push_back(Elt: MIB1);
7744	InsInstrs.push_back(Elt: MIB2);
7745	DelInstrs.push_back(Elt: AddMI);
7746	DelInstrs.push_back(Elt: &Root);
7747	}
7748
7749	unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
7750	unsigned int AccumulatorOpCode) const {
7751	switch (AccumulatorOpCode) {
7752	case AArch64::UABALB_ZZZ_D:
7753	case AArch64::SABALB_ZZZ_D:
7754	case AArch64::UABALT_ZZZ_D:
7755	case AArch64::SABALT_ZZZ_D:
7756	return AArch64::ADD_ZZZ_D;
7757	case AArch64::UABALB_ZZZ_H:
7758	case AArch64::SABALB_ZZZ_H:
7759	case AArch64::UABALT_ZZZ_H:
7760	case AArch64::SABALT_ZZZ_H:
7761	return AArch64::ADD_ZZZ_H;
7762	case AArch64::UABALB_ZZZ_S:
7763	case AArch64::SABALB_ZZZ_S:
7764	case AArch64::UABALT_ZZZ_S:
7765	case AArch64::SABALT_ZZZ_S:
7766	return AArch64::ADD_ZZZ_S;
7767	case AArch64::UABALv16i8_v8i16:
7768	case AArch64::SABALv8i8_v8i16:
7769	case AArch64::SABAv8i16:
7770	case AArch64::UABAv8i16:
7771	return AArch64::ADDv8i16;
7772	case AArch64::SABALv2i32_v2i64:
7773	case AArch64::UABALv2i32_v2i64:
7774	case AArch64::SABALv4i32_v2i64:
7775	return AArch64::ADDv2i64;
7776	case AArch64::UABALv4i16_v4i32:
7777	case AArch64::SABALv4i16_v4i32:
7778	case AArch64::SABALv8i16_v4i32:
7779	case AArch64::SABAv4i32:
7780	case AArch64::UABAv4i32:
7781	return AArch64::ADDv4i32;
7782	case AArch64::UABALv4i32_v2i64:
7783	return AArch64::ADDv2i64;
7784	case AArch64::UABALv8i16_v4i32:
7785	return AArch64::ADDv4i32;
7786	case AArch64::UABALv8i8_v8i16:
7787	case AArch64::SABALv16i8_v8i16:
7788	return AArch64::ADDv8i16;
7789	case AArch64::UABAv16i8:
7790	case AArch64::SABAv16i8:
7791	return AArch64::ADDv16i8;
7792	case AArch64::UABAv4i16:
7793	case AArch64::SABAv4i16:
7794	return AArch64::ADDv4i16;
7795	case AArch64::UABAv2i32:
7796	case AArch64::SABAv2i32:
7797	return AArch64::ADDv2i32;
7798	case AArch64::UABAv8i8:
7799	case AArch64::SABAv8i8:
7800	return AArch64::ADDv8i8;
7801	default:
7802	llvm_unreachable("Unknown accumulator opcode");
7803	}
7804	}
7805
7806	/// When getMachineCombinerPatterns() finds potential patterns,
7807	/// this function generates the instructions that could replace the
7808	/// original code sequence
7809	void AArch64InstrInfo::genAlternativeCodeSequence(
7810	MachineInstr &Root, unsigned Pattern,
7811	SmallVectorImpl<MachineInstr *> &InsInstrs,
7812	SmallVectorImpl<MachineInstr *> &DelInstrs,
7813	DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
7814	MachineBasicBlock &MBB = *Root.getParent();
7815	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7816	MachineFunction &MF = *MBB.getParent();
7817	const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7818
7819	MachineInstr MUL = nullptr*;
7820	const TargetRegisterClass *RC;
7821	unsigned Opc;
7822	switch (Pattern) {
7823	default:
7824	// Reassociate instructions.
7825	TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
7826	DelInstrs, InstIdxForVirtReg&: InstrIdxForVirtReg);
7827	return;
7828	case AArch64MachineCombinerPattern::SUBADD_OP1:
7829	// A - (B + C)
7830	// ==> (A - B) - C
7831	genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, IdxOpd1: `1`,
7832	InstrIdxForVirtReg);
7833	return;
7834	case AArch64MachineCombinerPattern::SUBADD_OP2:
7835	// A - (B + C)
7836	// ==> (A - C) - B
7837	genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, IdxOpd1: `2`,
7838	InstrIdxForVirtReg);
7839	return;
7840	case AArch64MachineCombinerPattern::MULADDW_OP1:
7841	case AArch64MachineCombinerPattern::MULADDX_OP1:
7842	// MUL I=A,B,0
7843	// ADD R,I,C
7844	// ==> MADD R,A,B,C
7845	// --- Create(MADD);
7846	if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
7847	Opc = AArch64::MADDWrrr;
7848	RC = &AArch64::GPR32RegClass;
7849	} else {
7850	Opc = AArch64::MADDXrrr;
7851	RC = &AArch64::GPR64RegClass;
7852	}
7853	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
7854	break;
7855	case AArch64MachineCombinerPattern::MULADDW_OP2:
7856	case AArch64MachineCombinerPattern::MULADDX_OP2:
7857	// MUL I=A,B,0
7858	// ADD R,C,I
7859	// ==> MADD R,A,B,C
7860	// --- Create(MADD);
7861	if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
7862	Opc = AArch64::MADDWrrr;
7863	RC = &AArch64::GPR32RegClass;
7864	} else {
7865	Opc = AArch64::MADDXrrr;
7866	RC = &AArch64::GPR64RegClass;
7867	}
7868	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
7869	break;
7870	case AArch64MachineCombinerPattern::MULADDWI_OP1:
7871	case AArch64MachineCombinerPattern::MULADDXI_OP1: {
7872	// MUL I=A,B,0
7873	// ADD R,I,Imm
7874	// ==> MOV V, Imm
7875	// ==> MADD R,A,B,V
7876	// --- Create(MADD);
7877	const TargetRegisterClass *OrrRC;
7878	unsigned BitSize, OrrOpc, ZeroReg;
7879	if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
7880	OrrOpc = AArch64::ORRWri;
7881	OrrRC = &AArch64::GPR32spRegClass;
7882	BitSize = `32`;
7883	ZeroReg = AArch64::WZR;
7884	Opc = AArch64::MADDWrrr;
7885	RC = &AArch64::GPR32RegClass;
7886	} else {
7887	OrrOpc = AArch64::ORRXri;
7888	OrrRC = &AArch64::GPR64spRegClass;
7889	BitSize = `64`;
7890	ZeroReg = AArch64::XZR;
7891	Opc = AArch64::MADDXrrr;
7892	RC = &AArch64::GPR64RegClass;
7893	}
7894	Register NewVR = MRI.createVirtualRegister(RegClass: OrrRC);
7895	uint64_t Imm = Root.getOperand(i: `2`).getImm();
7896
7897	if (Root.getOperand(i: `3`).isImm()) {
7898	unsigned Val = Root.getOperand(i: `3`).getImm();
7899	Imm = Imm << Val;
7900	}
7901	uint64_t UImm = SignExtend64(X: Imm, B: BitSize);
7902	// The immediate can be composed via a single instruction.
7903	SmallVector<AArch64_IMM::ImmInsnModel, `4`> Insn;
7904	AArch64_IMM::expandMOVImm(Imm: UImm, BitSize, Insn);
7905	if (Insn.size() != `1`)
7906	return;
7907	auto MovI = Insn.begin();
7908	MachineInstrBuilder MIB1;
7909	// MOV is an alias for one of three instructions: movz, movn, and orr.
7910	if (MovI->Opcode == OrrOpc)
7911	MIB1 = BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: OrrOpc), DestReg: NewVR)
7912	.addReg(RegNo: ZeroReg)
7913	.addImm(Val: MovI->Op2);
7914	else {
7915	if (BitSize == `32`)
7916	assert((MovI->Opcode == AArch64::MOVNWi \|\|
7917	MovI->Opcode == AArch64::MOVZWi) &&
7918	"Expected opcode");
7919	else
7920	assert((MovI->Opcode == AArch64::MOVNXi \|\|
7921	MovI->Opcode == AArch64::MOVZXi) &&
7922	"Expected opcode");
7923	MIB1 = BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: MovI->Opcode), DestReg: NewVR)
7924	.addImm(Val: MovI->Op1)
7925	.addImm(Val: MovI->Op2);
7926	}
7927	InsInstrs.push_back(Elt: MIB1);
7928	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
7929	MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, VR: NewVR, RC);
7930	break;
7931	}
7932	case AArch64MachineCombinerPattern::MULSUBW_OP1:
7933	case AArch64MachineCombinerPattern::MULSUBX_OP1: {
7934	// MUL I=A,B,0
7935	// SUB R,I, C
7936	// ==> SUB V, 0, C
7937	// ==> MADD R,A,B,V // = -C + A*B
7938	// --- Create(MADD);
7939	const TargetRegisterClass *SubRC;
7940	unsigned SubOpc, ZeroReg;
7941	if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
7942	SubOpc = AArch64::SUBWrr;
7943	SubRC = &AArch64::GPR32spRegClass;
7944	ZeroReg = AArch64::WZR;
7945	Opc = AArch64::MADDWrrr;
7946	RC = &AArch64::GPR32RegClass;
7947	} else {
7948	SubOpc = AArch64::SUBXrr;
7949	SubRC = &AArch64::GPR64spRegClass;
7950	ZeroReg = AArch64::XZR;
7951	Opc = AArch64::MADDXrrr;
7952	RC = &AArch64::GPR64RegClass;
7953	}
7954	Register NewVR = MRI.createVirtualRegister(RegClass: SubRC);
7955	// SUB NewVR, 0, C
7956	MachineInstrBuilder MIB1 =
7957	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: SubOpc), DestReg: NewVR)
7958	.addReg(RegNo: ZeroReg)
7959	.add(MO: Root.getOperand(i: `2`));
7960	InsInstrs.push_back(Elt: MIB1);
7961	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
7962	MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, VR: NewVR, RC);
7963	break;
7964	}
7965	case AArch64MachineCombinerPattern::MULSUBW_OP2:
7966	case AArch64MachineCombinerPattern::MULSUBX_OP2:
7967	// MUL I=A,B,0
7968	// SUB R,C,I
7969	// ==> MSUB R,A,B,C (computes C - A*B)
7970	// --- Create(MSUB);
7971	if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
7972	Opc = AArch64::MSUBWrrr;
7973	RC = &AArch64::GPR32RegClass;
7974	} else {
7975	Opc = AArch64::MSUBXrrr;
7976	RC = &AArch64::GPR64RegClass;
7977	}
7978	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
7979	break;
7980	case AArch64MachineCombinerPattern::MULSUBWI_OP1:
7981	case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
7982	// MUL I=A,B,0
7983	// SUB R,I, Imm
7984	// ==> MOV V, -Imm
7985	// ==> MADD R,A,B,V // = -Imm + A*B
7986	// --- Create(MADD);
7987	const TargetRegisterClass *OrrRC;
7988	unsigned BitSize, OrrOpc, ZeroReg;
7989	if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
7990	OrrOpc = AArch64::ORRWri;
7991	OrrRC = &AArch64::GPR32spRegClass;
7992	BitSize = `32`;
7993	ZeroReg = AArch64::WZR;
7994	Opc = AArch64::MADDWrrr;
7995	RC = &AArch64::GPR32RegClass;
7996	} else {
7997	OrrOpc = AArch64::ORRXri;
7998	OrrRC = &AArch64::GPR64spRegClass;
7999	BitSize = `64`;
8000	ZeroReg = AArch64::XZR;
8001	Opc = AArch64::MADDXrrr;
8002	RC = &AArch64::GPR64RegClass;
8003	}
8004	Register NewVR = MRI.createVirtualRegister(RegClass: OrrRC);
8005	uint64_t Imm = Root.getOperand(i: `2`).getImm();
8006	if (Root.getOperand(i: `3`).isImm()) {
8007	unsigned Val = Root.getOperand(i: `3`).getImm();
8008	Imm = Imm << Val;
8009	}
8010	uint64_t UImm = SignExtend64(X: -Imm, B: BitSize);
8011	// The immediate can be composed via a single instruction.
8012	SmallVector<AArch64_IMM::ImmInsnModel, `4`> Insn;
8013	AArch64_IMM::expandMOVImm(Imm: UImm, BitSize, Insn);
8014	if (Insn.size() != `1`)
8015	return;
8016	auto MovI = Insn.begin();
8017	MachineInstrBuilder MIB1;
8018	// MOV is an alias for one of three instructions: movz, movn, and orr.
8019	if (MovI->Opcode == OrrOpc)
8020	MIB1 = BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: OrrOpc), DestReg: NewVR)
8021	.addReg(RegNo: ZeroReg)
8022	.addImm(Val: MovI->Op2);
8023	else {
8024	if (BitSize == `32`)
8025	assert((MovI->Opcode == AArch64::MOVNWi \|\|
8026	MovI->Opcode == AArch64::MOVZWi) &&
8027	"Expected opcode");
8028	else
8029	assert((MovI->Opcode == AArch64::MOVNXi \|\|
8030	MovI->Opcode == AArch64::MOVZXi) &&
8031	"Expected opcode");
8032	MIB1 = BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: MovI->Opcode), DestReg: NewVR)
8033	.addImm(Val: MovI->Op1)
8034	.addImm(Val: MovI->Op2);
8035	}
8036	InsInstrs.push_back(Elt: MIB1);
8037	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
8038	MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, VR: NewVR, RC);
8039	break;
8040	}
8041	case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
8042	Opc = AArch64::MLAv8i8;
8043	RC = &AArch64::FPR64RegClass;
8044	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8045	break;
8046	case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
8047	Opc = AArch64::MLAv8i8;
8048	RC = &AArch64::FPR64RegClass;
8049	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8050	break;
8051	case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
8052	Opc = AArch64::MLAv16i8;
8053	RC = &AArch64::FPR128RegClass;
8054	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8055	break;
8056	case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
8057	Opc = AArch64::MLAv16i8;
8058	RC = &AArch64::FPR128RegClass;
8059	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8060	break;
8061	case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
8062	Opc = AArch64::MLAv4i16;
8063	RC = &AArch64::FPR64RegClass;
8064	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8065	break;
8066	case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
8067	Opc = AArch64::MLAv4i16;
8068	RC = &AArch64::FPR64RegClass;
8069	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8070	break;
8071	case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
8072	Opc = AArch64::MLAv8i16;
8073	RC = &AArch64::FPR128RegClass;
8074	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8075	break;
8076	case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
8077	Opc = AArch64::MLAv8i16;
8078	RC = &AArch64::FPR128RegClass;
8079	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8080	break;
8081	case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
8082	Opc = AArch64::MLAv2i32;
8083	RC = &AArch64::FPR64RegClass;
8084	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8085	break;
8086	case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
8087	Opc = AArch64::MLAv2i32;
8088	RC = &AArch64::FPR64RegClass;
8089	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8090	break;
8091	case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
8092	Opc = AArch64::MLAv4i32;
8093	RC = &AArch64::FPR128RegClass;
8094	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8095	break;
8096	case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
8097	Opc = AArch64::MLAv4i32;
8098	RC = &AArch64::FPR128RegClass;
8099	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8100	break;
8101
8102	case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
8103	Opc = AArch64::MLAv8i8;
8104	RC = &AArch64::FPR64RegClass;
8105	MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8106	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv8i8,
8107	RC);
8108	break;
8109	case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
8110	Opc = AArch64::MLSv8i8;
8111	RC = &AArch64::FPR64RegClass;
8112	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8113	break;
8114	case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
8115	Opc = AArch64::MLAv16i8;
8116	RC = &AArch64::FPR128RegClass;
8117	MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8118	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv16i8,
8119	RC);
8120	break;
8121	case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
8122	Opc = AArch64::MLSv16i8;
8123	RC = &AArch64::FPR128RegClass;
8124	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8125	break;
8126	case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
8127	Opc = AArch64::MLAv4i16;
8128	RC = &AArch64::FPR64RegClass;
8129	MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8130	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv4i16,
8131	RC);
8132	break;
8133	case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
8134	Opc = AArch64::MLSv4i16;
8135	RC = &AArch64::FPR64RegClass;
8136	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8137	break;
8138	case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
8139	Opc = AArch64::MLAv8i16;
8140	RC = &AArch64::FPR128RegClass;
8141	MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8142	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv8i16,
8143	RC);
8144	break;
8145	case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
8146	Opc = AArch64::MLSv8i16;
8147	RC = &AArch64::FPR128RegClass;
8148	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8149	break;
8150	case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
8151	Opc = AArch64::MLAv2i32;
8152	RC = &AArch64::FPR64RegClass;
8153	MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8154	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv2i32,
8155	RC);
8156	break;
8157	case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
8158	Opc = AArch64::MLSv2i32;
8159	RC = &AArch64::FPR64RegClass;
8160	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8161	break;
8162	case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
8163	Opc = AArch64::MLAv4i32;
8164	RC = &AArch64::FPR128RegClass;
8165	MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8166	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv4i32,
8167	RC);
8168	break;
8169	case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
8170	Opc = AArch64::MLSv4i32;
8171	RC = &AArch64::FPR128RegClass;
8172	MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8173	break;
8174
8175	case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
8176	Opc = AArch64::MLAv4i16_indexed;
8177	RC = &AArch64::FPR64RegClass;
8178	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8179	break;
8180	case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
8181	Opc = AArch64::MLAv4i16_indexed;
8182	RC = &AArch64::FPR64RegClass;
8183	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8184	break;
8185	case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
8186	Opc = AArch64::MLAv8i16_indexed;
8187	RC = &AArch64::FPR128RegClass;
8188	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8189	break;
8190	case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
8191	Opc = AArch64::MLAv8i16_indexed;
8192	RC = &AArch64::FPR128RegClass;
8193	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8194	break;
8195	case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
8196	Opc = AArch64::MLAv2i32_indexed;
8197	RC = &AArch64::FPR64RegClass;
8198	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8199	break;
8200	case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
8201	Opc = AArch64::MLAv2i32_indexed;
8202	RC = &AArch64::FPR64RegClass;
8203	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8204	break;
8205	case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
8206	Opc = AArch64::MLAv4i32_indexed;
8207	RC = &AArch64::FPR128RegClass;
8208	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8209	break;
8210	case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
8211	Opc = AArch64::MLAv4i32_indexed;
8212	RC = &AArch64::FPR128RegClass;
8213	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8214	break;
8215
8216	case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
8217	Opc = AArch64::MLAv4i16_indexed;
8218	RC = &AArch64::FPR64RegClass;
8219	MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8220	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv4i16,
8221	RC);
8222	break;
8223	case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
8224	Opc = AArch64::MLSv4i16_indexed;
8225	RC = &AArch64::FPR64RegClass;
8226	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8227	break;
8228	case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
8229	Opc = AArch64::MLAv8i16_indexed;
8230	RC = &AArch64::FPR128RegClass;
8231	MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8232	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv8i16,
8233	RC);
8234	break;
8235	case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
8236	Opc = AArch64::MLSv8i16_indexed;
8237	RC = &AArch64::FPR128RegClass;
8238	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8239	break;
8240	case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
8241	Opc = AArch64::MLAv2i32_indexed;
8242	RC = &AArch64::FPR64RegClass;
8243	MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8244	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv2i32,
8245	RC);
8246	break;
8247	case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
8248	Opc = AArch64::MLSv2i32_indexed;
8249	RC = &AArch64::FPR64RegClass;
8250	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8251	break;
8252	case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
8253	Opc = AArch64::MLAv4i32_indexed;
8254	RC = &AArch64::FPR128RegClass;
8255	MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8256	InstrIdxForVirtReg, IdxMulOpd: `1`, MaddOpc: Opc, MnegOpc: AArch64::NEGv4i32,
8257	RC);
8258	break;
8259	case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
8260	Opc = AArch64::MLSv4i32_indexed;
8261	RC = &AArch64::FPR128RegClass;
8262	MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8263	break;
8264
8265	// Floating Point Support
8266	case AArch64MachineCombinerPattern::FMULADDH_OP1:
8267	Opc = AArch64::FMADDHrrr;
8268	RC = &AArch64::FPR16RegClass;
8269	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8270	break;
8271	case AArch64MachineCombinerPattern::FMULADDS_OP1:
8272	Opc = AArch64::FMADDSrrr;
8273	RC = &AArch64::FPR32RegClass;
8274	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8275	break;
8276	case AArch64MachineCombinerPattern::FMULADDD_OP1:
8277	Opc = AArch64::FMADDDrrr;
8278	RC = &AArch64::FPR64RegClass;
8279	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8280	break;
8281
8282	case AArch64MachineCombinerPattern::FMULADDH_OP2:
8283	Opc = AArch64::FMADDHrrr;
8284	RC = &AArch64::FPR16RegClass;
8285	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8286	break;
8287	case AArch64MachineCombinerPattern::FMULADDS_OP2:
8288	Opc = AArch64::FMADDSrrr;
8289	RC = &AArch64::FPR32RegClass;
8290	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8291	break;
8292	case AArch64MachineCombinerPattern::FMULADDD_OP2:
8293	Opc = AArch64::FMADDDrrr;
8294	RC = &AArch64::FPR64RegClass;
8295	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8296	break;
8297
8298	case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
8299	Opc = AArch64::FMLAv1i32_indexed;
8300	RC = &AArch64::FPR32RegClass;
8301	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8302	kind: FMAInstKind::Indexed);
8303	break;
8304	case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
8305	Opc = AArch64::FMLAv1i32_indexed;
8306	RC = &AArch64::FPR32RegClass;
8307	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8308	kind: FMAInstKind::Indexed);
8309	break;
8310
8311	case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
8312	Opc = AArch64::FMLAv1i64_indexed;
8313	RC = &AArch64::FPR64RegClass;
8314	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8315	kind: FMAInstKind::Indexed);
8316	break;
8317	case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
8318	Opc = AArch64::FMLAv1i64_indexed;
8319	RC = &AArch64::FPR64RegClass;
8320	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8321	kind: FMAInstKind::Indexed);
8322	break;
8323
8324	case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
8325	RC = &AArch64::FPR64RegClass;
8326	Opc = AArch64::FMLAv4i16_indexed;
8327	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8328	kind: FMAInstKind::Indexed);
8329	break;
8330	case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
8331	RC = &AArch64::FPR64RegClass;
8332	Opc = AArch64::FMLAv4f16;
8333	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8334	kind: FMAInstKind::Accumulator);
8335	break;
8336	case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
8337	RC = &AArch64::FPR64RegClass;
8338	Opc = AArch64::FMLAv4i16_indexed;
8339	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8340	kind: FMAInstKind::Indexed);
8341	break;
8342	case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
8343	RC = &AArch64::FPR64RegClass;
8344	Opc = AArch64::FMLAv4f16;
8345	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8346	kind: FMAInstKind::Accumulator);
8347	break;
8348
8349	case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
8350	case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
8351	RC = &AArch64::FPR64RegClass;
8352	if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
8353	Opc = AArch64::FMLAv2i32_indexed;
8354	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8355	kind: FMAInstKind::Indexed);
8356	} else {
8357	Opc = AArch64::FMLAv2f32;
8358	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8359	kind: FMAInstKind::Accumulator);
8360	}
8361	break;
8362	case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
8363	case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
8364	RC = &AArch64::FPR64RegClass;
8365	if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
8366	Opc = AArch64::FMLAv2i32_indexed;
8367	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8368	kind: FMAInstKind::Indexed);
8369	} else {
8370	Opc = AArch64::FMLAv2f32;
8371	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8372	kind: FMAInstKind::Accumulator);
8373	}
8374	break;
8375
8376	case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
8377	RC = &AArch64::FPR128RegClass;
8378	Opc = AArch64::FMLAv8i16_indexed;
8379	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8380	kind: FMAInstKind::Indexed);
8381	break;
8382	case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
8383	RC = &AArch64::FPR128RegClass;
8384	Opc = AArch64::FMLAv8f16;
8385	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8386	kind: FMAInstKind::Accumulator);
8387	break;
8388	case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
8389	RC = &AArch64::FPR128RegClass;
8390	Opc = AArch64::FMLAv8i16_indexed;
8391	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8392	kind: FMAInstKind::Indexed);
8393	break;
8394	case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
8395	RC = &AArch64::FPR128RegClass;
8396	Opc = AArch64::FMLAv8f16;
8397	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8398	kind: FMAInstKind::Accumulator);
8399	break;
8400
8401	case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
8402	case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
8403	RC = &AArch64::FPR128RegClass;
8404	if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
8405	Opc = AArch64::FMLAv2i64_indexed;
8406	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8407	kind: FMAInstKind::Indexed);
8408	} else {
8409	Opc = AArch64::FMLAv2f64;
8410	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8411	kind: FMAInstKind::Accumulator);
8412	}
8413	break;
8414	case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
8415	case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
8416	RC = &AArch64::FPR128RegClass;
8417	if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
8418	Opc = AArch64::FMLAv2i64_indexed;
8419	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8420	kind: FMAInstKind::Indexed);
8421	} else {
8422	Opc = AArch64::FMLAv2f64;
8423	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8424	kind: FMAInstKind::Accumulator);
8425	}
8426	break;
8427
8428	case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
8429	case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
8430	RC = &AArch64::FPR128RegClass;
8431	if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
8432	Opc = AArch64::FMLAv4i32_indexed;
8433	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8434	kind: FMAInstKind::Indexed);
8435	} else {
8436	Opc = AArch64::FMLAv4f32;
8437	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8438	kind: FMAInstKind::Accumulator);
8439	}
8440	break;
8441
8442	case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
8443	case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
8444	RC = &AArch64::FPR128RegClass;
8445	if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
8446	Opc = AArch64::FMLAv4i32_indexed;
8447	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8448	kind: FMAInstKind::Indexed);
8449	} else {
8450	Opc = AArch64::FMLAv4f32;
8451	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8452	kind: FMAInstKind::Accumulator);
8453	}
8454	break;
8455
8456	case AArch64MachineCombinerPattern::FMULSUBH_OP1:
8457	Opc = AArch64::FNMSUBHrrr;
8458	RC = &AArch64::FPR16RegClass;
8459	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8460	break;
8461	case AArch64MachineCombinerPattern::FMULSUBS_OP1:
8462	Opc = AArch64::FNMSUBSrrr;
8463	RC = &AArch64::FPR32RegClass;
8464	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8465	break;
8466	case AArch64MachineCombinerPattern::FMULSUBD_OP1:
8467	Opc = AArch64::FNMSUBDrrr;
8468	RC = &AArch64::FPR64RegClass;
8469	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8470	break;
8471
8472	case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
8473	Opc = AArch64::FNMADDHrrr;
8474	RC = &AArch64::FPR16RegClass;
8475	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8476	break;
8477	case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
8478	Opc = AArch64::FNMADDSrrr;
8479	RC = &AArch64::FPR32RegClass;
8480	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8481	break;
8482	case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
8483	Opc = AArch64::FNMADDDrrr;
8484	RC = &AArch64::FPR64RegClass;
8485	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC);
8486	break;
8487
8488	case AArch64MachineCombinerPattern::FMULSUBH_OP2:
8489	Opc = AArch64::FMSUBHrrr;
8490	RC = &AArch64::FPR16RegClass;
8491	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8492	break;
8493	case AArch64MachineCombinerPattern::FMULSUBS_OP2:
8494	Opc = AArch64::FMSUBSrrr;
8495	RC = &AArch64::FPR32RegClass;
8496	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8497	break;
8498	case AArch64MachineCombinerPattern::FMULSUBD_OP2:
8499	Opc = AArch64::FMSUBDrrr;
8500	RC = &AArch64::FPR64RegClass;
8501	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC);
8502	break;
8503
8504	case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
8505	Opc = AArch64::FMLSv1i32_indexed;
8506	RC = &AArch64::FPR32RegClass;
8507	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8508	kind: FMAInstKind::Indexed);
8509	break;
8510
8511	case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
8512	Opc = AArch64::FMLSv1i64_indexed;
8513	RC = &AArch64::FPR64RegClass;
8514	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8515	kind: FMAInstKind::Indexed);
8516	break;
8517
8518	case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
8519	case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
8520	RC = &AArch64::FPR64RegClass;
8521	Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8522	MachineInstrBuilder MIB1 =
8523	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: AArch64::FNEGv4f16), DestReg: NewVR)
8524	.add(MO: Root.getOperand(i: `2`));
8525	InsInstrs.push_back(Elt: MIB1);
8526	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
8527	if (Pattern == AArch64MachineCombinerPattern::FMLSv4f16_OP1) {
8528	Opc = AArch64::FMLAv4f16;
8529	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8530	kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8531	} else {
8532	Opc = AArch64::FMLAv4i16_indexed;
8533	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8534	kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8535	}
8536	break;
8537	}
8538	case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
8539	RC = &AArch64::FPR64RegClass;
8540	Opc = AArch64::FMLSv4f16;
8541	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8542	kind: FMAInstKind::Accumulator);
8543	break;
8544	case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
8545	RC = &AArch64::FPR64RegClass;
8546	Opc = AArch64::FMLSv4i16_indexed;
8547	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8548	kind: FMAInstKind::Indexed);
8549	break;
8550
8551	case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
8552	case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
8553	RC = &AArch64::FPR64RegClass;
8554	if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
8555	Opc = AArch64::FMLSv2i32_indexed;
8556	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8557	kind: FMAInstKind::Indexed);
8558	} else {
8559	Opc = AArch64::FMLSv2f32;
8560	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8561	kind: FMAInstKind::Accumulator);
8562	}
8563	break;
8564
8565	case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
8566	case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
8567	RC = &AArch64::FPR128RegClass;
8568	Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8569	MachineInstrBuilder MIB1 =
8570	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: AArch64::FNEGv8f16), DestReg: NewVR)
8571	.add(MO: Root.getOperand(i: `2`));
8572	InsInstrs.push_back(Elt: MIB1);
8573	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
8574	if (Pattern == AArch64MachineCombinerPattern::FMLSv8f16_OP1) {
8575	Opc = AArch64::FMLAv8f16;
8576	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8577	kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8578	} else {
8579	Opc = AArch64::FMLAv8i16_indexed;
8580	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8581	kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8582	}
8583	break;
8584	}
8585	case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
8586	RC = &AArch64::FPR128RegClass;
8587	Opc = AArch64::FMLSv8f16;
8588	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8589	kind: FMAInstKind::Accumulator);
8590	break;
8591	case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
8592	RC = &AArch64::FPR128RegClass;
8593	Opc = AArch64::FMLSv8i16_indexed;
8594	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8595	kind: FMAInstKind::Indexed);
8596	break;
8597
8598	case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
8599	case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
8600	RC = &AArch64::FPR128RegClass;
8601	if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
8602	Opc = AArch64::FMLSv2i64_indexed;
8603	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8604	kind: FMAInstKind::Indexed);
8605	} else {
8606	Opc = AArch64::FMLSv2f64;
8607	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8608	kind: FMAInstKind::Accumulator);
8609	}
8610	break;
8611
8612	case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
8613	case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
8614	RC = &AArch64::FPR128RegClass;
8615	if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
8616	Opc = AArch64::FMLSv4i32_indexed;
8617	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8618	kind: FMAInstKind::Indexed);
8619	} else {
8620	Opc = AArch64::FMLSv4f32;
8621	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `2`, MaddOpc: Opc, RC,
8622	kind: FMAInstKind::Accumulator);
8623	}
8624	break;
8625	case AArch64MachineCombinerPattern::FMLSv2f32_OP1:
8626	case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
8627	RC = &AArch64::FPR64RegClass;
8628	Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8629	MachineInstrBuilder MIB1 =
8630	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: AArch64::FNEGv2f32), DestReg: NewVR)
8631	.add(MO: Root.getOperand(i: `2`));
8632	InsInstrs.push_back(Elt: MIB1);
8633	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
8634	if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
8635	Opc = AArch64::FMLAv2i32_indexed;
8636	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8637	kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8638	} else {
8639	Opc = AArch64::FMLAv2f32;
8640	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8641	kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8642	}
8643	break;
8644	}
8645	case AArch64MachineCombinerPattern::FMLSv4f32_OP1:
8646	case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
8647	RC = &AArch64::FPR128RegClass;
8648	Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8649	MachineInstrBuilder MIB1 =
8650	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: AArch64::FNEGv4f32), DestReg: NewVR)
8651	.add(MO: Root.getOperand(i: `2`));
8652	InsInstrs.push_back(Elt: MIB1);
8653	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
8654	if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
8655	Opc = AArch64::FMLAv4i32_indexed;
8656	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8657	kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8658	} else {
8659	Opc = AArch64::FMLAv4f32;
8660	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8661	kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8662	}
8663	break;
8664	}
8665	case AArch64MachineCombinerPattern::FMLSv2f64_OP1:
8666	case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
8667	RC = &AArch64::FPR128RegClass;
8668	Register NewVR = MRI.createVirtualRegister(RegClass: RC);
8669	MachineInstrBuilder MIB1 =
8670	BuildMI(MF, MIMD: MIMetadata (Root), MCID: TII->get(Opcode: AArch64::FNEGv2f64), DestReg: NewVR)
8671	.add(MO: Root.getOperand(i: `2`));
8672	InsInstrs.push_back(Elt: MIB1);
8673	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: `0`));
8674	if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
8675	Opc = AArch64::FMLAv2i64_indexed;
8676	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8677	kind: FMAInstKind::Indexed, ReplacedAddend: &NewVR);
8678	} else {
8679	Opc = AArch64::FMLAv2f64;
8680	MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd: `1`, MaddOpc: Opc, RC,
8681	kind: FMAInstKind::Accumulator, ReplacedAddend: &NewVR);
8682	}
8683	break;
8684	}
8685	case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
8686	case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: {
8687	unsigned IdxDupOp =
8688	(Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? `1`
8689	: `2`;
8690	genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv2i32_indexed,
8691	RC: &AArch64::FPR128RegClass, MRI);
8692	break;
8693	}
8694	case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
8695	case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: {
8696	unsigned IdxDupOp =
8697	(Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? `1`
8698	: `2`;
8699	genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv2i64_indexed,
8700	RC: &AArch64::FPR128RegClass, MRI);
8701	break;
8702	}
8703	case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
8704	case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: {
8705	unsigned IdxDupOp =
8706	(Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? `1`
8707	: `2`;
8708	genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv4i16_indexed,
8709	RC: &AArch64::FPR128_loRegClass, MRI);
8710	break;
8711	}
8712	case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
8713	case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: {
8714	unsigned IdxDupOp =
8715	(Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? `1`
8716	: `2`;
8717	genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv4i32_indexed,
8718	RC: &AArch64::FPR128RegClass, MRI);
8719	break;
8720	}
8721	case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
8722	case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: {
8723	unsigned IdxDupOp =
8724	(Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? `1`
8725	: `2`;
8726	genIndexedMultiply(Root, InsInstrs, IdxDupOp, MulOpc: AArch64::FMULv8i16_indexed,
8727	RC: &AArch64::FPR128_loRegClass, MRI);
8728	break;
8729	}
8730	case AArch64MachineCombinerPattern::FNMADD: {
8731	MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
8732	break;
8733	}
8734
8735	} // end switch (Pattern)
8736	// Record MUL and ADD/SUB for deletion
8737	if (MUL)
8738	DelInstrs.push_back(Elt: MUL);
8739	DelInstrs.push_back(Elt: &Root);
8740
8741	// Set the flags on the inserted instructions to be the merged flags of the
8742	// instructions that we have combined.
8743	uint32_t Flags = Root.getFlags();
8744	if (MUL)
8745	Flags = Root.mergeFlagsWith(Other: *MUL);
8746	for (auto *MI : InsInstrs)
8747	MI->setFlags(Flags);
8748	}
8749
8750	/// Replace csincr-branch sequence by simple conditional branch
8751	///
8752	/// Examples:
8753	/// 1. \code
8754	/// csinc w9, wzr, wzr, <condition code>
8755	/// tbnz w9, #0, 0x44
8756	/// \endcode
8757	/// to
8758	/// \code
8759	/// b.<inverted condition code>
8760	/// \endcode
8761	///
8762	/// 2. \code
8763	/// csinc w9, wzr, wzr, <condition code>
8764	/// tbz w9, #0, 0x44
8765	/// \endcode
8766	/// to
8767	/// \code
8768	/// b.<condition code>
8769	/// \endcode
8770	///
8771	/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
8772	/// compare's constant operand is power of 2.
8773	///
8774	/// Examples:
8775	/// \code
8776	/// and w8, w8, #0x400
8777	/// cbnz w8, L1
8778	/// \endcode
8779	/// to
8780	/// \code
8781	/// tbnz w8, #10, L1
8782	/// \endcode
8783	///
8784	/// \param MI Conditional Branch
8785	/// \return True when the simple conditional branch is generated
8786	///
8787	bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
8788	bool IsNegativeBranch = false;
8789	bool IsTestAndBranch = false;
8790	unsigned TargetBBInMI = `0`;
8791	switch (MI.getOpcode()) {
8792	default:
8793	llvm_unreachable("Unknown branch instruction?");
8794	case AArch64::Bcc:
8795	case AArch64::CBWPri:
8796	case AArch64::CBXPri:
8797	case AArch64::CBWPrr:
8798	case AArch64::CBXPrr:
8799	return false;
8800	case AArch64::CBZW:
8801	case AArch64::CBZX:
8802	TargetBBInMI = `1`;
8803	break;
8804	case AArch64::CBNZW:
8805	case AArch64::CBNZX:
8806	TargetBBInMI = `1`;
8807	IsNegativeBranch = true;
8808	break;
8809	case AArch64::TBZW:
8810	case AArch64::TBZX:
8811	TargetBBInMI = `2`;
8812	IsTestAndBranch = true;
8813	break;
8814	case AArch64::TBNZW:
8815	case AArch64::TBNZX:
8816	TargetBBInMI = `2`;
8817	IsNegativeBranch = true;
8818	IsTestAndBranch = true;
8819	break;
8820	}
8821	// So we increment a zero register and test for bits other
8822	// than bit 0? Conservatively bail out in case the verifier
8823	// missed this case.
8824	if (IsTestAndBranch && MI.getOperand(i: `1`).getImm())
8825	return false;
8826
8827	// Find Definition.
8828	assert(MI.getParent() && "Incomplete machine instruction\n");
8829	MachineBasicBlock *MBB = MI.getParent();
8830	MachineFunction *MF = MBB->getParent();
8831	MachineRegisterInfo *MRI = &MF->getRegInfo();
8832	Register VReg = MI.getOperand(i: `0`).getReg();
8833	if (!VReg.isVirtual())
8834	return false;
8835
8836	MachineInstr *DefMI = MRI->getVRegDef(Reg: VReg);
8837
8838	// Look through COPY instructions to find definition.
8839	while (DefMI->isCopy()) {
8840	Register CopyVReg = DefMI->getOperand(i: `1`).getReg();
8841	if (!MRI->hasOneNonDBGUse(RegNo: CopyVReg))
8842	return false;
8843	if (!MRI->hasOneDef(RegNo: CopyVReg))
8844	return false;
8845	DefMI = MRI->getVRegDef(Reg: CopyVReg);
8846	}
8847
8848	switch (DefMI->getOpcode()) {
8849	default:
8850	return false;
8851	// Fold AND into a TBZ/TBNZ if constant operand is power of 2.
8852	case AArch64::ANDWri:
8853	case AArch64::ANDXri: {
8854	if (IsTestAndBranch)
8855	return false;
8856	if (DefMI->getParent() != MBB)
8857	return false;
8858	if (!MRI->hasOneNonDBGUse(RegNo: VReg))
8859	return false;
8860
8861	bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
8862	uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
8863	val: DefMI->getOperand(i: `2`).getImm(), regSize: Is32Bit ? `32` : `64`);
8864	if (!isPowerOf2_64(Value: Mask))
8865	return false;
8866
8867	MachineOperand &MO = DefMI->getOperand(i: `1`);
8868	Register NewReg = MO.getReg();
8869	if (!NewReg.isVirtual())
8870	return false;
8871
8872	assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8873
8874	MachineBasicBlock &RefToMBB = *MBB;
8875	MachineBasicBlock *TBB = MI.getOperand(i: `1`).getMBB();
8876	DebugLoc DL = MI.getDebugLoc();
8877	unsigned Imm = Log2_64(Value: Mask);
8878	unsigned Opc = (Imm < `32`)
8879	? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8880	: (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8881	MachineInstr *NewMI = BuildMI(BB&: RefToMBB, I&: MI, MIMD: DL, MCID: get(Opcode: Opc))
8882	.addReg(RegNo: NewReg)
8883	.addImm(Val: Imm)
8884	.addMBB(MBB: TBB);
8885	// Register lives on to the CBZ now.
8886	MO.setIsKill(false);
8887
8888	// For immediate smaller than 32, we need to use the 32-bit
8889	// variant (W) in all cases. Indeed the 64-bit variant does not
8890	// allow to encode them.
8891	// Therefore, if the input register is 64-bit, we need to take the
8892	// 32-bit sub-part.
8893	if (!Is32Bit && Imm < `32`)
8894	NewMI->getOperand(i: `0`).setSubReg(AArch64::sub_32);
8895	MI.eraseFromParent();
8896	return true;
8897	}
8898	// Look for CSINC
8899	case AArch64::CSINCWr:
8900	case AArch64::CSINCXr: {
8901	if (!(DefMI->getOperand(i: `1`).getReg() == AArch64::WZR &&
8902	DefMI->getOperand(i: `2`).getReg() == AArch64::WZR) &&
8903	!(DefMI->getOperand(i: `1`).getReg() == AArch64::XZR &&
8904	DefMI->getOperand(i: `2`).getReg() == AArch64::XZR))
8905	return false;
8906
8907	if (DefMI->findRegisterDefOperandIdx(Reg: AArch64::NZCV, /TRI=/nullptr,
8908	isDead: true) != -`1`)
8909	return false;
8910
8911	AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(i: `3`).getImm();
8912	// Convert only when the condition code is not modified between
8913	// the CSINC and the branch. The CC may be used by other
8914	// instructions in between.
8915	if (areCFlagsAccessedBetweenInstrs(From: DefMI, To: MI, TRI: &getRegisterInfo(), AccessToCheck: AK_Write))
8916	return false;
8917	MachineBasicBlock &RefToMBB = *MBB;
8918	MachineBasicBlock *TBB = MI.getOperand(i: TargetBBInMI).getMBB();
8919	DebugLoc DL = MI.getDebugLoc();
8920	if (IsNegativeBranch)
8921	CC = AArch64CC::getInvertedCondCode(Code: CC);
8922	BuildMI(BB&: RefToMBB, I&: MI, MIMD: DL, MCID: get(Opcode: AArch64::Bcc)).addImm(Val: CC).addMBB(MBB: TBB);
8923	MI.eraseFromParent();
8924	return true;
8925	}
8926	}
8927	}
8928
8929	std::pair<unsigned, unsigned>
8930	AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
8931	const unsigned Mask = AArch64II::MO_FRAGMENT;
8932	return std::make_pair(x: TF & Mask, y: TF & ~Mask);
8933	}
8934
8935	ArrayRef<std::pair<unsigned, const char *>>
8936	AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
8937	using namespace AArch64II;
8938
8939	static const std::pair<unsigned, const char *> TargetFlags[] = {
8940	{MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8941	{MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8942	{MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8943	{MO_HI12, "aarch64-hi12"}};
8944	return ArrayRef(TargetFlags);
8945	}
8946
8947	ArrayRef<std::pair<unsigned, const char *>>
8948	AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
8949	using namespace AArch64II;
8950
8951	static const std::pair<unsigned, const char *> TargetFlags[] = {
8952	{MO_COFFSTUB, "aarch64-coffstub"},
8953	{MO_GOT, "aarch64-got"},
8954	{MO_NC, "aarch64-nc"},
8955	{MO_S, "aarch64-s"},
8956	{MO_TLS, "aarch64-tls"},
8957	{MO_DLLIMPORT, "aarch64-dllimport"},
8958	{MO_PREL, "aarch64-prel"},
8959	{MO_TAGGED, "aarch64-tagged"},
8960	{MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8961	};
8962	return ArrayRef(TargetFlags);
8963	}
8964
8965	ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
8966	AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
8967	static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8968	{{MOSuppressPair, "aarch64-suppress-pair"},
8969	{MOStridedAccess, "aarch64-strided-access"}};
8970	return ArrayRef(TargetFlags);
8971	}
8972
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION        I1
/// RET                               I2
///                                   RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3                                I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1                                OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// BL f                              I2
///                                   B f
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1     Save LR                    OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION       I1
/// I3     Restore LR                 I2
///                                   I3
///                                   RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? No
enum MachineOutlinerClass {
  /// Emit a save, restore, call, and return.
  MachineOutlinerDefault = 0,
  /// Only emit a branch.
  MachineOutlinerTailCall = 1,
  /// Emit a call and return.
  MachineOutlinerNoLRSave = 2,
  /// Emit a call and tail-call.
  MachineOutlinerThunk = 3,
  /// Same as default, but save to a register.
  MachineOutlinerRegSave = 4
};
9060
/// Bit flags describing properties of a basic block that holds an outlining
/// candidate. The values are distinct single bits so they can be combined and
/// tested with bitwise operators. Bit 0 (0x1) is not assigned here.
enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 1 << 1, // 0x2
  HasCalls = 1 << 2,               // 0x4
  UnsafeRegsDead = 1 << 3          // 0x8
};
9066
9067	Register
9068	AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9069	MachineFunction *MF = C.getMF();
9070	const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9071	const AArch64RegisterInfo *ARI =
9072	static_cast<const AArch64RegisterInfo *>(&TRI);
9073	// Check if there is an available register across the sequence that we can
9074	// use.
9075	for (unsigned Reg : AArch64::GPR64RegClass) {
9076	if (!ARI->isReservedReg(MF: *MF, Reg) &&
9077	Reg != AArch64::LR && // LR is not reserved, but don't use it.
9078	Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9079	Reg != AArch64::X17 && // Ditto for X17.
9080	C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9081	C.isAvailableInsideSeq(Reg, TRI))
9082	return Reg;
9083	}
9084	return Register ();
9085	}
9086
9087	static bool
9088	outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
9089	const outliner::Candidate &b) {
9090	const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9091	const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9092
9093	return MFIa->shouldSignReturnAddress(SpillsLR: false) == MFIb->shouldSignReturnAddress(SpillsLR: false) &&
9094	MFIa->shouldSignReturnAddress(SpillsLR: true) == MFIb->shouldSignReturnAddress(SpillsLR: true);
9095	}
9096
9097	static bool
9098	outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
9099	const outliner::Candidate &b) {
9100	const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9101	const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9102
9103	return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9104	}
9105
9106	static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
9107	const outliner::Candidate &b) {
9108	const AArch64Subtarget &SubtargetA =
9109	a.getMF()->getSubtarget<AArch64Subtarget>();
9110	const AArch64Subtarget &SubtargetB =
9111	b.getMF()->getSubtarget<AArch64Subtarget>();
9112	return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9113	}
9114
9115	std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9116	AArch64InstrInfo::getOutliningCandidateInfo(
9117	const MachineModuleInfo &MMI,
9118	std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9119	unsigned MinRepeats) const {
9120	unsigned SequenceSize = `0`;
9121	for (auto &MI : RepeatedSequenceLocs [`0`])
9122	SequenceSize += getInstSizeInBytes(MI);
9123
9124	unsigned NumBytesToCreateFrame = `0`;
9125
9126	// We only allow outlining for functions having exactly matching return
9127	// address signing attributes, i.e., all share the same value for the
9128	// attribute "sign-return-address" and all share the same type of key they
9129	// are signed with.
9130	// Additionally we require all functions to simultaneously either support
9131	// v8.3a features or not. Otherwise an outlined function could get signed
9132	// using dedicated v8.3 instructions and a call from a function that doesn't
9133	// support v8.3 instructions would therefore be invalid.
9134	if (std::adjacent_find(
9135	first: RepeatedSequenceLocs.begin(), last: RepeatedSequenceLocs.end(),
9136	binary_pred: [](const outliner::Candidate &a, const outliner::Candidate &b) {
9137	// Return true if a and b are non-equal w.r.t. return address
9138	// signing or support of v8.3a features
9139	if (outliningCandidatesSigningScopeConsensus(a, b) &&
9140	outliningCandidatesSigningKeyConsensus(a, b) &&
9141	outliningCandidatesV8_3OpsConsensus(a, b)) {
9142	return false;
9143	}
9144	return true;
9145	}) != RepeatedSequenceLocs.end()) {
9146	return std::nullopt;
9147	}
9148
9149	// Since at this point all candidates agree on their return address signing
9150	// picking just one is fine. If the candidate functions potentially sign their
9151	// return addresses, the outlined function should do the same. Note that in
9152	// the case of "sign-return-address"="non-leaf" this is an assumption: It is
9153	// not certainly true that the outlined function will have to sign its return
9154	// address but this decision is made later, when the decision to outline
9155	// has already been made.
9156	// The same holds for the number of additional instructions we need: On
9157	// v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9158	// necessary. However, at this point we don't know if the outlined function
9159	// will have a RET instruction so we assume the worst.
9160	const TargetRegisterInfo &TRI = getRegisterInfo();
9161	// Performing a tail call may require extra checks when PAuth is enabled.
9162	// If PAuth is disabled, set it to zero for uniformity.
9163	unsigned NumBytesToCheckLRInTCEpilogue = `0`;
9164	if (RepeatedSequenceLocs [`0`]
9165	.getMF()
9166	->getInfo<AArch64FunctionInfo>()
9167	->shouldSignReturnAddress(SpillsLR: true)) {
9168	// One PAC and one AUT instructions
9169	NumBytesToCreateFrame += `8`;
9170
9171	// PAuth is enabled - set extra tail call cost, if any.
9172	auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9173	MF: *RepeatedSequenceLocs [`0`].getMF());
9174	NumBytesToCheckLRInTCEpilogue =
9175	AArch64PAuth::getCheckerSizeInBytes(Method: LRCheckMethod);
9176	// Checking the authenticated LR value may significantly impact
9177	// SequenceSize, so account for it for more precise results.
9178	if (isTailCallReturnInst(MI: RepeatedSequenceLocs [`0`].back()))
9179	SequenceSize += NumBytesToCheckLRInTCEpilogue;
9180
9181	// We have to check if sp modifying instructions would get outlined.
9182	// If so we only allow outlining if sp is unchanged overall, so matching
9183	// sub and add instructions are okay to outline, all other sp modifications
9184	// are not
9185	auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9186	int SPValue = `0`;
9187	for (auto &MI : C) {
9188	if (MI.modifiesRegister(Reg: AArch64::SP, TRI: &TRI)) {
9189	switch (MI.getOpcode()) {
9190	case AArch64::ADDXri:
9191	case AArch64::ADDWri:
9192	assert(MI.getNumOperands() == `4` && "Wrong number of operands");
9193	assert(MI.getOperand(`2`).isImm() &&
9194	"Expected operand to be immediate");
9195	assert(MI.getOperand(`1`).isReg() &&
9196	"Expected operand to be a register");
9197	// Check if the add just increments sp. If so, we search for
9198	// matching sub instructions that decrement sp. If not, the
9199	// modification is illegal
9200	if (MI.getOperand(i: `1`).getReg() == AArch64::SP)
9201	SPValue += MI.getOperand(i: `2`).getImm();
9202	else
9203	return true;
9204	break;
9205	case AArch64::SUBXri:
9206	case AArch64::SUBWri:
9207	assert(MI.getNumOperands() == `4` && "Wrong number of operands");
9208	assert(MI.getOperand(`2`).isImm() &&
9209	"Expected operand to be immediate");
9210	assert(MI.getOperand(`1`).isReg() &&
9211	"Expected operand to be a register");
9212	// Check if the sub just decrements sp. If so, we search for
9213	// matching add instructions that increment sp. If not, the
9214	// modification is illegal
9215	if (MI.getOperand(i: `1`).getReg() == AArch64::SP)
9216	SPValue -= MI.getOperand(i: `2`).getImm();
9217	else
9218	return true;
9219	break;
9220	default:
9221	return true;
9222	}
9223	}
9224	}
9225	if (SPValue)
9226	return true;
9227	return false;
9228	};
9229	// Remove candidates with illegal stack modifying instructions
9230	llvm::erase_if(C&: RepeatedSequenceLocs, P: hasIllegalSPModification);
9231
9232	// If the sequence doesn't have enough candidates left, then we're done.
9233	if (RepeatedSequenceLocs.size() < MinRepeats)
9234	return std::nullopt;
9235	}
9236
9237	// Properties about candidate MBBs that hold for all of them.
9238	unsigned FlagsSetInAll = `0xF`;
9239
9240	// Compute liveness information for each candidate, and set FlagsSetInAll.
9241	for (outliner::Candidate &C : RepeatedSequenceLocs)
9242	FlagsSetInAll &= C.Flags;
9243
9244	unsigned LastInstrOpcode = RepeatedSequenceLocs [`0`].back().getOpcode();
9245
9246	// Helper lambda which sets call information for every candidate.
9247	auto SetCandidateCallInfo =
9248	[&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9249	for (outliner::Candidate &C : RepeatedSequenceLocs)
9250	C.setCallInfo(CID: CallID, CO: NumBytesForCall);
9251	};
9252
9253	unsigned FrameID = MachineOutlinerDefault;
9254	NumBytesToCreateFrame += `4`;
9255
9256	bool HasBTI = any_of(Range&: RepeatedSequenceLocs, P: [](outliner::Candidate &C) {
9257	return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9258	});
9259
9260	// We check to see if CFI Instructions are present, and if they are
9261	// we find the number of CFI Instructions in the candidates.
9262	unsigned CFICount = `0`;
9263	for (auto &I : RepeatedSequenceLocs [`0`]) {
9264	if (I.isCFIInstruction())
9265	CFICount++;
9266	}
9267
9268	// We compare the number of found CFI Instructions to the number of CFI
9269	// instructions in the parent function for each candidate. We must check this
9270	// since if we outline one of the CFI instructions in a function, we have to
9271	// outline them all for correctness. If we do not, the address offsets will be
9272	// incorrect between the two sections of the program.
9273	for (outliner::Candidate &C : RepeatedSequenceLocs) {
9274	std::vector<MCCFIInstruction> CFIInstructions =
9275	C.getMF()->getFrameInstructions();
9276
9277	if (CFICount > `0` && CFICount != CFIInstructions.size())
9278	return std::nullopt;
9279	}
9280
9281	// Returns true if an instructions is safe to fix up, false otherwise.
9282	auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9283	if (MI.isCall())
9284	return true;
9285
9286	if (!MI.modifiesRegister(Reg: AArch64::SP, TRI: &TRI) &&
9287	!MI.readsRegister(Reg: AArch64::SP, TRI: &TRI))
9288	return true;
9289
9290	// Any modification of SP will break our code to save/restore LR.
9291	// FIXME: We could handle some instructions which add a constant
9292	// offset to SP, with a bit more work.
9293	if (MI.modifiesRegister(Reg: AArch64::SP, TRI: &TRI))
9294	return false;
9295
9296	// At this point, we have a stack instruction that we might need to
9297	// fix up. We'll handle it if it's a load or store.
9298	if (MI.mayLoadOrStore()) {
9299	const MachineOperand Base; // Filled with the base operand of MI.*
9300	int64_t Offset; // Filled with the offset of MI.
9301	bool OffsetIsScalable;
9302
9303	// Does it allow us to offset the base operand and is the base the
9304	// register SP?
9305	if (!getMemOperandWithOffset(MI, BaseOp&: Base, Offset, OffsetIsScalable, TRI: &TRI) \|\|
9306	!Base->isReg() \|\| Base->getReg() != AArch64::SP)
9307	return false;
9308
9309	// Fixe-up code below assumes bytes.
9310	if (OffsetIsScalable)
9311	return false;
9312
9313	// Find the minimum/maximum offset for this instruction and check
9314	// if fixing it up would be in range.
9315	int64_t MinOffset,
9316	MaxOffset; // Unscaled offsets for the instruction.
9317	// The scale to multiply the offsets by.
9318	TypeSize Scale(`0U`, false), DummyWidth(`0U`, false);
9319	getMemOpInfo(Opcode: MI.getOpcode(), Scale, Width&: DummyWidth, MinOffset, MaxOffset);
9320
9321	Offset += `16`; // Update the offset to what it would be if we outlined.
9322	if (Offset < MinOffset * (int64_t)Scale.getFixedValue() \|\|
9323	Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9324	return false;
9325
9326	// It's in range, so we can outline it.
9327	return true;
9328	}
9329
9330	// FIXME: Add handling for instructions like "add x0, sp, #8".
9331
9332	// We can't fix it up, so don't outline it.
9333	return false;
9334	};
9335
9336	// True if it's possible to fix up each stack instruction in this sequence.
9337	// Important for frames/call variants that modify the stack.
9338	bool AllStackInstrsSafe =
9339	llvm::all_of(Range&: RepeatedSequenceLocs [`0`], P: IsSafeToFixup);
9340
9341	// If the last instruction in any candidate is a terminator, then we should
9342	// tail call all of the candidates.
9343	if (RepeatedSequenceLocs [`0`].back().isTerminator()) {
9344	FrameID = MachineOutlinerTailCall;
9345	NumBytesToCreateFrame = `0`;
9346	unsigned NumBytesForCall = `4` + NumBytesToCheckLRInTCEpilogue;
9347	SetCandidateCallInfo (MachineOutlinerTailCall, NumBytesForCall);
9348	}
9349
9350	else if (LastInstrOpcode == AArch64::BL \|\|
9351	((LastInstrOpcode == AArch64::BLR \|\|
9352	LastInstrOpcode == AArch64::BLRNoIP) &&
9353	!HasBTI)) {
9354	// FIXME: Do we need to check if the code after this uses the value of LR?
9355	FrameID = MachineOutlinerThunk;
9356	NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9357	SetCandidateCallInfo (MachineOutlinerThunk, `4`);
9358	}
9359
9360	else {
9361	// We need to decide how to emit calls + frames. We can always emit the same
9362	// frame if we don't need to save to the stack. If we have to save to the
9363	// stack, then we need a different frame.
9364	unsigned NumBytesNoStackCalls = `0`;
9365	std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9366
9367	// Check if we have to save LR.
9368	for (outliner::Candidate &C : RepeatedSequenceLocs) {
9369	bool LRAvailable =
9370	(C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
9371	? C.isAvailableAcrossAndOutOfSeq(Reg: AArch64::LR, TRI)
9372	: true;
9373	// If we have a noreturn caller, then we're going to be conservative and
9374	// say that we have to save LR. If we don't have a ret at the end of the
9375	// block, then we can't reason about liveness accurately.
9376	//
9377	// FIXME: We can probably do better than always disabling this in
9378	// noreturn functions by fixing up the liveness info.
9379	bool IsNoReturn =
9380	C.getMF()->getFunction().hasFnAttribute(Kind: Attribute::NoReturn);
9381
9382	// Is LR available? If so, we don't need a save.
9383	if (LRAvailable && !IsNoReturn) {
9384	NumBytesNoStackCalls += `4`;
9385	C.setCallInfo(CID: MachineOutlinerNoLRSave, CO: `4`);
9386	CandidatesWithoutStackFixups.push_back(x: C);
9387	}
9388
9389	// Is an unused register available? If so, we won't modify the stack, so
9390	// we can outline with the same frame type as those that don't save LR.
9391	else if (findRegisterToSaveLRTo(C)) {
9392	NumBytesNoStackCalls += `12`;
9393	C.setCallInfo(CID: MachineOutlinerRegSave, CO: `12`);
9394	CandidatesWithoutStackFixups.push_back(x: C);
9395	}
9396
9397	// Is SP used in the sequence at all? If not, we don't have to modify
9398	// the stack, so we are guaranteed to get the same frame.
9399	else if (C.isAvailableInsideSeq(Reg: AArch64::SP, TRI)) {
9400	NumBytesNoStackCalls += `12`;
9401	C.setCallInfo(CID: MachineOutlinerDefault, CO: `12`);
9402	CandidatesWithoutStackFixups.push_back(x: C);
9403	}
9404
9405	// If we outline this, we need to modify the stack. Pretend we don't
9406	// outline this by saving all of its bytes.
9407	else {
9408	NumBytesNoStackCalls += SequenceSize;
9409	}
9410	}
9411
9412	// If there are no places where we have to save LR, then note that we
9413	// don't have to update the stack. Otherwise, give every candidate the
9414	// default call type, as long as it's safe to do so.
9415	if (!AllStackInstrsSafe \|\|
9416	NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * `12`) {
9417	RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9418	FrameID = MachineOutlinerNoLRSave;
9419	if (RepeatedSequenceLocs.size() < MinRepeats)
9420	return std::nullopt;
9421	} else {
9422	SetCandidateCallInfo (MachineOutlinerDefault, `12`);
9423
9424	// Bugzilla ID: 46767
9425	// TODO: Check if fixing up the stack more than once is safe so we can
9426	// outline these.
9427	//
9428	// An outline resulting in a caller that requires stack fixups at the
9429	// callsite to a callee that also requires stack fixups can happen when
9430	// there are no available registers at the candidate callsite for a
9431	// candidate that itself also has calls.
9432	//
9433	// In other words if function_containing_sequence in the following pseudo
9434	// assembly requires that we save LR at the point of the call, but there
9435	// are no available registers: in this case we save using SP and as a
9436	// result the SP offsets requires stack fixups by multiples of 16.
9437	//
9438	// function_containing_sequence:
9439	// ...
9440	// save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9441	// call OUTLINED_FUNCTION_N
9442	// restore LR from SP
9443	// ...
9444	//
9445	// OUTLINED_FUNCTION_N:
9446	// save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9447	// ...
9448	// bl foo
9449	// restore LR from SP
9450	// ret
9451	//
9452	// Because the code to handle more than one stack fixup does not
9453	// currently have the proper checks for legality, these cases will assert
9454	// in the AArch64 MachineOutliner. This is because the code to do this
9455	// needs more hardening, testing, better checks that generated code is
9456	// legal, etc and because it is only verified to handle a single pass of
9457	// stack fixup.
9458	//
9459	// The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9460	// these cases until they are known to be handled. Bugzilla 46767 is
9461	// referenced in comments at the assert site.
9462	//
9463	// To avoid asserting (or generating non-legal code on noassert builds)
9464	// we remove all candidates which would need more than one stack fixup by
9465	// pruning the cases where the candidate has calls while also having no
9466	// available LR and having no available general purpose registers to copy
9467	// LR to (ie one extra stack save/restore).
9468	//
9469	if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9470	erase_if(C&: RepeatedSequenceLocs, P: [this, &TRI](outliner::Candidate &C) {
9471	auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9472	return (llvm::any_of(Range&: C, P: IsCall)) &&
9473	(!C.isAvailableAcrossAndOutOfSeq(Reg: AArch64::LR, TRI) \|\|
9474	!findRegisterToSaveLRTo(C));
9475	});
9476	}
9477	}
9478
9479	// If we dropped all of the candidates, bail out here.
9480	if (RepeatedSequenceLocs.size() < MinRepeats)
9481	return std::nullopt;
9482	}
9483
9484	// Does every candidate's MBB contain a call? If so, then we might have a call
9485	// in the range.
9486	if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9487	// Check if the range contains a call. These require a save + restore of the
9488	// link register.
9489	outliner::Candidate &FirstCand = RepeatedSequenceLocs [`0`];
9490	bool ModStackToSaveLR = false;
9491	if (any_of(Range: drop_end(RangeOrContainer&: FirstCand),
9492	P: [](const MachineInstr &MI) { return MI.isCall(); }))
9493	ModStackToSaveLR = true;
9494
9495	// Handle the last instruction separately. If this is a tail call, then the
9496	// last instruction is a call. We don't want to save + restore in this case.
9497	// However, it could be possible that the last instruction is a call without
9498	// it being valid to tail call this sequence. We should consider this as
9499	// well.
9500	else if (FrameID != MachineOutlinerThunk &&
9501	FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9502	ModStackToSaveLR = true;
9503
9504	if (ModStackToSaveLR) {
9505	// We can't fix up the stack. Bail out.
9506	if (!AllStackInstrsSafe)
9507	return std::nullopt;
9508
9509	// Save + restore LR.
9510	NumBytesToCreateFrame += `8`;
9511	}
9512	}
9513
9514	// If we have CFI instructions, we can only outline if the outlined section
9515	// can be a tail call
9516	if (FrameID != MachineOutlinerTailCall && CFICount > `0`)
9517	return std::nullopt;
9518
9519	return std::make_unique<outliner::OutlinedFunction>(
9520	args&: RepeatedSequenceLocs, args&: SequenceSize, args&: NumBytesToCreateFrame, args&: FrameID);
9521	}
9522
9523	void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9524	Function &F, std::vector<outliner::Candidate> &Candidates) const {
9525	// If a bunch of candidates reach this point they must agree on their return
9526	// address signing. It is therefore enough to just consider the signing
9527	// behaviour of one of them
9528	const auto &CFn = Candidates.front().getMF()->getFunction();
9529
9530	if (CFn.hasFnAttribute(Kind: "ptrauth-returns"))
9531	F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "ptrauth-returns"));
9532	if (CFn.hasFnAttribute(Kind: "ptrauth-auth-traps"))
9533	F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "ptrauth-auth-traps"));
9534	// Since all candidates belong to the same module, just copy the
9535	// function-level attributes of an arbitrary function.
9536	if (CFn.hasFnAttribute(Kind: "sign-return-address"))
9537	F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address"));
9538	if (CFn.hasFnAttribute(Kind: "sign-return-address-key"))
9539	F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address-key"));
9540
9541	AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9542	}
9543
9544	bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
9545	MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9546	const Function &F = MF.getFunction();
9547
9548	// Can F be deduplicated by the linker? If it can, don't outline from it.
9549	if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9550	return false;
9551
9552	// Don't outline from functions with section markings; the program could
9553	// expect that all the code is in the named section.
9554	// FIXME: Allow outlining from multiple functions with the same section
9555	// marking.
9556	if (F.hasSection())
9557	return false;
9558
9559	// Outlining from functions with redzones is unsafe since the outliner may
9560	// modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9561	// outline from it.
9562	AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
9563	if (!AFI \|\| AFI->hasRedZone().value_or(u: true))
9564	return false;
9565
9566	// FIXME: Determine whether it is safe to outline from functions which contain
9567	// streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9568	// outlined together and ensure it is safe to outline with async unwind info,
9569	// required for saving & restoring VG around calls.
9570	if (AFI->hasStreamingModeChanges())
9571	return false;
9572
9573	// FIXME: Teach the outliner to generate/handle Windows unwind info.
9574	if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
9575	return false;
9576
9577	// It's safe to outline from MF.
9578	return true;
9579	}
9580
9581	SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9582	AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
9583	unsigned &Flags) const {
9584	assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
9585	"Must track liveness!");
9586	SmallVector<
9587	std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9588	Ranges;
9589	// According to the AArch64 Procedure Call Standard, the following are
9590	// undefined on entry/exit from a function call:
9591	//
9592	// Registers x16, x17, (and thus w16, w17)*
9593	// Condition codes (and thus the NZCV register)*
9594	//
9595	// If any of these registers are used inside or live across an outlined
9596	// function, then they may be modified later, either by the compiler or
9597	// some other tool (like the linker).
9598	//
9599	// To avoid outlining in these situations, partition each block into ranges
9600	// where these registers are dead. We will only outline from those ranges.
9601	LiveRegUnits LRU(getRegisterInfo());
9602	auto AreAllUnsafeRegsDead = [&LRU]() {
9603	return LRU.available(Reg: AArch64::W16) && LRU.available(Reg: AArch64::W17) &&
9604	LRU.available(Reg: AArch64::NZCV);
9605	};
9606
9607	// We need to know if LR is live across an outlining boundary later on in
9608	// order to decide how we'll create the outlined call, frame, etc.
9609	//
9610	// It's pretty expensive to check this for every candidate* within a block.*
9611	// That's some potentially n^2 behaviour, since in the worst case, we'd need
9612	// to compute liveness from the end of the block for O(n) candidates within
9613	// the block.
9614	//
9615	// So, to improve the average case, let's keep track of liveness from the end
9616	// of the block to the beginning of every outlinable range. If we know that
9617	// LR is available in every range we could outline from, then we know that
9618	// we don't need to check liveness for any candidate within that range.
9619	bool LRAvailableEverywhere = true;
9620	// Compute liveness bottom-up.
9621	LRU.addLiveOuts(MBB);
9622	// Update flags that require info about the entire MBB.
9623	auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
9624	if (MI.isCall() && !MI.isTerminator())
9625	Flags \|= MachineOutlinerMBBFlags::HasCalls;
9626	};
9627	// Range: [RangeBegin, RangeEnd)
9628	MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
9629	unsigned RangeLen;
9630	auto CreateNewRangeStartingAt =
9631	[&RangeBegin, &RangeEnd,
9632	&RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
9633	RangeBegin = NewBegin;
9634	RangeEnd = std::next(x: RangeBegin);
9635	RangeLen = `0`;
9636	};
9637	auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
9638	// At least one unsafe register is not dead. We do not want to outline at
9639	// this point. If it is long enough to outline from, save the range
9640	// [RangeBegin, RangeEnd).
9641	if (RangeLen > `1`)
9642	Ranges.push_back(Elt: std::make_pair(x&: RangeBegin, y&: RangeEnd));
9643	};
9644	// Find the first point where all unsafe registers are dead.
9645	// FIND: <safe instr> <-- end of first potential range
9646	// SKIP: <unsafe def>
9647	// SKIP: ... everything between ...
9648	// SKIP: <unsafe use>
9649	auto FirstPossibleEndPt = MBB.instr_rbegin();
9650	for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
9651	LRU.stepBackward(MI: *FirstPossibleEndPt);
9652	// Update flags that impact how we outline across the entire block,
9653	// regardless of safety.
9654	UpdateWholeMBBFlags (*FirstPossibleEndPt);
9655	if (AreAllUnsafeRegsDead ())
9656	break;
9657	}
9658	// If we exhausted the entire block, we have no safe ranges to outline.
9659	if (FirstPossibleEndPt == MBB.instr_rend())
9660	return Ranges;
9661	// Current range.
9662	CreateNewRangeStartingAt (FirstPossibleEndPt ->getIterator());
9663	// StartPt points to the first place where all unsafe registers
9664	// are dead (if there is any such point). Begin partitioning the MBB into
9665	// ranges.
9666	for (auto &MI : make_range(x: FirstPossibleEndPt, y: MBB.instr_rend())) {
9667	LRU.stepBackward(MI);
9668	UpdateWholeMBBFlags (MI);
9669	if (!AreAllUnsafeRegsDead ()) {
9670	SaveRangeIfNonEmpty ();
9671	CreateNewRangeStartingAt (MI.getIterator());
9672	continue;
9673	}
9674	LRAvailableEverywhere &= LRU.available(Reg: AArch64::LR);
9675	RangeBegin = MI.getIterator();
9676	++RangeLen;
9677	}
9678	// Above loop misses the last (or only) range. If we are still safe, then
9679	// let's save the range.
9680	if (AreAllUnsafeRegsDead ())
9681	SaveRangeIfNonEmpty ();
9682	if (Ranges.empty())
9683	return Ranges;
9684	// We found the ranges bottom-up. Mapping expects the top-down. Reverse
9685	// the order.
9686	std::reverse(first: Ranges.begin(), last: Ranges.end());
9687	// If there is at least one outlinable range where LR is unavailable
9688	// somewhere, remember that.
9689	if (!LRAvailableEverywhere)
9690	Flags \|= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
9691	return Ranges;
9692	}
9693
9694	outliner::InstrType
9695	AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
9696	MachineBasicBlock::iterator &MIT,
9697	unsigned Flags) const {
9698	MachineInstr &MI = *MIT;
9699
9700	// Don't outline anything used for return address signing. The outlined
9701	// function will get signed later if needed
9702	switch (MI.getOpcode()) {
9703	case AArch64::PACM:
9704	case AArch64::PACIASP:
9705	case AArch64::PACIBSP:
9706	case AArch64::PACIASPPC:
9707	case AArch64::PACIBSPPC:
9708	case AArch64::AUTIASP:
9709	case AArch64::AUTIBSP:
9710	case AArch64::AUTIASPPCi:
9711	case AArch64::AUTIASPPCr:
9712	case AArch64::AUTIBSPPCi:
9713	case AArch64::AUTIBSPPCr:
9714	case AArch64::RETAA:
9715	case AArch64::RETAB:
9716	case AArch64::RETAASPPCi:
9717	case AArch64::RETAASPPCr:
9718	case AArch64::RETABSPPCi:
9719	case AArch64::RETABSPPCr:
9720	case AArch64::EMITBKEY:
9721	case AArch64::PAUTH_PROLOGUE:
9722	case AArch64::PAUTH_EPILOGUE:
9723	return outliner::InstrType::Illegal;
9724	}
9725
9726	// We can only outline these if we will tail call the outlined function, or
9727	// fix up the CFI offsets. Currently, CFI instructions are outlined only if
9728	// in a tail call.
9729	//
9730	// FIXME: If the proper fixups for the offset are implemented, this should be
9731	// possible.
9732	if (MI.isCFIInstruction())
9733	return outliner::InstrType::Legal;
9734
9735	// Is this a terminator for a basic block?
9736	if (MI.isTerminator())
9737	// TargetInstrInfo::getOutliningType has already filtered out anything
9738	// that would break this, so we can allow it here.
9739	return outliner::InstrType::Legal;
9740
9741	// Make sure none of the operands are un-outlinable.
9742	for (const MachineOperand &MOP : MI.operands()) {
9743	// A check preventing CFI indices was here before, but only CFI
9744	// instructions should have those.
9745	assert(!MOP.isCFIIndex());
9746
9747	// If it uses LR or W30 explicitly, then don't touch it.
9748	if (MOP.isReg() && !MOP.isImplicit() &&
9749	(MOP.getReg() == AArch64::LR \|\| MOP.getReg() == AArch64::W30))
9750	return outliner::InstrType::Illegal;
9751	}
9752
9753	// Special cases for instructions that can always be outlined, but will fail
9754	// the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
9755	// be outlined because they don't require a specific* value to be in LR.*
9756	if (MI.getOpcode() == AArch64::ADRP)
9757	return outliner::InstrType::Legal;
9758
9759	// If MI is a call we might be able to outline it. We don't want to outline
9760	// any calls that rely on the position of items on the stack. When we outline
9761	// something containing a call, we have to emit a save and restore of LR in
9762	// the outlined function. Currently, this always happens by saving LR to the
9763	// stack. Thus, if we outline, say, half the parameters for a function call
9764	// plus the call, then we'll break the callee's expectations for the layout
9765	// of the stack.
9766	//
9767	// FIXME: Allow calls to functions which construct a stack frame, as long
9768	// as they don't access arguments on the stack.
9769	// FIXME: Figure out some way to analyze functions defined in other modules.
9770	// We should be able to compute the memory usage based on the IR calling
9771	// convention, even if we can't see the definition.
9772	if (MI.isCall()) {
9773	// Get the function associated with the call. Look at each operand and find
9774	// the one that represents the callee and get its name.
9775	const Function Callee = nullptr*;
9776	for (const MachineOperand &MOP : MI.operands()) {
9777	if (MOP.isGlobal()) {
9778	Callee = dyn_cast<Function>(Val: MOP.getGlobal());
9779	break;
9780	}
9781	}
9782
9783	// Never outline calls to mcount. There isn't any rule that would require
9784	// this, but the Linux kernel's "ftrace" feature depends on it.
9785	if (Callee && Callee->getName() == "\01_mcount")
9786	return outliner::InstrType::Illegal;
9787
9788	// If we don't know anything about the callee, assume it depends on the
9789	// stack layout of the caller. In that case, it's only legal to outline
9790	// as a tail-call. Explicitly list the call instructions we know about so we
9791	// don't get unexpected results with call pseudo-instructions.
9792	auto UnknownCallOutlineType = outliner::InstrType::Illegal;
9793	if (MI.getOpcode() == AArch64::BLR \|\|
9794	MI.getOpcode() == AArch64::BLRNoIP \|\| MI.getOpcode() == AArch64::BL)
9795	UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
9796
9797	if (!Callee)
9798	return UnknownCallOutlineType;
9799
9800	// We have a function we have information about. Check it if it's something
9801	// can safely outline.
9802	MachineFunction CalleeMF = MMI.getMachineFunction(F: Callee);
9803
9804	// We don't know what's going on with the callee at all. Don't touch it.
9805	if (!CalleeMF)
9806	return UnknownCallOutlineType;
9807
9808	// Check if we know anything about the callee saves on the function. If we
9809	// don't, then don't touch it, since that implies that we haven't
9810	// computed anything about its stack frame yet.
9811	MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
9812	if (!MFI.isCalleeSavedInfoValid() \|\| MFI.getStackSize() > `0` \|\|
9813	MFI.getNumObjects() > `0`)
9814	return UnknownCallOutlineType;
9815
9816	// At this point, we can say that CalleeMF ought to not pass anything on the
9817	// stack. Therefore, we can outline it.
9818	return outliner::InstrType::Legal;
9819	}
9820
9821	// Don't touch the link register or W30.
9822	if (MI.readsRegister(Reg: AArch64::W30, TRI: &getRegisterInfo()) \|\|
9823	MI.modifiesRegister(Reg: AArch64::W30, TRI: &getRegisterInfo()))
9824	return outliner::InstrType::Illegal;
9825
9826	// Don't outline BTI instructions, because that will prevent the outlining
9827	// site from being indirectly callable.
9828	if (hasBTISemantics(MI))
9829	return outliner::InstrType::Illegal;
9830
9831	return outliner::InstrType::Legal;
9832	}
9833
9834	void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
9835	for (MachineInstr &MI : MBB) {
9836	const MachineOperand *Base;
9837	TypeSize Width(`0`, false);
9838	int64_t Offset;
9839	bool OffsetIsScalable;
9840
9841	// Is this a load or store with an immediate offset with SP as the base?
9842	if (!MI.mayLoadOrStore() \|\|
9843	!getMemOperandWithOffsetWidth(LdSt: MI, BaseOp&: Base, Offset, OffsetIsScalable, Width,
9844	TRI: &RI) \|\|
9845	(Base->isReg() && Base->getReg() != AArch64::SP))
9846	continue;
9847
9848	// It is, so we have to fix it up.
9849	TypeSize Scale(`0U`, false);
9850	int64_t Dummy1, Dummy2;
9851
9852	MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(LdSt&: MI);
9853	assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
9854	getMemOpInfo(Opcode: MI.getOpcode(), Scale, Width, MinOffset&: Dummy1, MaxOffset&: Dummy2);
9855	assert(Scale != `0` && "Unexpected opcode!");
9856	assert(!OffsetIsScalable && "Expected offset to be a byte offset");
9857
9858	// We've pushed the return address to the stack, so add 16 to the offset.
9859	// This is safe, since we already checked if it would overflow when we
9860	// checked if this instruction was legal to outline.
9861	int64_t NewImm = (Offset + `16`) / (int64_t)Scale.getFixedValue();
9862	StackOffsetOperand.setImm(NewImm);
9863	}
9864	}
9865
9866	static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
9867	const AArch64InstrInfo *TII,
9868	bool ShouldSignReturnAddr) {
9869	if (!ShouldSignReturnAddr)
9870	return;
9871
9872	BuildMI(BB&: MBB, I: MBB.begin(), MIMD: DebugLoc (), MCID: TII->get(Opcode: AArch64::PAUTH_PROLOGUE))
9873	.setMIFlag(MachineInstr::FrameSetup);
9874	BuildMI(BB&: MBB, I: MBB.getFirstInstrTerminator(), MIMD: DebugLoc (),
9875	MCID: TII->get(Opcode: AArch64::PAUTH_EPILOGUE))
9876	.setMIFlag(MachineInstr::FrameDestroy);
9877	}
9878
9879	void AArch64InstrInfo::buildOutlinedFrame(
9880	MachineBasicBlock &MBB, MachineFunction &MF,
9881	const outliner::OutlinedFunction &OF) const {
9882
9883	AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9884
9885	if (OF.FrameConstructionID == MachineOutlinerTailCall)
9886	FI->setOutliningStyle("Tail Call");
9887	else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9888	// For thunk outlining, rewrite the last instruction from a call to a
9889	// tail-call.
9890	MachineInstr Call = &--MBB.instr_end();
9891	unsigned TailOpcode;
9892	if (Call->getOpcode() == AArch64::BL) {
9893	TailOpcode = AArch64::TCRETURNdi;
9894	} else {
9895	assert(Call->getOpcode() == AArch64::BLR \|\|
9896	Call->getOpcode() == AArch64::BLRNoIP);
9897	TailOpcode = AArch64::TCRETURNriALL;
9898	}
9899	MachineInstr *TC = BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: TailOpcode))
9900	.add(MO: Call->getOperand(i: `0`))
9901	.addImm(Val: `0`);
9902	MBB.insert(I: MBB.end(), MI: TC);
9903	Call->eraseFromParent();
9904
9905	FI->setOutliningStyle("Thunk");
9906	}
9907
9908	bool IsLeafFunction = true;
9909
9910	// Is there a call in the outlined range?
9911	auto IsNonTailCall = [](const MachineInstr &MI) {
9912	return MI.isCall() && !MI.isReturn();
9913	};
9914
9915	if (llvm::any_of(Range: MBB.instrs(), P: IsNonTailCall)) {
9916	// Fix up the instructions in the range, since we're going to modify the
9917	// stack.
9918
9919	// Bugzilla ID: 46767
9920	// TODO: Check if fixing up twice is safe so we can outline these.
9921	assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9922	"Can only fix up stack references once");
9923	fixupPostOutline(MBB);
9924
9925	IsLeafFunction = false;
9926
9927	// LR has to be a live in so that we can save it.
9928	if (!MBB.isLiveIn(Reg: AArch64::LR))
9929	MBB.addLiveIn(PhysReg: AArch64::LR);
9930
9931	MachineBasicBlock::iterator It = MBB.begin();
9932	MachineBasicBlock::iterator Et = MBB.end();
9933
9934	if (OF.FrameConstructionID == MachineOutlinerTailCall \|\|
9935	OF.FrameConstructionID == MachineOutlinerThunk)
9936	Et = std::prev(x: MBB.end());
9937
9938	// Insert a save before the outlined region
9939	MachineInstr *STRXpre = BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::STRXpre))
9940	.addReg(RegNo: AArch64::SP, flags: RegState::Define)
9941	.addReg(RegNo: AArch64::LR)
9942	.addReg(RegNo: AArch64::SP)
9943	.addImm(Val: -`16`);
9944	It = MBB.insert(I: It, MI: STRXpre);
9945
9946	if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9947	CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
9948
9949	// Add a CFI saying the stack was moved 16 B down.
9950	CFIBuilder.buildDefCFAOffset(Offset: `16`);
9951
9952	// Add a CFI saying that the LR that we want to find is now 16 B higher
9953	// than before.
9954	CFIBuilder.buildOffset(Reg: AArch64::LR, Offset: -`16`);
9955	}
9956
9957	// Insert a restore before the terminator for the function.
9958	MachineInstr *LDRXpost = BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::LDRXpost))
9959	.addReg(RegNo: AArch64::SP, flags: RegState::Define)
9960	.addReg(RegNo: AArch64::LR, flags: RegState::Define)
9961	.addReg(RegNo: AArch64::SP)
9962	.addImm(Val: `16`);
9963	Et = MBB.insert(I: Et, MI: LDRXpost);
9964	}
9965
9966	bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(SpillsLR: !IsLeafFunction);
9967
9968	// If this is a tail call outlined function, then there's already a return.
9969	if (OF.FrameConstructionID == MachineOutlinerTailCall \|\|
9970	OF.FrameConstructionID == MachineOutlinerThunk) {
9971	signOutlinedFunction(MF, MBB, TII: this, ShouldSignReturnAddr);
9972	return;
9973	}
9974
9975	// It's not a tail call, so we have to insert the return ourselves.
9976
9977	// LR has to be a live in so that we can return to it.
9978	if (!MBB.isLiveIn(Reg: AArch64::LR))
9979	MBB.addLiveIn(PhysReg: AArch64::LR);
9980
9981	MachineInstr *ret = BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::RET))
9982	.addReg(RegNo: AArch64::LR);
9983	MBB.insert(I: MBB.end(), MI: ret);
9984
9985	signOutlinedFunction(MF, MBB, TII: this, ShouldSignReturnAddr);
9986
9987	FI->setOutliningStyle("Function");
9988
9989	// Did we have to modify the stack by saving the link register?
9990	if (OF.FrameConstructionID != MachineOutlinerDefault)
9991	return;
9992
9993	// We modified the stack.
9994	// Walk over the basic block and fix up all the stack accesses.
9995	fixupPostOutline(MBB);
9996	}
9997
9998	MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9999	Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10000	MachineFunction &MF, outliner::Candidate &C) const {
10001
10002	// Are we tail calling?
10003	if (C.CallConstructionID == MachineOutlinerTailCall) {
10004	// If yes, then we can just branch to the label.
10005	It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::TCRETURNdi))
10006	.addGlobalAddress(GV: M.getNamedValue(Name: MF.getName()))
10007	.addImm(Val: `0`));
10008	return It;
10009	}
10010
10011	// Are we saving the link register?
10012	if (C.CallConstructionID == MachineOutlinerNoLRSave \|\|
10013	C.CallConstructionID == MachineOutlinerThunk) {
10014	// No, so just insert the call.
10015	It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::BL))
10016	.addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())));
10017	return It;
10018	}
10019
10020	// We want to return the spot where we inserted the call.
10021	MachineBasicBlock::iterator CallPt;
10022
10023	// Instructions for saving and restoring LR around the call instruction we're
10024	// going to insert.
10025	MachineInstr *Save;
10026	MachineInstr *Restore;
10027	// Can we save to a register?
10028	if (C.CallConstructionID == MachineOutlinerRegSave) {
10029	// FIXME: This logic should be sunk into a target-specific interface so that
10030	// we don't have to recompute the register.
10031	Register Reg = findRegisterToSaveLRTo(C);
10032	assert(Reg && "No callee-saved register available?");
10033
10034	// LR has to be a live in so that we can save it.
10035	if (!MBB.isLiveIn(Reg: AArch64::LR))
10036	MBB.addLiveIn(PhysReg: AArch64::LR);
10037
10038	// Save and restore LR from Reg.
10039	Save = BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::ORRXrs), DestReg: Reg)
10040	.addReg(RegNo: AArch64::XZR)
10041	.addReg(RegNo: AArch64::LR)
10042	.addImm(Val: `0`);
10043	Restore = BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::ORRXrs), DestReg: AArch64::LR)
10044	.addReg(RegNo: AArch64::XZR)
10045	.addReg(RegNo: Reg)
10046	.addImm(Val: `0`);
10047	} else {
10048	// We have the default case. Save and restore from SP.
10049	Save = BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::STRXpre))
10050	.addReg(RegNo: AArch64::SP, flags: RegState::Define)
10051	.addReg(RegNo: AArch64::LR)
10052	.addReg(RegNo: AArch64::SP)
10053	.addImm(Val: -`16`);
10054	Restore = BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::LDRXpost))
10055	.addReg(RegNo: AArch64::SP, flags: RegState::Define)
10056	.addReg(RegNo: AArch64::LR, flags: RegState::Define)
10057	.addReg(RegNo: AArch64::SP)
10058	.addImm(Val: `16`);
10059	}
10060
10061	It = MBB.insert(I: It, MI: Save);
10062	It ++;
10063
10064	// Insert the call.
10065	It = MBB.insert(I: It, MI: BuildMI(MF, MIMD: DebugLoc (), MCID: get(Opcode: AArch64::BL))
10066	.addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())));
10067	CallPt = It;
10068	It ++;
10069
10070	It = MBB.insert(I: It, MI: Restore);
10071	return CallPt;
10072	}
10073
10074	bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10075	MachineFunction &MF) const {
10076	return MF.getFunction().hasMinSize();
10077	}
10078
10079	void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10080	MachineBasicBlock::iterator Iter,
10081	DebugLoc &DL,
10082	bool AllowSideEffects) const {
10083	const MachineFunction &MF = *MBB.getParent();
10084	const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10085	const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10086
10087	if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10088	BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: AArch64::MOVZXi), DestReg: Reg).addImm(Val: `0`).addImm(Val: `0`);
10089	} else if (STI.isSVEorStreamingSVEAvailable()) {
10090	BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: AArch64::DUP_ZI_D), DestReg: Reg)
10091	.addImm(Val: `0`)
10092	.addImm(Val: `0`);
10093	} else if (STI.isNeonAvailable()) {
10094	BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: AArch64::MOVIv2d_ns), DestReg: Reg)
10095	.addImm(Val: `0`);
10096	} else {
10097	// This is a streaming-compatible function without SVE. We don't have full
10098	// Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10099	// So given `movi v..` would be illegal use `fmov d..` instead.
10100	assert(STI.hasNEON() && "Expected to have NEON.");
10101	Register Reg64 = TRI.getSubReg(Reg, Idx: AArch64::dsub);
10102	BuildMI(BB&: MBB, I: Iter, MIMD: DL, MCID: get(Opcode: AArch64::FMOVD0), DestReg: Reg64);
10103	}
10104	}
10105
10106	std::optional<DestSourcePair>
10107	AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10108
10109	// AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
10110	// and zero immediate operands used as an alias for mov instruction.
10111	if (((MI.getOpcode() == AArch64::ORRWrs &&
10112	MI.getOperand(i: `1`).getReg() == AArch64::WZR &&
10113	MI.getOperand(i: `3`).getImm() == `0x0`) \|\|
10114	(MI.getOpcode() == AArch64::ORRWrr &&
10115	MI.getOperand(i: `1`).getReg() == AArch64::WZR)) &&
10116	// Check that the w->w move is not a zero-extending w->x mov.
10117	(!MI.getOperand(i: `0`).getReg().isVirtual() \|\|
10118	MI.getOperand(i: `0`).getSubReg() == `0`) &&
10119	(!MI.getOperand(i: `0`).getReg().isPhysical() \|\|
10120	MI.findRegisterDefOperandIdx(Reg: getXRegFromWReg(Reg: MI.getOperand(i: `0`).getReg()),
10121	/TRI=/nullptr) == -`1`))
10122	return DestSourcePair {MI.getOperand(i: `0`), MI.getOperand(i: `2`)};
10123
10124	if (MI.getOpcode() == AArch64::ORRXrs &&
10125	MI.getOperand(i: `1`).getReg() == AArch64::XZR &&
10126	MI.getOperand(i: `3`).getImm() == `0x0`)
10127	return DestSourcePair {MI.getOperand(i: `0`), MI.getOperand(i: `2`)};
10128
10129	return std::nullopt;
10130	}
10131
10132	std::optional<DestSourcePair>
10133	AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10134	if ((MI.getOpcode() == AArch64::ORRWrs &&
10135	MI.getOperand(i: `1`).getReg() == AArch64::WZR &&
10136	MI.getOperand(i: `3`).getImm() == `0x0`) \|\|
10137	(MI.getOpcode() == AArch64::ORRWrr &&
10138	MI.getOperand(i: `1`).getReg() == AArch64::WZR))
10139	return DestSourcePair {MI.getOperand(i: `0`), MI.getOperand(i: `2`)};
10140	return std::nullopt;
10141	}
10142
10143	std::optional<RegImmPair>
10144	AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10145	int Sign = `1`;
10146	int64_t Offset = `0`;
10147
10148	// TODO: Handle cases where Reg is a super- or sub-register of the
10149	// destination register.
10150	const MachineOperand &Op0 = MI.getOperand(i: `0`);
10151	if (!Op0.isReg() \|\| Reg != Op0.getReg())
10152	return std::nullopt;
10153
10154	switch (MI.getOpcode()) {
10155	default:
10156	return std::nullopt;
10157	case AArch64::SUBWri:
10158	case AArch64::SUBXri:
10159	case AArch64::SUBSWri:
10160	case AArch64::SUBSXri:
10161	Sign *= -`1`;
10162	[[fallthrough]];
10163	case AArch64::ADDSWri:
10164	case AArch64::ADDSXri:
10165	case AArch64::ADDWri:
10166	case AArch64::ADDXri: {
10167	// TODO: Third operand can be global address (usually some string).
10168	if (!MI.getOperand(i: `0`).isReg() \|\| !MI.getOperand(i: `1`).isReg() \|\|
10169	!MI.getOperand(i: `2`).isImm())
10170	return std::nullopt;
10171	int Shift = MI.getOperand(i: `3`).getImm();
10172	assert((Shift == `0` \|\| Shift == `12`) && "Shift can be either 0 or 12");
10173	Offset = Sign * (MI.getOperand(i: `2`).getImm() << Shift);
10174	}
10175	}
10176	return RegImmPair {MI.getOperand(i: `1`).getReg(), Offset};
10177	}
10178
10179	/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10180	/// the destination register then, if possible, describe the value in terms of
10181	/// the source register.
10182	static std::optional<ParamLoadedValue>
10183	describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10184	const TargetInstrInfo *TII,
10185	const TargetRegisterInfo *TRI) {
10186	auto DestSrc = TII->isCopyLikeInstr(MI);
10187	if (!DestSrc)
10188	return std::nullopt;
10189
10190	Register DestReg = DestSrc ->Destination->getReg();
10191	Register SrcReg = DestSrc ->Source->getReg();
10192
10193	auto Expr = DIExpression::get(Context&: MI.getMF()->getFunction().getContext(), Elements: {});
10194
10195	// If the described register is the destination, just return the source.
10196	if (DestReg == DescribedReg)
10197	return ParamLoadedValue (MachineOperand::CreateReg(Reg: SrcReg, isDef: false), Expr);
10198
10199	// ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10200	if (MI.getOpcode() == AArch64::ORRWrs &&
10201	TRI->isSuperRegister(RegA: DestReg, RegB: DescribedReg))
10202	return ParamLoadedValue (MachineOperand::CreateReg(Reg: SrcReg, isDef: false), Expr);
10203
10204	// We may need to describe the lower part of a ORRXrs move.
10205	if (MI.getOpcode() == AArch64::ORRXrs &&
10206	TRI->isSubRegister(RegA: DestReg, RegB: DescribedReg)) {
10207	Register SrcSubReg = TRI->getSubReg(Reg: SrcReg, Idx: AArch64::sub_32);
10208	return ParamLoadedValue (MachineOperand::CreateReg(Reg: SrcSubReg, isDef: false), Expr);
10209	}
10210
10211	assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10212	"Unhandled ORR[XW]rs copy case");
10213
10214	return std::nullopt;
10215	}
10216
10217	bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10218	// Functions cannot be split to different sections on AArch64 if they have
10219	// a red zone. This is because relaxing a cross-section branch may require
10220	// incrementing the stack pointer to spill a register, which would overwrite
10221	// the red zone.
10222	if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(u: true))
10223	return false;
10224
10225	return TargetInstrInfo::isFunctionSafeToSplit(MF);
10226	}
10227
10228	bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10229	const MachineBasicBlock &MBB) const {
10230	// Asm Goto blocks can contain conditional branches to goto labels, which can
10231	// get moved out of range of the branch instruction.
10232	auto isAsmGoto = [](const MachineInstr &MI) {
10233	return MI.getOpcode() == AArch64::INLINEASM_BR;
10234	};
10235	if (llvm::any_of(Range: MBB, P: isAsmGoto) \|\| MBB.isInlineAsmBrIndirectTarget())
10236	return false;
10237
10238	// Because jump tables are label-relative instead of table-relative, they all
10239	// must be in the same section or relocation fixup handling will fail.
10240
10241	// Check if MBB is a jump table target
10242	const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10243	auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10244	return llvm::is_contained(Range: JTE.MBBs, Element: &MBB);
10245	};
10246	if (MJTI != nullptr && llvm::any_of(Range: MJTI->getJumpTables(), P: containsMBB))
10247	return false;
10248
10249	// Check if MBB contains a jump table lookup
10250	for (const MachineInstr &MI : MBB) {
10251	switch (MI.getOpcode()) {
10252	case TargetOpcode::G_BRJT:
10253	case AArch64::JumpTableDest32:
10254	case AArch64::JumpTableDest16:
10255	case AArch64::JumpTableDest8:
10256	return false;
10257	default:
10258	continue;
10259	}
10260	}
10261
10262	// MBB isn't a special case, so it's safe to be split to the cold section.
10263	return true;
10264	}
10265
10266	std::optional<ParamLoadedValue>
10267	AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10268	Register Reg) const {
10269	const MachineFunction *MF = MI.getMF();
10270	const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10271	switch (MI.getOpcode()) {
10272	case AArch64::MOVZWi:
10273	case AArch64::MOVZXi: {
10274	// MOVZWi may be used for producing zero-extended 32-bit immediates in
10275	// 64-bit parameters, so we need to consider super-registers.
10276	if (!TRI->isSuperRegisterEq(RegA: MI.getOperand(i: `0`).getReg(), RegB: Reg))
10277	return std::nullopt;
10278
10279	if (!MI.getOperand(i: `1`).isImm())
10280	return std::nullopt;
10281	int64_t Immediate = MI.getOperand(i: `1`).getImm();
10282	int Shift = MI.getOperand(i: `2`).getImm();
10283	return ParamLoadedValue (MachineOperand::CreateImm(Val: Immediate << Shift),
10284	nullptr);
10285	}
10286	case AArch64::ORRWrs:
10287	case AArch64::ORRXrs:
10288	return describeORRLoadedValue(MI, DescribedReg: Reg, TII: this, TRI);
10289	}
10290
10291	return TargetInstrInfo::describeLoadedValue(MI, Reg);
10292	}
10293
10294	bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10295	MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10296	assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT \|\|
10297	ExtMI.getOpcode() == TargetOpcode::G_ZEXT \|\|
10298	ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10299
10300	// Anyexts are nops.
10301	if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10302	return true;
10303
10304	Register DefReg = ExtMI.getOperand(i: `0`).getReg();
10305	if (!MRI.hasOneNonDBGUse(RegNo: DefReg))
10306	return false;
10307
10308	// It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10309	// addressing mode.
10310	auto UserMI = &MRI.use_instr_nodbg_begin(RegNo: DefReg);
10311	return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10312	}
10313
10314	uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10315	return get(Opcode: Opc).TSFlags & AArch64::ElementSizeMask;
10316	}
10317
10318	bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10319	return get(Opcode: Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10320	}
10321
10322	bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10323	return get(Opcode: Opc).TSFlags & AArch64::InstrFlagIsWhile;
10324	}
10325
10326	unsigned int
10327	AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10328	return OptLevel >= CodeGenOptLevel::Aggressive ? `6` : `2`;
10329	}
10330
10331	bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10332	unsigned Scale) const {
10333	if (Offset && Scale)
10334	return false;
10335
10336	// Check Reg + Imm
10337	if (!Scale) {
10338	// 9-bit signed offset
10339	if (isInt<`9`>(x: Offset))
10340	return true;
10341
10342	// 12-bit unsigned offset
10343	unsigned Shift = Log2_64(Value: NumBytes);
10344	if (NumBytes && Offset > `0` && (Offset / NumBytes) <= (`1LL` << `12`) - `1` &&
10345	// Must be a multiple of NumBytes (NumBytes is a power of 2)
10346	(Offset >> Shift) << Shift == Offset)
10347	return true;
10348	return false;
10349	}
10350
10351	// Check reg1 + SIZE_IN_BYTES reg2 and reg1 + reg2*
10352	return Scale == `1` \|\| (Scale > `0` && Scale == NumBytes);
10353	}
10354
10355	unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10356	if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10357	return AArch64::BLRNoIP;
10358	else
10359	return AArch64::BLR;
10360	}
10361
10362	MachineBasicBlock::iterator
10363	AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10364	Register TargetReg, bool FrameSetup) const {
10365	assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10366
10367	MachineBasicBlock &MBB = *MBBI ->getParent();
10368	MachineFunction &MF = *MBB.getParent();
10369	const AArch64InstrInfo *TII =
10370	MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10371	int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10372	DebugLoc DL = MBB.findDebugLoc(MBBI);
10373
10374	MachineFunction::iterator MBBInsertPoint = std::next(x: MBB.getIterator());
10375	MachineBasicBlock *LoopTestMBB =
10376	MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
10377	MF.insert(MBBI: MBBInsertPoint, MBB: LoopTestMBB);
10378	MachineBasicBlock *LoopBodyMBB =
10379	MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
10380	MF.insert(MBBI: MBBInsertPoint, MBB: LoopBodyMBB);
10381	MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
10382	MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB);
10383	MachineInstr::MIFlag Flags =
10384	FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
10385
10386	// LoopTest:
10387	// SUB SP, SP, #ProbeSize
10388	emitFrameOffset(MBB&: *LoopTestMBB, MBBI: LoopTestMBB->end(), DL, DestReg: AArch64::SP,
10389	SrcReg: AArch64::SP, Offset: StackOffset::getFixed(Fixed: -ProbeSize), TII, Flag: Flags);
10390
10391	// CMP SP, TargetReg
10392	BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::SUBSXrx64),
10393	DestReg: AArch64::XZR)
10394	.addReg(RegNo: AArch64::SP)
10395	.addReg(RegNo: TargetReg)
10396	.addImm(Val: AArch64_AM::getArithExtendImm(ET: AArch64_AM::UXTX, Imm: `0`))
10397	.setMIFlags(Flags);
10398
10399	// B.<Cond> LoopExit
10400	BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::Bcc))
10401	.addImm(Val: AArch64CC::LE)
10402	.addMBB(MBB: ExitMBB)
10403	.setMIFlags(Flags);
10404
10405	// STR XZR, [SP]
10406	BuildMI(BB&: *LoopBodyMBB, I: LoopBodyMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::STRXui))
10407	.addReg(RegNo: AArch64::XZR)
10408	.addReg(RegNo: AArch64::SP)
10409	.addImm(Val: `0`)
10410	.setMIFlags(Flags);
10411
10412	// B loop
10413	BuildMI(BB&: *LoopBodyMBB, I: LoopBodyMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::B))
10414	.addMBB(MBB: LoopTestMBB)
10415	.setMIFlags(Flags);
10416
10417	// LoopExit:
10418	// MOV SP, TargetReg
10419	BuildMI(BB&: *ExitMBB, I: ExitMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::ADDXri), DestReg: AArch64::SP)
10420	.addReg(RegNo: TargetReg)
10421	.addImm(Val: `0`)
10422	.addImm(Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: `0`))
10423	.setMIFlags(Flags);
10424
10425	// LDR XZR, [SP]
10426	BuildMI(BB&: *ExitMBB, I: ExitMBB->end(), MIMD: DL, MCID: TII->get(Opcode: AArch64::LDRXui))
10427	.addReg(RegNo: AArch64::XZR, flags: RegState::Define)
10428	.addReg(RegNo: AArch64::SP)
10429	.addImm(Val: `0`)
10430	.setMIFlags(Flags);
10431
10432	ExitMBB->splice(Where: ExitMBB->end(), Other: &MBB, From: std::next(x: MBBI), To: MBB.end());
10433	ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);
10434
10435	LoopTestMBB->addSuccessor(Succ: ExitMBB);
10436	LoopTestMBB->addSuccessor(Succ: LoopBodyMBB);
10437	LoopBodyMBB->addSuccessor(Succ: LoopTestMBB);
10438	MBB.addSuccessor(Succ: LoopTestMBB);
10439
10440	// Update liveins.
10441	if (MF.getRegInfo().reservedRegsFrozen())
10442	fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopBodyMBB, LoopTestMBB});
10443
10444	return ExitMBB->begin();
10445	}
10446
10447	namespace {
10448	class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10449	MachineFunction *MF;
10450	const TargetInstrInfo *TII;
10451	const TargetRegisterInfo *TRI;
10452	MachineRegisterInfo &MRI;
10453
10454	/// The block of the loop
10455	MachineBasicBlock *LoopBB;
10456	/// The conditional branch of the loop
10457	MachineInstr *CondBranch;
10458	/// The compare instruction for loop control
10459	MachineInstr *Comp;
10460	/// The number of the operand of the loop counter value in Comp
10461	unsigned CompCounterOprNum;
10462	/// The instruction that updates the loop counter value
10463	MachineInstr *Update;
10464	/// The number of the operand of the loop counter value in Update
10465	unsigned UpdateCounterOprNum;
10466	/// The initial value of the loop counter
10467	Register Init;
10468	/// True iff Update is a predecessor of Comp
10469	bool IsUpdatePriorComp;
10470
10471	/// The normalized condition used by createTripCountGreaterCondition()
10472	SmallVector<MachineOperand, `4`> Cond;
10473
10474	public:
10475	AArch64PipelinerLoopInfo(MachineBasicBlock LoopBB, MachineInstr CondBranch,
10476	MachineInstr Comp, unsigned* CompCounterOprNum,
10477	MachineInstr Update, unsigned* UpdateCounterOprNum,
10478	Register Init, bool IsUpdatePriorComp,
10479	const SmallVectorImpl<MachineOperand> &Cond)
10480	: MF(Comp->getParent()->getParent()),
10481	TII(MF->getSubtarget().getInstrInfo()),
10482	TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10483	LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10484	CompCounterOprNum(CompCounterOprNum), Update(Update),
10485	UpdateCounterOprNum(UpdateCounterOprNum), Init (Init),
10486	IsUpdatePriorComp(IsUpdatePriorComp), Cond (Cond.begin(), Cond.end()) {}
10487
10488	bool shouldIgnoreForPipelining(const MachineInstr MI) const* override {
10489	// Make the instructions for loop control be placed in stage 0.
10490	// The predecessors of Comp are considered by the caller.
10491	return MI == Comp;
10492	}
10493
10494	std::optional<bool> createTripCountGreaterCondition(
10495	int TC, MachineBasicBlock &MBB,
10496	SmallVectorImpl<MachineOperand> &CondParam) override {
10497	// A branch instruction will be inserted as "if (Cond) goto epilogue".
10498	// Cond is normalized for such use.
10499	// The predecessors of the branch are assumed to have already been inserted.
10500	CondParam = Cond;
10501	return {};
10502	}
10503
10504	void createRemainingIterationsGreaterCondition(
10505	int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10506	DenseMap<MachineInstr , MachineInstr > &LastStage0Insts) override;
10507
10508	void setPreheader(MachineBasicBlock *NewPreheader) override {}
10509
10510	void adjustTripCount(int TripCountAdjust) override {}
10511
10512	bool isMVEExpanderSupported() override { return true; }
10513	};
10514	} // namespace
10515
10516	/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
10517	/// is replaced by ReplaceReg. The output register is newly created.
10518	/// The other operands are unchanged from MI.
10519	static Register cloneInstr(const MachineInstr MI, unsigned* ReplaceOprNum,
10520	Register ReplaceReg, MachineBasicBlock &MBB,
10521	MachineBasicBlock::iterator InsertTo) {
10522	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10523	const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10524	const TargetRegisterInfo *TRI =
10525	MBB.getParent()->getSubtarget().getRegisterInfo();
10526	MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(Orig: MI);
10527	Register Result = `0`;
10528	for (unsigned I = `0`; I < NewMI->getNumOperands(); ++I) {
10529	if (I == `0` && NewMI->getOperand(i: `0`).getReg().isVirtual()) {
10530	Result = MRI.createVirtualRegister(
10531	RegClass: MRI.getRegClass(Reg: NewMI->getOperand(i: `0`).getReg()));
10532	NewMI->getOperand(i: I).setReg(Result);
10533	} else if (I == ReplaceOprNum) {
10534	MRI.constrainRegClass(
10535	Reg: ReplaceReg,
10536	RC: TII->getRegClass(MCID: NewMI->getDesc(), OpNum: I, TRI, MF: *MBB.getParent()));
10537	NewMI->getOperand(i: I).setReg(ReplaceReg);
10538	}
10539	}
10540	MBB.insert(I: InsertTo, MI: NewMI);
10541	return Result;
10542	}
10543
10544	void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10545	int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10546	DenseMap<MachineInstr , MachineInstr > &LastStage0Insts) {
10547	// Create and accumulate conditions for next TC iterations.
10548	// Example:
10549	// SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10550	// # iteration of the kernel
10551	//
10552	// # insert the following instructions
10553	// cond = CSINCXr 0, 0, C, implicit $nzcv
10554	// counter = ADDXri counter, 1 # clone from this->Update
10555	// SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10556	// cond = CSINCXr cond, cond, C, implicit $nzcv
10557	// ... (repeat TC times)
10558	// SUBSXri cond, 0, implicit-def $nzcv
10559
10560	assert(CondBranch->getOpcode() == AArch64::Bcc);
10561	// CondCode to exit the loop
10562	AArch64CC::CondCode CC =
10563	(AArch64CC::CondCode)CondBranch->getOperand(i: `0`).getImm();
10564	if (CondBranch->getOperand(i: `1`).getMBB() == LoopBB)
10565	CC = AArch64CC::getInvertedCondCode(Code: CC);
10566
10567	// Accumulate conditions to exit the loop
10568	Register AccCond = AArch64::XZR;
10569
10570	// If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10571	auto AccumulateCond = [&](Register CurCond,
10572	AArch64CC::CondCode CC) -> Register {
10573	Register NewCond = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
10574	BuildMI(BB&: MBB, I: MBB.end(), MIMD: Comp->getDebugLoc(), MCID: TII->get(Opcode: AArch64::CSINCXr))
10575	.addReg(RegNo: NewCond, flags: RegState::Define)
10576	.addReg(RegNo: CurCond)
10577	.addReg(RegNo: CurCond)
10578	.addImm(Val: AArch64CC::getInvertedCondCode(Code: CC));
10579	return NewCond;
10580	};
10581
10582	if (!LastStage0Insts.empty() && LastStage0Insts [Comp]->getParent() == &MBB) {
10583	// Update and Comp for I==0 are already exists in MBB
10584	// (MBB is an unrolled kernel)
10585	Register Counter;
10586	for (int I = `0`; I <= TC; ++I) {
10587	Register NextCounter;
10588	if (I != `0`)
10589	NextCounter =
10590	cloneInstr(MI: Comp, ReplaceOprNum: CompCounterOprNum, ReplaceReg: Counter, MBB, InsertTo: MBB.end());
10591
10592	AccCond = AccumulateCond (AccCond, CC);
10593
10594	if (I != TC) {
10595	if (I == `0`) {
10596	if (Update != Comp && IsUpdatePriorComp) {
10597	Counter =
10598	LastStage0Insts [Comp]->getOperand(i: CompCounterOprNum).getReg();
10599	NextCounter = cloneInstr(MI: Update, ReplaceOprNum: UpdateCounterOprNum, ReplaceReg: Counter, MBB,
10600	InsertTo: MBB.end());
10601	} else {
10602	// can use already calculated value
10603	NextCounter = LastStage0Insts [Update]->getOperand(i: `0`).getReg();
10604	}
10605	} else if (Update != Comp) {
10606	NextCounter =
10607	cloneInstr(MI: Update, ReplaceOprNum: UpdateCounterOprNum, ReplaceReg: Counter, MBB, InsertTo: MBB.end());
10608	}
10609	}
10610	Counter = NextCounter;
10611	}
10612	} else {
10613	Register Counter;
10614	if (LastStage0Insts.empty()) {
10615	// use initial counter value (testing if the trip count is sufficient to
10616	// be executed by pipelined code)
10617	Counter = Init;
10618	if (IsUpdatePriorComp)
10619	Counter =
10620	cloneInstr(MI: Update, ReplaceOprNum: UpdateCounterOprNum, ReplaceReg: Counter, MBB, InsertTo: MBB.end());
10621	} else {
10622	// MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
10623	Counter = LastStage0Insts [Comp]->getOperand(i: CompCounterOprNum).getReg();
10624	}
10625
10626	for (int I = `0`; I <= TC; ++I) {
10627	Register NextCounter;
10628	NextCounter =
10629	cloneInstr(MI: Comp, ReplaceOprNum: CompCounterOprNum, ReplaceReg: Counter, MBB, InsertTo: MBB.end());
10630	AccCond = AccumulateCond (AccCond, CC);
10631	if (I != TC && Update != Comp)
10632	NextCounter =
10633	cloneInstr(MI: Update, ReplaceOprNum: UpdateCounterOprNum, ReplaceReg: Counter, MBB, InsertTo: MBB.end());
10634	Counter = NextCounter;
10635	}
10636	}
10637
10638	// If AccCond == 0, the remainder is greater than TC.
10639	BuildMI(BB&: MBB, I: MBB.end(), MIMD: Comp->getDebugLoc(), MCID: TII->get(Opcode: AArch64::SUBSXri))
10640	.addReg(RegNo: AArch64::XZR, flags: RegState::Define \| RegState::Dead)
10641	.addReg(RegNo: AccCond)
10642	.addImm(Val: `0`)
10643	.addImm(Val: `0`);
10644	Cond.clear();
10645	Cond.push_back(Elt: MachineOperand::CreateImm(Val: AArch64CC::EQ));
10646	}
10647
10648	static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
10649	Register &RegMBB, Register &RegOther) {
10650	assert(Phi.getNumOperands() == `5`);
10651	if (Phi.getOperand(i: `2`).getMBB() == MBB) {
10652	RegMBB = Phi.getOperand(i: `1`).getReg();
10653	RegOther = Phi.getOperand(i: `3`).getReg();
10654	} else {
10655	assert(Phi.getOperand(`4`).getMBB() == MBB);
10656	RegMBB = Phi.getOperand(i: `3`).getReg();
10657	RegOther = Phi.getOperand(i: `1`).getReg();
10658	}
10659	}
10660
10661	static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
10662	if (!Reg.isVirtual())
10663	return false;
10664	const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
10665	return MRI.getVRegDef(Reg)->getParent() != BB;
10666	}
10667
10668	/// If Reg is an induction variable, return true and set some parameters
10669	static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
10670	MachineInstr *&UpdateInst,
10671	unsigned &UpdateCounterOprNum, Register &InitReg,
10672	bool &IsUpdatePriorComp) {
10673	// Example:
10674	//
10675	// Preheader:
10676	// InitReg = ...
10677	// LoopBB:
10678	// Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
10679	// Reg = COPY Reg0 ; COPY is ignored.
10680	// Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
10681	// ; Reg is the value calculated in the previous
10682	// ; iteration, so IsUpdatePriorComp == false.
10683
10684	if (LoopBB->pred_size() != `2`)
10685	return false;
10686	if (!Reg.isVirtual())
10687	return false;
10688	const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
10689	UpdateInst = nullptr;
10690	UpdateCounterOprNum = `0`;
10691	InitReg = `0`;
10692	IsUpdatePriorComp = true;
10693	Register CurReg = Reg;
10694	while (true) {
10695	MachineInstr *Def = MRI.getVRegDef(Reg: CurReg);
10696	if (Def->getParent() != LoopBB)
10697	return false;
10698	if (Def->isCopy()) {
10699	// Ignore copy instructions unless they contain subregisters
10700	if (Def->getOperand(i: `0`).getSubReg() \|\| Def->getOperand(i: `1`).getSubReg())
10701	return false;
10702	CurReg = Def->getOperand(i: `1`).getReg();
10703	} else if (Def->isPHI()) {
10704	if (InitReg != `0`)
10705	return false;
10706	if (!UpdateInst)
10707	IsUpdatePriorComp = false;
10708	extractPhiReg(Phi: *Def, MBB: LoopBB, RegMBB&: CurReg, RegOther&: InitReg);
10709	} else {
10710	if (UpdateInst)
10711	return false;
10712	switch (Def->getOpcode()) {
10713	case AArch64::ADDSXri:
10714	case AArch64::ADDSWri:
10715	case AArch64::SUBSXri:
10716	case AArch64::SUBSWri:
10717	case AArch64::ADDXri:
10718	case AArch64::ADDWri:
10719	case AArch64::SUBXri:
10720	case AArch64::SUBWri:
10721	UpdateInst = Def;
10722	UpdateCounterOprNum = `1`;
10723	break;
10724	case AArch64::ADDSXrr:
10725	case AArch64::ADDSWrr:
10726	case AArch64::SUBSXrr:
10727	case AArch64::SUBSWrr:
10728	case AArch64::ADDXrr:
10729	case AArch64::ADDWrr:
10730	case AArch64::SUBXrr:
10731	case AArch64::SUBWrr:
10732	UpdateInst = Def;
10733	if (isDefinedOutside(Reg: Def->getOperand(i: `2`).getReg(), BB: LoopBB))
10734	UpdateCounterOprNum = `1`;
10735	else if (isDefinedOutside(Reg: Def->getOperand(i: `1`).getReg(), BB: LoopBB))
10736	UpdateCounterOprNum = `2`;
10737	else
10738	return false;
10739	break;
10740	default:
10741	return false;
10742	}
10743	CurReg = Def->getOperand(i: UpdateCounterOprNum).getReg();
10744	}
10745
10746	if (!CurReg.isVirtual())
10747	return false;
10748	if (Reg == CurReg)
10749	break;
10750	}
10751
10752	if (!UpdateInst)
10753	return false;
10754
10755	return true;
10756	}
10757
10758	std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
10759	AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock LoopBB) const* {
10760	// Accept loops that meet the following conditions
10761	// The conditional branch is BCC*
10762	// The compare instruction is ADDS/SUBS/WHILEXX*
10763	// One operand of the compare is an induction variable and the other is a*
10764	// loop invariant value
10765	// The induction variable is incremented/decremented by a single instruction*
10766	// Does not contain CALL or instructions which have unmodeled side effects*
10767
10768	for (MachineInstr &MI : *LoopBB)
10769	if (MI.isCall() \|\| MI.hasUnmodeledSideEffects())
10770	// This instruction may use NZCV, which interferes with the instruction to
10771	// be inserted for loop control.
10772	return nullptr;
10773
10774	MachineBasicBlock TBB = nullptr, FBB = nullptr;
10775	SmallVector<MachineOperand, `4`> Cond;
10776	if (analyzeBranch(MBB&: *LoopBB, TBB, FBB, Cond))
10777	return nullptr;
10778
10779	// Infinite loops are not supported
10780	if (TBB == LoopBB && FBB == LoopBB)
10781	return nullptr;
10782
10783	// Must be conditional branch
10784	if (TBB != LoopBB && FBB == nullptr)
10785	return nullptr;
10786
10787	assert((TBB == LoopBB \|\| FBB == LoopBB) &&
10788	"The Loop must be a single-basic-block loop");
10789
10790	MachineInstr CondBranch = &LoopBB->getFirstTerminator();
10791	const TargetRegisterInfo &TRI = getRegisterInfo();
10792
10793	if (CondBranch->getOpcode() != AArch64::Bcc)
10794	return nullptr;
10795
10796	// Normalization for createTripCountGreaterCondition()
10797	if (TBB == LoopBB)
10798	reverseBranchCondition(Cond);
10799
10800	MachineInstr Comp = nullptr*;
10801	unsigned CompCounterOprNum = `0`;
10802	for (MachineInstr &MI : reverse(C&: *LoopBB)) {
10803	if (MI.modifiesRegister(Reg: AArch64::NZCV, TRI: &TRI)) {
10804	// Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
10805	// operands is a loop invariant value
10806
10807	switch (MI.getOpcode()) {
10808	case AArch64::SUBSXri:
10809	case AArch64::SUBSWri:
10810	case AArch64::ADDSXri:
10811	case AArch64::ADDSWri:
10812	Comp = &MI;
10813	CompCounterOprNum = `1`;
10814	break;
10815	case AArch64::ADDSWrr:
10816	case AArch64::ADDSXrr:
10817	case AArch64::SUBSWrr:
10818	case AArch64::SUBSXrr:
10819	Comp = &MI;
10820	break;
10821	default:
10822	if (isWhileOpcode(Opc: MI.getOpcode())) {
10823	Comp = &MI;
10824	break;
10825	}
10826	return nullptr;
10827	}
10828
10829	if (CompCounterOprNum == `0`) {
10830	if (isDefinedOutside(Reg: Comp->getOperand(i: `1`).getReg(), BB: LoopBB))
10831	CompCounterOprNum = `2`;
10832	else if (isDefinedOutside(Reg: Comp->getOperand(i: `2`).getReg(), BB: LoopBB))
10833	CompCounterOprNum = `1`;
10834	else
10835	return nullptr;
10836	}
10837	break;
10838	}
10839	}
10840	if (!Comp)
10841	return nullptr;
10842
10843	MachineInstr Update = nullptr*;
10844	Register Init;
10845	bool IsUpdatePriorComp;
10846	unsigned UpdateCounterOprNum;
10847	if (!getIndVarInfo(Reg: Comp->getOperand(i: CompCounterOprNum).getReg(), LoopBB,
10848	UpdateInst&: Update, UpdateCounterOprNum, InitReg&: Init, IsUpdatePriorComp))
10849	return nullptr;
10850
10851	return std::make_unique<AArch64PipelinerLoopInfo>(
10852	args&: LoopBB, args&: CondBranch, args&: Comp, args&: CompCounterOprNum, args&: Update, args&: UpdateCounterOprNum,
10853	args&: Init, args&: IsUpdatePriorComp, args&: Cond);
10854	}
10855
10856	/// verifyInstruction - Perform target specific instruction verification.
10857	bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
10858	StringRef &ErrInfo) const {
10859
10860	// Verify that immediate offsets on load/store instructions are within range.
10861	// Stack objects with an FI operand are excluded as they can be fixed up
10862	// during PEI.
10863	TypeSize Scale(`0U`, false), Width(`0U`, false);
10864	int64_t MinOffset, MaxOffset;
10865	if (getMemOpInfo(Opcode: MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
10866	unsigned ImmIdx = getLoadStoreImmIdx(Opc: MI.getOpcode());
10867	if (MI.getOperand(i: ImmIdx).isImm() && !MI.getOperand(i: ImmIdx - `1`).isFI()) {
10868	int64_t Imm = MI.getOperand(i: ImmIdx).getImm();
10869	if (Imm < MinOffset \|\| Imm > MaxOffset) {
10870	ErrInfo = "Unexpected immediate on load/store instruction";
10871	return false;
10872	}
10873	}
10874	}
10875	return true;
10876	}
10877
10878	#define GET_INSTRINFO_HELPERS
10879	#define GET_INSTRMAP_INFO
10880	#include "AArch64GenInstrInfo.inc"
10881

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp