1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64MachineFunctionInfo.h"
14#include "AArch64TargetMachine.h"
15#include "MCTargetDesc/AArch64AddressingModes.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/SelectionDAGISel.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/KnownBits.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
namespace {

/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Refresh the cached subtarget: it can differ per function (e.g. when
    // per-function target attributes are in use).
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;
  void PreprocessISelDAG() override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  template <signed Low, signed High>
  bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);

  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, /*AllowROR=*/false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, /*AllowROR=*/true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, /*IsSignedImm=*/true, /*BW=*/9,
                                         /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, /*IsSignedImm=*/false, /*BW=*/6,
                                         /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, /*Size=*/16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, /*Size=*/16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template <int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template <int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  // Match the high half of a 128-bit vector: an EXTRACT_SUBVECTOR of the
  // upper 64 bits (optionally looked through a bitcast on little-endian).
  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }

  // Match (x + (1 << (shift - 1))) >> shift, i.e. a rounding right shift,
  // where the rounding constant is a MOVIshift or constant DUP splat.
  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    APInt Imm;
    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0)
                      << Op.getOperand(1).getConstantOperandVal(1));
    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0));
    else
      return false;

    // The addend must be exactly the rounding bias for this shift amount.
    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
    return true;
  }

  // Match UNDEF or a splat of zero (integer or FP).
  bool SelectDupZeroOrUndef(SDValue N) {
    switch (N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  // Trivially accept any operand.
  bool SelectAny(SDValue) { return true; }

  // Match a splat of zero (integer or FP), but not UNDEF.
  bool SelectDupZero(SDValue N) {
    switch (N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    }

    return false;
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  // Match a splatted shift-right immediate in [1, element-size].
  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template <signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  // Match a constant in [0, Max] and scale it by Scale for use as an EXT
  // byte offset.
  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  // Map a constant C in [0, Max] to the register BaseReg + C.
  template <unsigned BaseReg, unsigned Max>
  bool ImmToReg(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();

      if (C > Max)
        return false;

      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element that is returned
  /// unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  // Similar to above, except the register must start at a multiple of the
  // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
  SDValue createZMulTuple(ArrayRef<SDValue> Regs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  void SelectPtrauthAuth(SDNode *N);
  void SelectPtrauthResign(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
                                       unsigned Scale, unsigned Opc_ri,
                                       unsigned Opc_rr);
  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
                                       bool IsZmMulti, unsigned Opcode,
                                       bool HasPred = false);
  void SelectPExtPair(SDNode *N, unsigned Opc);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                 bool IsTupleInput, unsigned Opc);
  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);

  template <unsigned MaxIdx, unsigned Scale>
  void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
                             unsigned Op);
  void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                              unsigned Op, unsigned MaxIdx, unsigned Scale,
                              unsigned BaseReg = 0);
  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
                                 unsigned Opc, uint32_t MaxImm);

  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);

  template <unsigned MaxIdx, unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool trySelectCastFixedLengthToScalableVector(SDNode *N);
  bool trySelectCastScalableToFixedLengthVector(SDNode *N);

  bool trySelectXAR(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, /*IsSignedImm=*/true, /*BW=*/7,
                                         Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template <unsigned RegWidth>
  bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
  }
  bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                     unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                          bool Negate);
  bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
                          SDValue &Shift, bool Negate);
  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                              bool Negate);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
  bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
  bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
  bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);

  bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
  bool SelectAnyPredicate(SDValue N);

  bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);

  template <bool MatchCBB>
  bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
};

/// Legacy pass-manager wrapper around AArch64DAGToDAGISel.
class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
};
} // end anonymous namespace
544
// Pass identification for the legacy pass manager; the address of ID is the
// unique key.
char AArch64DAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
548
549/// addBitcastHints - This method adds bitcast hints to the operands of a node
550/// to help instruction selector determine which operands are in Neon registers.
551static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N) {
552 SDLoc DL(&N);
553 auto getFloatVT = [&](EVT VT) {
554 EVT ScalarVT = VT.getScalarType();
555 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
556 return VT.changeElementType(Context&: *(DAG.getContext()),
557 EltVT: ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
558 };
559 SmallVector<SDValue, 2> NewOps;
560 NewOps.reserve(N: N.getNumOperands());
561
562 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
563 auto bitcasted = DAG.getBitcast(VT: getFloatVT(N.getOperand(Num: I).getValueType()),
564 V: N.getOperand(Num: I));
565 NewOps.push_back(Elt: bitcasted);
566 }
567 EVT OrigVT = N.getValueType(ResNo: 0);
568 SDValue OpNode = DAG.getNode(Opcode: N.getOpcode(), DL, VT: getFloatVT(OrigVT), Ops: NewOps);
569 return DAG.getBitcast(VT: OrigVT, V: OpNode);
570}
571
572/// isIntImmediate - This method tests to see if the node is a constant
573/// operand. If so Imm will receive the 32-bit value.
574static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
575 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(Val: N)) {
576 Imm = C->getZExtValue();
577 return true;
578 }
579 return false;
580}
581
582// isIntImmediate - This method tests to see if a constant operand.
583// If so Imm will receive the value.
584static bool isIntImmediate(SDValue N, uint64_t &Imm) {
585 return isIntImmediate(N: N.getNode(), Imm);
586}
587
588// isOpcWithIntImmediate - This method tests to see if the node is a specific
589// opcode and that it has a immediate integer right operand.
590// If so Imm will receive the 32 bit value.
591static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
592 uint64_t &Imm) {
593 return N->getOpcode() == Opc &&
594 isIntImmediate(N: N->getOperand(Num: 1).getNode(), Imm);
595}
596
597// isIntImmediateEq - This method tests to see if N is a constant operand that
598// is equivalent to 'ImmExpected'.
599#ifndef NDEBUG
600static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
601 uint64_t Imm;
602 if (!isIntImmediate(N.getNode(), Imm))
603 return false;
604 return Imm == ImmExpected;
605}
606#endif
607
608static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
609 assert(RegWidth == 32 || RegWidth == 64);
610 if (RegWidth == 32)
611 return APInt(RegWidth,
612 uint32_t(AArch64_AM::decodeAdvSIMDModImmType11(Imm)));
613 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
614}
615
616// Decodes the integer splat value from a NEON splat operation.
617static std::optional<APInt> DecodeNEONSplat(SDValue N) {
618 assert(N.getValueType().isInteger() && "Only integers are supported");
619 unsigned SplatWidth = N.getScalarValueSizeInBits();
620 if (N->getOpcode() == AArch64ISD::NVCAST) {
621 SDValue Op = N->getOperand(Num: 0);
622 if (Op.getOpcode() != AArch64ISD::FMOV ||
623 Op.getScalarValueSizeInBits() != N.getScalarValueSizeInBits())
624 return std::nullopt;
625 return DecodeFMOVImm(Imm: Op.getConstantOperandVal(i: 0), RegWidth: SplatWidth);
626 }
627 if (N->getOpcode() == AArch64ISD::MOVI)
628 return APInt(SplatWidth, N.getConstantOperandVal(i: 0));
629 if (N->getOpcode() == AArch64ISD::MOVIshift)
630 return APInt(SplatWidth, N.getConstantOperandVal(i: 0)
631 << N.getConstantOperandVal(i: 1));
632 if (N->getOpcode() == AArch64ISD::MVNIshift)
633 return ~APInt(SplatWidth, N.getConstantOperandVal(i: 0)
634 << N.getConstantOperandVal(i: 1));
635 if (N->getOpcode() == AArch64ISD::DUP)
636 if (auto *Const = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0)))
637 return Const->getAPIntValue().trunc(width: SplatWidth);
638 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
639 // in AArch64ISelLowering. AArch64ISD::MOVIedit support will allow more folds.
640 return std::nullopt;
641}
642
643bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
644 SDValue &Imm) {
645 std::optional<APInt> ImmVal = DecodeNEONSplat(N);
646 if (!ImmVal)
647 return false;
648 uint64_t Encoding;
649 if (!AArch64_AM::isSVELogicalImm(SizeInBits: N.getScalarValueSizeInBits(),
650 ImmVal: ImmVal->getZExtValue(), Encoding))
651 return false;
652
653 Imm = CurDAG->getTargetConstant(Val: Encoding, DL: SDLoc(N), VT: MVT::i64);
654 return true;
655}
656
657bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
658 SDValue &Shift) {
659 if (std::optional<APInt> ImmVal = DecodeNEONSplat(N))
660 return SelectSVEAddSubImm(DL: SDLoc(N), Value: *ImmVal,
661 VT: N.getValueType().getScalarType().getSimpleVT(),
662 Imm, Shift,
663 /*Negate=*/false);
664 return false;
665}
666
667bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
668 SDValue &Imm) {
669 if (std::optional<APInt> ImmVal = DecodeNEONSplat(N))
670 return SelectSVESignedArithImm(DL: SDLoc(N), Value: *ImmVal, Imm);
671 return false;
672}
673
674bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
675 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
676 std::vector<SDValue> &OutOps) {
677 switch(ConstraintID) {
678 default:
679 llvm_unreachable("Unexpected asm memory constraint");
680 case InlineAsm::ConstraintCode::m:
681 case InlineAsm::ConstraintCode::o:
682 case InlineAsm::ConstraintCode::Q:
683 // We need to make sure that this one operand does not end up in XZR, thus
684 // require the address to be in a PointerRegClass register.
685 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
686 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
687 SDLoc dl(Op);
688 SDValue RC = CurDAG->getTargetConstant(Val: TRC->getID(), DL: dl, VT: MVT::i64);
689 SDValue NewOp =
690 SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
691 dl, VT: Op.getValueType(),
692 Op1: Op, Op2: RC), 0);
693 OutOps.push_back(x: NewOp);
694 return false;
695 }
696 return true;
697}
698
699/// SelectArithImmed - Select an immediate value that can be represented as
700/// a 12-bit value shifted left by either 0 or 12. If so, return true with
701/// Val set to the 12-bit value and Shift set to the shifter operand.
702bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
703 SDValue &Shift) {
704 // This function is called from the addsub_shifted_imm ComplexPattern,
705 // which lists [imm] as the list of opcode it's interested in, however
706 // we still need to check whether the operand is actually an immediate
707 // here because the ComplexPattern opcode list is only used in
708 // root-level opcode matching.
709 if (!isa<ConstantSDNode>(Val: N.getNode()))
710 return false;
711
712 uint64_t Immed = N.getNode()->getAsZExtVal();
713 unsigned ShiftAmt;
714
715 if (Immed >> 12 == 0) {
716 ShiftAmt = 0;
717 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
718 ShiftAmt = 12;
719 Immed = Immed >> 12;
720 } else
721 return false;
722
723 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
724 SDLoc dl(N);
725 Val = CurDAG->getTargetConstant(Val: Immed, DL: dl, VT: MVT::i32);
726 Shift = CurDAG->getTargetConstant(Val: ShVal, DL: dl, VT: MVT::i32);
727 return true;
728}
729
730/// SelectNegArithImmed - As above, but negates the value before trying to
731/// select it.
732bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
733 SDValue &Shift) {
734 // This function is called from the addsub_shifted_imm ComplexPattern,
735 // which lists [imm] as the list of opcode it's interested in, however
736 // we still need to check whether the operand is actually an immediate
737 // here because the ComplexPattern opcode list is only used in
738 // root-level opcode matching.
739 if (!isa<ConstantSDNode>(Val: N.getNode()))
740 return false;
741
742 // The immediate operand must be a 24-bit zero-extended immediate.
743 uint64_t Immed = N.getNode()->getAsZExtVal();
744
745 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
746 // have the opposite effect on the C flag, so this pattern mustn't match under
747 // those circumstances.
748 if (Immed == 0)
749 return false;
750
751 if (N.getValueType() == MVT::i32)
752 Immed = ~((uint32_t)Immed) + 1;
753 else
754 Immed = ~Immed + 1ULL;
755 if (Immed & 0xFFFFFFFFFF000000ULL)
756 return false;
757
758 Immed &= 0xFFFFFFULL;
759 return SelectArithImmed(N: CurDAG->getConstant(Val: Immed, DL: SDLoc(N), VT: MVT::i32), Val,
760 Shift);
761}
762
763/// getShiftTypeForNode - Translate a shift node to the corresponding
764/// ShiftType value.
765static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
766 switch (N.getOpcode()) {
767 default:
768 return AArch64_AM::InvalidShiftExtend;
769 case ISD::SHL:
770 return AArch64_AM::LSL;
771 case ISD::SRL:
772 return AArch64_AM::LSR;
773 case ISD::SRA:
774 return AArch64_AM::ASR;
775 case ISD::ROTR:
776 return AArch64_AM::ROR;
777 }
778}
779
780static bool isMemOpOrPrefetch(SDNode *N) {
781 return isa<MemSDNode>(Val: *N) || N->getOpcode() == AArch64ISD::PREFETCH;
782}
783
784/// Determine whether it is worth it to fold SHL into the addressing
785/// mode.
786static bool isWorthFoldingSHL(SDValue V) {
787 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
788 // It is worth folding logical shift of up to three places.
789 auto *CSD = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1));
790 if (!CSD)
791 return false;
792 unsigned ShiftVal = CSD->getZExtValue();
793 if (ShiftVal > 3)
794 return false;
795
796 // Check if this particular node is reused in any non-memory related
797 // operation. If yes, do not try to fold this node into the address
798 // computation, since the computation will be kept.
799 const SDNode *Node = V.getNode();
800 for (SDNode *UI : Node->users())
801 if (!isMemOpOrPrefetch(N: UI))
802 for (SDNode *UII : UI->users())
803 if (!isMemOpOrPrefetch(N: UII))
804 return false;
805 return true;
806}
807
808/// Determine whether it is worth to fold V into an extended register addressing
809/// mode.
810bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
811 // Trivial if we are optimizing for code size or if there is only
812 // one use of the value.
813 if (CurDAG->shouldOptForSize() || V.hasOneUse())
814 return true;
815
816 // If a subtarget has a slow shift, folding a shift into multiple loads
817 // costs additional micro-ops.
818 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
819 return false;
820
821 // Check whether we're going to emit the address arithmetic anyway because
822 // it's used by a non-address operation.
823 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
824 return true;
825 if (V.getOpcode() == ISD::ADD) {
826 const SDValue LHS = V.getOperand(i: 0);
827 const SDValue RHS = V.getOperand(i: 1);
828 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: LHS))
829 return true;
830 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(V: RHS))
831 return true;
832 }
833
834 // It hurts otherwise, since the value will be reused.
835 return false;
836}
837
/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
/// to select more shifted register
///
/// Rewrites an AND of a shifted value into a bitfield move (UBFM/SBFM)
/// plus an "LSL #c2" shifted-register modifier, so the AND/shift pair can
/// be consumed by a single shifted-register ALU instruction.
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  // Only 32- and 64-bit GPR operations have a shifted-register form.
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  // Both the AND and the inner shift are consumed by this transform, so each
  // must be single-use or we would duplicate work.
  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(i: 0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(Val: LHS.getOperand(i: 1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!RHSC)
    return false;

  // The mask must be a single contiguous run of ones: MaskLen set bits
  // starting LowZBits above the bottom.
  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(MaskIdx&: LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
    // BitWidth != LowZBits + MaskLen doesn't match the pattern
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA need all high bits
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  // Emit the replacement right-shift as a bitfield move and report the
  // residual left shift as an "LSL #LowZBits" shifter operand.
  assert(NewShiftC < BitWidth && "Invalid shift amount");
  SDValue NewShiftAmt = CurDAG->getTargetConstant(Val: NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(Val: BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(Opcode: NewShiftOp, dl: DL, VT, Op1: LHS->getOperand(Num: 0),
                                       Op2: NewShiftAmt, Op3: BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: LowZBits);
  Shift = CurDAG->getTargetConstant(Val: ShVal, DL, VT: MVT::i32);
  return true;
}
915
916/// getExtendTypeForNode - Translate an extend node to the corresponding
917/// ExtendType value.
918static AArch64_AM::ShiftExtendType
919getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
920 if (N.getOpcode() == ISD::SIGN_EXTEND ||
921 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
922 EVT SrcVT;
923 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
924 SrcVT = cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT();
925 else
926 SrcVT = N.getOperand(i: 0).getValueType();
927
928 if (!IsLoadStore && SrcVT == MVT::i8)
929 return AArch64_AM::SXTB;
930 else if (!IsLoadStore && SrcVT == MVT::i16)
931 return AArch64_AM::SXTH;
932 else if (SrcVT == MVT::i32)
933 return AArch64_AM::SXTW;
934 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
935
936 return AArch64_AM::InvalidShiftExtend;
937 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
938 N.getOpcode() == ISD::ANY_EXTEND) {
939 EVT SrcVT = N.getOperand(i: 0).getValueType();
940 if (!IsLoadStore && SrcVT == MVT::i8)
941 return AArch64_AM::UXTB;
942 else if (!IsLoadStore && SrcVT == MVT::i16)
943 return AArch64_AM::UXTH;
944 else if (SrcVT == MVT::i32)
945 return AArch64_AM::UXTW;
946 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
947
948 return AArch64_AM::InvalidShiftExtend;
949 } else if (N.getOpcode() == ISD::AND) {
950 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
951 if (!CSD)
952 return AArch64_AM::InvalidShiftExtend;
953 uint64_t AndMask = CSD->getZExtValue();
954
955 switch (AndMask) {
956 default:
957 return AArch64_AM::InvalidShiftExtend;
958 case 0xFF:
959 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
960 case 0xFFFF:
961 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
962 case 0xFFFFFFFF:
963 return AArch64_AM::UXTW;
964 }
965 }
966
967 return AArch64_AM::InvalidShiftExtend;
968}
969
970/// Determine whether it is worth to fold V into an extended register of an
971/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
972/// instruction, and the shift should be treated as worth folding even if has
973/// multiple uses.
974bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
975 // Trivial if we are optimizing for code size or if there is only
976 // one use of the value.
977 if (CurDAG->shouldOptForSize() || V.hasOneUse())
978 return true;
979
980 // If a subtarget has a fastpath LSL we can fold a logical shift into
981 // the add/sub and save a cycle.
982 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
983 V.getConstantOperandVal(i: 1) <= 4 &&
984 getExtendTypeForNode(N: V.getOperand(i: 0)) == AArch64_AM::InvalidShiftExtend)
985 return true;
986
987 // It hurts otherwise, since the value will be reused.
988 return false;
989}
990
991/// SelectShiftedRegister - Select a "shifted register" operand. If the value
992/// is not shifted, set the Shift operand to default of "LSL 0". The logical
993/// instructions allow the shifted register to be rotated, but the arithmetic
994/// instructions do not. The AllowROR parameter specifies whether ROR is
995/// supported.
996bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
997 SDValue &Reg, SDValue &Shift) {
998 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
999 return true;
1000
1001 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
1002 if (ShType == AArch64_AM::InvalidShiftExtend)
1003 return false;
1004 if (!AllowROR && ShType == AArch64_AM::ROR)
1005 return false;
1006
1007 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1008 unsigned BitSize = N.getValueSizeInBits();
1009 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1010 unsigned ShVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
1011
1012 Reg = N.getOperand(i: 0);
1013 Shift = CurDAG->getTargetConstant(Val: ShVal, DL: SDLoc(N), VT: MVT::i32);
1014 return isWorthFoldingALU(V: N, LSL: true);
1015 }
1016
1017 return false;
1018}
1019
1020/// Instructions that accept extend modifiers like UXTW expect the register
1021/// being extended to be a GPR32, but the incoming DAG might be acting on a
1022/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1023/// this is the case.
1024static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
1025 if (N.getValueType() == MVT::i32)
1026 return N;
1027
1028 SDLoc dl(N);
1029 return CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sub_32, DL: dl, VT: MVT::i32, Operand: N);
1030}
1031
1032// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1033template<signed Low, signed High, signed Scale>
1034bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1035 if (!isa<ConstantSDNode>(Val: N))
1036 return false;
1037
1038 int64_t MulImm = cast<ConstantSDNode>(Val&: N)->getSExtValue();
1039 if ((MulImm % std::abs(x: Scale)) == 0) {
1040 int64_t RDVLImm = MulImm / Scale;
1041 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1042 Imm = CurDAG->getSignedTargetConstant(Val: RDVLImm, DL: SDLoc(N), VT: MVT::i32);
1043 return true;
1044 }
1045 }
1046
1047 return false;
1048}
1049
1050// Returns a suitable RDSVL multiplier from a left shift.
1051template <signed Low, signed High>
1052bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1053 if (!isa<ConstantSDNode>(Val: N))
1054 return false;
1055
1056 int64_t MulImm = 1LL << cast<ConstantSDNode>(Val&: N)->getSExtValue();
1057 if (MulImm >= Low && MulImm <= High) {
1058 Imm = CurDAG->getSignedTargetConstant(Val: MulImm, DL: SDLoc(N), VT: MVT::i32);
1059 return true;
1060 }
1061
1062 return false;
1063}
1064
/// SelectArithExtendedRegister - Select a "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    // (shl (extend x), c): the shift amount must be a constant in [0, 4].
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N: N.getOperand(i: 0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    // Skip over the extend: the extension is encoded in the Ext modifier.
    Reg = N.getOperand(i: 0).getOperand(i: 0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    // Don't match sext of vector extracts. These can use SMOV, but if we match
    // this as an extended register, we'll always fold the extend into an ALU op
    // user of the extend (which results in a UMOV).
    if (AArch64_AM::isSignExtendShiftType(Type: Ext)) {
      SDValue Op = N.getOperand(i: 0);
      if (Op->getOpcode() == ISD::ANY_EXTEND)
        Op = Op->getOperand(Num: 0);
      if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          Op.getOperand(i: 0).getValueType().isFixedLengthVector())
        return false;
    }

    Reg = N.getOperand(i: 0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
    // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(ResNo: 0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, N: Reg);
  Shift = CurDAG->getTargetConstant(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal), DL: SDLoc(N),
                                    VT: MVT::i32);
  return isWorthFoldingALU(V: N);
}
1129
1130/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1131/// operand is referred by the instructions have SP operand
1132bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1133 SDValue &Shift) {
1134 unsigned ShiftVal = 0;
1135 AArch64_AM::ShiftExtendType Ext;
1136
1137 if (N.getOpcode() != ISD::SHL)
1138 return false;
1139
1140 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
1141 if (!CSD)
1142 return false;
1143 ShiftVal = CSD->getZExtValue();
1144 if (ShiftVal > 4)
1145 return false;
1146
1147 Ext = AArch64_AM::UXTX;
1148 Reg = N.getOperand(i: 0);
1149 Shift = CurDAG->getTargetConstant(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal), DL: SDLoc(N),
1150 VT: MVT::i32);
1151 return isWorthFoldingALU(V: N);
1152}
1153
1154/// If there's a use of this ADDlow that's not itself a load/store then we'll
1155/// need to create a real ADD instruction from it anyway and there's no point in
1156/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1157/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1158/// leads to duplicated ADRP instructions.
1159static bool isWorthFoldingADDlow(SDValue N) {
1160 for (auto *User : N->users()) {
1161 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1162 User->getOpcode() != ISD::ATOMIC_LOAD &&
1163 User->getOpcode() != ISD::ATOMIC_STORE)
1164 return false;
1165
1166 // ldar and stlr have much more restrictive addressing modes (just a
1167 // register).
1168 if (isStrongerThanMonotonic(AO: cast<MemSDNode>(Val: User)->getSuccessOrdering()))
1169 return false;
1170 }
1171
1172 return true;
1173}
1174
1175/// Check if the immediate offset is valid as a scaled immediate.
1176static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1177 unsigned Size) {
1178 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1179 Offset < (Range << Log2_32(Value: Size)))
1180 return true;
1181 return false;
1182}
1183
/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
                                                        unsigned BW, unsigned Size,
                                                        SDValue &Base,
                                                        SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  // A bare frame index selects as "FI + 0".
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
  // selected here doesn't support labels/immediates, only base+offset.
  if (CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
      if (IsSignedImm) {
        // Signed form: the offset must be Size-aligned and, once scaled down,
        // fit in a signed BW-bit field.
        int64_t RHSC = RHS->getSExtValue();
        unsigned Scale = Log2_32(Value: Size);
        int64_t Range = 0x1LL << (BW - 1);

        if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
            RHSC < (Range << Scale)) {
          Base = N.getOperand(i: 0);
          // A frame-index base is lowered to a target frame index here so it
          // doesn't get selected separately.
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
          return true;
        }
      } else {
        // unsigned Immediate: Size-aligned and, once scaled down, fits in an
        // unsigned BW-bit field.
        uint64_t RHSC = RHS->getZExtValue();
        unsigned Scale = Log2_32(Value: Size);
        uint64_t Range = 0x1ULL << BW;

        if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
          Base = N.getOperand(i: 0);
          if (Base.getOpcode() == ISD::FrameIndex) {
            int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
            Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
          }
          OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
          return true;
        }
      }
    }
  }
  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  // add x0, Xbase, #offset
  // stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
  return true;
}
1246
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                                SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  // A bare frame index selects as "FI + 0".
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
    return true;
  }

  // Fold an ADDlow (the low half of an ADRP/ADD pair): its low-part operand
  // becomes the immediate.
  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(Val: N.getOperand(i: 1).getNode());
    Base = N.getOperand(i: 0);
    OffImm = N.getOperand(i: 1);
    // If the low part is not a global address there is no alignment to check.
    if (!GAN)
      return true;

    // For globals the scaled form is only valid when both the constant offset
    // and the global's alignment are compatible with the access size.
    if (GAN->getOffset() % Size == 0 &&
        GAN->getGlobal()->getPointerAlignment(DL) >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Op: N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Value: Size);
      // The unsigned 12-bit form covers offsets in [0, 0x1000 * Size).
      if (isValidAsScaledImmediate(Offset: RHSC, Range: 0x1000, Size)) {
        Base = N.getOperand(i: 0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(Val: RHSC >> Scale, DL: dl, VT: MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  // Note: returning false here deliberately rejects this scaled form so the
  // unscaled pattern gets a chance to match instead.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  // add x0, Xbase, #offset
  // ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
  return true;
}
1304
1305/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1306/// immediate" address. This should only match when there is an offset that
1307/// is not valid for a scaled immediate addressing mode. The "Size" argument
1308/// is the size in bytes of the memory reference, which is needed here to know
1309/// what is valid for a scaled immediate.
1310bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1311 SDValue &Base,
1312 SDValue &OffImm) {
1313 if (!CurDAG->isBaseWithConstantOffset(Op: N))
1314 return false;
1315 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1))) {
1316 int64_t RHSC = RHS->getSExtValue();
1317 if (RHSC >= -256 && RHSC < 256) {
1318 Base = N.getOperand(i: 0);
1319 if (Base.getOpcode() == ISD::FrameIndex) {
1320 int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
1321 const TargetLowering *TLI = getTargetLowering();
1322 Base = CurDAG->getTargetFrameIndex(
1323 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
1324 }
1325 OffImm = CurDAG->getTargetConstant(Val: RHSC, DL: SDLoc(N), VT: MVT::i64);
1326 return true;
1327 }
1328 }
1329 return false;
1330}
1331
1332static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1333 SDLoc dl(N);
1334 SDValue ImpDef = SDValue(
1335 CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl, VT: MVT::i64), 0);
1336 return CurDAG->getTargetInsertSubreg(SRIdx: AArch64::sub_32, DL: dl, VT: MVT::i64, Operand: ImpDef,
1337 Subreg: N);
1338}
1339
/// Check if the given SHL node (\p N), can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  // The shift amount must be a constant in [0, 7].
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    // The shifted value must itself be an extend usable as a load/store
    // modifier (UXTW/SXTW only in that position).
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N: N.getOperand(i: 0), IsLoadStore: true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    // Skip over the extend and narrow to the 32-bit source register.
    Offset = narrowIfNeeded(CurDAG, N: N.getOperand(i: 0).getOperand(i: 0));
    SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
                                           VT: MVT::i32);
  } else {
    Offset = N.getOperand(i: 0);
    SignExtend = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i32);
  }

  // Note: Offset/SignExtend have already been written at this point; callers
  // only consume them when this function returns true.
  // The shift must either be zero or exactly match the access size.
  unsigned LegalShiftVal = Log2_32(Value: Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  return isWorthFoldingAddr(V: N, Size);
}
1373
/// Select a base + (possibly extended/shifted) 32-bit index register
/// addressing mode (W-register offset) from an ADD.
bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(i: 0);
  SDValue RHS = N.getOperand(i: 1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(Val: LHS) || isa<ConstantSDNode>(Val: RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->users()) {
    if (!isMemOpOrPrefetch(N: UI))
      return false;
  }

  // Remember if it is worth folding N when it produces extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(N: RHS, Size, WantExtend: true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(Val: true, DL: dl, VT: MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(N: LHS, Size, WantExtend: true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(Val: true, DL: dl, VT: MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(Val: false, DL: dl, VT: MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(N: LHS, IsLoadStore: true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, N: LHS.getOperand(i: 0));
    SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
                                           VT: MVT::i32);
    // Only accept the match if folding the extend itself is worthwhile.
    if (isWorthFoldingAddr(V: LHS, Size))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(N: RHS, IsLoadStore: true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, N: RHS.getOperand(i: 0));
    SignExtend = CurDAG->getTargetConstant(Val: Ext == AArch64_AM::SXTW, DL: dl,
                                           VT: MVT::i32);
    if (isWorthFoldingAddr(V: RHS, Size))
      return true;
  }

  return false;
}
1447
1448// Check if the given immediate is preferred by ADD. If an immediate can be
1449// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1450// encoded by one MOVZ, return true.
1451static bool isPreferredADD(int64_t ImmOff) {
1452 // Constant in [0x0, 0xfff] can be encoded in ADD.
1453 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1454 return true;
1455 // Check if it can be encoded in an "ADD LSL #12".
1456 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1457 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant.
1458 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1459 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1460 return false;
1461}
1462
1463bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1464 SDValue &Base, SDValue &Offset,
1465 SDValue &SignExtend,
1466 SDValue &DoShift) {
1467 if (N.getOpcode() != ISD::ADD)
1468 return false;
1469 SDValue LHS = N.getOperand(i: 0);
1470 SDValue RHS = N.getOperand(i: 1);
1471 SDLoc DL(N);
1472
1473 // Check if this particular node is reused in any non-memory related
1474 // operation. If yes, do not try to fold this node into the address
1475 // computation, since the computation will be kept.
1476 const SDNode *Node = N.getNode();
1477 for (SDNode *UI : Node->users()) {
1478 if (!isMemOpOrPrefetch(N: UI))
1479 return false;
1480 }
1481
1482 // Watch out if RHS is a wide immediate, it can not be selected into
1483 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1484 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1485 // instructions like:
1486 // MOV X0, WideImmediate
1487 // ADD X1, BaseReg, X0
1488 // LDR X2, [X1, 0]
1489 // For such situation, using [BaseReg, XReg] addressing mode can save one
1490 // ADD/SUB:
1491 // MOV X0, WideImmediate
1492 // LDR X2, [BaseReg, X0]
1493 if (isa<ConstantSDNode>(Val: RHS)) {
1494 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1495 // Skip the immediate can be selected by load/store addressing mode.
1496 // Also skip the immediate can be encoded by a single ADD (SUB is also
1497 // checked by using -ImmOff).
1498 if (isValidAsScaledImmediate(Offset: ImmOff, Range: 0x1000, Size) ||
1499 isPreferredADD(ImmOff) || isPreferredADD(ImmOff: -ImmOff))
1500 return false;
1501
1502 SDValue Ops[] = { RHS };
1503 SDNode *MOVI =
1504 CurDAG->getMachineNode(Opcode: AArch64::MOVi64imm, dl: DL, VT: MVT::i64, Ops);
1505 SDValue MOVIV = SDValue(MOVI, 0);
1506 // This ADD of two X register will be selected into [Reg+Reg] mode.
1507 N = CurDAG->getNode(Opcode: ISD::ADD, DL, VT: MVT::i64, N1: LHS, N2: MOVIV);
1508 }
1509
1510 // Remember if it is worth folding N when it produces extended register.
1511 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(V: N, Size);
1512
1513 // Try to match a shifted extend on the RHS.
1514 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1515 SelectExtendedSHL(N: RHS, Size, WantExtend: false, Offset, SignExtend)) {
1516 Base = LHS;
1517 DoShift = CurDAG->getTargetConstant(Val: true, DL, VT: MVT::i32);
1518 return true;
1519 }
1520
1521 // Try to match a shifted extend on the LHS.
1522 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1523 SelectExtendedSHL(N: LHS, Size, WantExtend: false, Offset, SignExtend)) {
1524 Base = RHS;
1525 DoShift = CurDAG->getTargetConstant(Val: true, DL, VT: MVT::i32);
1526 return true;
1527 }
1528
1529 // Match any non-shifted, non-extend, non-immediate add expression.
1530 Base = LHS;
1531 Offset = RHS;
1532 SignExtend = CurDAG->getTargetConstant(Val: false, DL, VT: MVT::i32);
1533 DoShift = CurDAG->getTargetConstant(Val: false, DL, VT: MVT::i32);
1534 // Reg1 + Reg2 is free: no check needed.
1535 return true;
1536}
1537
1538SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1539 static const unsigned RegClassIDs[] = {
1540 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1541 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1542 AArch64::dsub2, AArch64::dsub3};
1543
1544 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1545}
1546
1547SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1548 static const unsigned RegClassIDs[] = {
1549 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1550 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1551 AArch64::qsub2, AArch64::qsub3};
1552
1553 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1554}
1555
1556SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1557 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1558 AArch64::ZPR3RegClassID,
1559 AArch64::ZPR4RegClassID};
1560 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1561 AArch64::zsub2, AArch64::zsub3};
1562
1563 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1564}
1565
1566SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1567 assert(Regs.size() == 2 || Regs.size() == 4);
1568
1569 // The createTuple interface requires 3 RegClassIDs for each possible
1570 // tuple type even though we only have them for ZPR2 and ZPR4.
1571 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1572 AArch64::ZPR4Mul4RegClassID};
1573 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1574 AArch64::zsub2, AArch64::zsub3};
1575 return createTuple(Vecs: Regs, RegClassIDs, SubRegs);
1576}
1577
1578SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1579 const unsigned RegClassIDs[],
1580 const unsigned SubRegs[]) {
1581 // There's no special register-class for a vector-list of 1 element: it's just
1582 // a vector.
1583 if (Regs.size() == 1)
1584 return Regs[0];
1585
1586 assert(Regs.size() >= 2 && Regs.size() <= 4);
1587
1588 SDLoc DL(Regs[0]);
1589
1590 SmallVector<SDValue, 4> Ops;
1591
1592 // First operand of REG_SEQUENCE is the desired RegClass.
1593 Ops.push_back(
1594 Elt: CurDAG->getTargetConstant(Val: RegClassIDs[Regs.size() - 2], DL, VT: MVT::i32));
1595
1596 // Then we get pairs of source & subregister-position for the components.
1597 for (unsigned i = 0; i < Regs.size(); ++i) {
1598 Ops.push_back(Elt: Regs[i]);
1599 Ops.push_back(Elt: CurDAG->getTargetConstant(Val: SubRegs[i], DL, VT: MVT::i32));
1600 }
1601
1602 SDNode *N =
1603 CurDAG->getMachineNode(Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped, Ops);
1604 return SDValue(N, 0);
1605}
1606
/// Select a table-lookup node into machine opcode Opc, packing its NumVecs
/// table vectors into a Q-register tuple. When isExt, operand #1 is passed
/// through as an extra leading operand, shifting where the vectors start.
void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);

  // 1 when there is an extra leading operand, 0 otherwise.
  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: Vec0Off, M: NumVecs));
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(Elt: N->getOperand(Num: 1));
  Ops.push_back(Elt: RegSeq);
  // The index operand follows the table vectors.
  Ops.push_back(Elt: N->getOperand(Num: NumVecs + ExtOff + 1));
  ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT, Ops));
}
1626
/// Split a pointer-authentication discriminator into the
/// (constant-discriminator, address-discriminator) operand pair expected by
/// the AUT/PAC machine instructions.
static std::tuple<SDValue, SDValue>
extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
  SDLoc DL(Disc);
  SDValue AddrDisc;
  SDValue ConstDisc;

  // If this is a blend, remember the constant and address discriminators.
  // Otherwise, it's either a constant discriminator, or a non-blended
  // address discriminator.
  if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
      Disc->getConstantOperandVal(Num: 0) == Intrinsic::ptrauth_blend) {
    AddrDisc = Disc->getOperand(Num: 1);
    ConstDisc = Disc->getOperand(Num: 2);
  } else {
    ConstDisc = Disc;
  }

  // If the constant discriminator (either the blend RHS, or the entire
  // discriminator value) isn't a 16-bit constant, bail out, and let the
  // discriminator be computed separately.
  auto *ConstDiscN = dyn_cast<ConstantSDNode>(Val&: ConstDisc);
  if (!ConstDiscN || !isUInt<16>(x: ConstDiscN->getZExtValue()))
    return std::make_tuple(args: DAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), args&: Disc);

  // If there's no address discriminator, use XZR directly.
  if (!AddrDisc)
    AddrDisc = DAG->getRegister(Reg: AArch64::XZR, VT: MVT::i64);

  return std::make_tuple(
      args: DAG->getTargetConstant(Val: ConstDiscN->getZExtValue(), DL, VT: MVT::i64),
      args&: AddrDisc);
}
1659
/// Select a ptrauth "auth" intrinsic. Depending on the subtarget, either the
/// flexible-register AUTxMxN form is used, or the value is pinned to X16 and
/// the AUTx16x17 pseudo is emitted (glued to the CopyToReg).
void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(Num: 1);
  SDValue AUTKey = N->getOperand(Num: 2);
  SDValue AUTDisc = N->getOperand(Num: 3);

  // Materialize the key as a target constant operand.
  unsigned AUTKeyC = cast<ConstantSDNode>(Val&: AUTKey)->getZExtValue();
  AUTKey = CurDAG->getTargetConstant(Val: AUTKeyC, DL, VT: MVT::i64);

  // Split the discriminator into its constant and address halves.
  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(args&: AUTConstDisc, args&: AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(Disc: AUTDisc, DAG: CurDAG);

  if (!Subtarget->isX16X17Safer()) {
    std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
    // Copy deactivation symbol if present.
    if (N->getNumOperands() > 4)
      Ops.push_back(x: N->getOperand(Num: 4));

    SDNode *AUT =
        CurDAG->getMachineNode(Opcode: AArch64::AUTxMxN, dl: DL, VT1: MVT::i64, VT2: MVT::i64, Ops);
    ReplaceNode(F: N, T: AUT);
  } else {
    // The pseudo consumes the input value in X16; the glue result of the
    // CopyToReg ties that copy to the AUT node.
    SDValue X16Copy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL,
                                           Reg: AArch64::X16, N: Val, Glue: SDValue());
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(R: 1)};

    SDNode *AUT = CurDAG->getMachineNode(Opcode: AArch64::AUTx16x17, dl: DL, VT: MVT::i64, Ops);
    ReplaceNode(F: N, T: AUT);
  }
}
1692
/// Select a ptrauth "resign" intrinsic (plain or load-relative variant) into
/// the AUTPAC / AUTRELLOADPAC pseudos, which consume the value pinned in X16.
void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0, if W_CHAIN it is #1
  int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
  SDValue Val = N->getOperand(Num: OffsetBase + 1);
  SDValue AUTKey = N->getOperand(Num: OffsetBase + 2);
  SDValue AUTDisc = N->getOperand(Num: OffsetBase + 3);
  SDValue PACKey = N->getOperand(Num: OffsetBase + 4);
  SDValue PACDisc = N->getOperand(Num: OffsetBase + 5);
  uint32_t IntNum = N->getConstantOperandVal(Num: OffsetBase + 0);
  bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;

  // Materialize both keys as target constant operands.
  unsigned AUTKeyC = cast<ConstantSDNode>(Val&: AUTKey)->getZExtValue();
  unsigned PACKeyC = cast<ConstantSDNode>(Val&: PACKey)->getZExtValue();

  AUTKey = CurDAG->getTargetConstant(Val: AUTKeyC, DL, VT: MVT::i64);
  PACKey = CurDAG->getTargetConstant(Val: PACKeyC, DL, VT: MVT::i64);

  // Split each discriminator into its constant and address halves.
  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(args&: AUTConstDisc, args&: AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(Disc: AUTDisc, DAG: CurDAG);

  SDValue PACAddrDisc, PACConstDisc;
  std::tie(args&: PACConstDisc, args&: PACAddrDisc) =
      extractPtrauthBlendDiscriminators(Disc: PACDisc, DAG: CurDAG);

  // Pin the input value to X16; the glue result ties the copy to the pseudo.
  SDValue X16Copy = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl: DL,
                                         Reg: AArch64::X16, N: Val, Glue: SDValue());

  if (HasLoad) {
    // Load-relative variant: carries an extra addend operand and a chain.
    SDValue Addend = N->getOperand(Num: OffsetBase + 6);
    SDValue IncomingChain = N->getOperand(Num: 0);
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
                     PACKey, PACConstDisc, PACAddrDisc,
                     Addend, IncomingChain, X16Copy.getValue(R: 1)};

    SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(Opcode: AArch64::AUTRELLOADPAC, dl: DL,
                                                   VT1: MVT::i64, VT2: MVT::Other, Ops);
    ReplaceNode(F: N, T: AUTRELLOADPAC);
  } else {
    SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
                     PACConstDisc, PACAddrDisc, X16Copy.getValue(R: 1)};

    SDNode *AUTPAC = CurDAG->getMachineNode(Opcode: AArch64::AUTPAC, dl: DL, VT: MVT::i64, Ops);
    ReplaceNode(F: N, T: AUTPAC);
  }
}
1740
/// Try to select a pre/post-indexed load into the matching AArch64 writeback
/// load instruction. On success, replaces \p N's three results (loaded value,
/// updated base, chain) and returns true; returns false to let other
/// patterns handle the node.
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(Val: N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(ResNo: 0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(Val: LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  // Set when the selected load only produces i32 and an explicit
  // SUBREG_TO_REG is needed afterwards to widen the result to i64.
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 ||
             (VT.is64BitVector() && Subtarget->isLittleEndian())) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else if (VT.is64BitVector()) {
    // Remaining 64-bit vector case (big-endian): only a post-increment LD1
    // whose offset equals the register width (8 bytes) can be used.
    if (IsPre || OffsetVal != 8)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev8b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev4h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev2s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev1d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else if (VT.is128BitVector()) {
    // Likewise for big-endian 128-bit vectors, with a 16-byte offset.
    if (IsPre || OffsetVal != 16)
      return false;
    switch (VT.getScalarSizeInBits()) {
    case 8:
      Opcode = AArch64::LD1Onev16b_POST;
      break;
    case 16:
      Opcode = AArch64::LD1Onev8h_POST;
      break;
    case 32:
      Opcode = AArch64::LD1Onev4s_POST;
      break;
    case 64:
      Opcode = AArch64::LD1Onev2d_POST;
      break;
    default:
      llvm_unreachable("Expected vector element to be a power of 2");
    }
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDLoc dl(N);
  // LD1 encodes an immediate offset by using XZR as the offset register.
  SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
                       ? CurDAG->getRegister(Reg: AArch64::XZR, VT: MVT::i64)
                       : CurDAG->getTargetConstant(Val: OffsetVal, DL: dl, VT: MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  // Results: updated base (i64), loaded value (DstVT), chain.
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, VT1: MVT::i64, VT2: DstVT,
                                       VT3: MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Res), NewMemRefs: {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(Val: AArch64::sub_32, DL: dl, VT: MVT::i32);
    LoadedVal = SDValue(CurDAG->getMachineNode(Opcode: AArch64::SUBREG_TO_REG, dl,
                                               VT: MVT::i64, Op1: LoadedVal, Op2: SubReg),
                        0);
  }

  ReplaceUses(F: SDValue(N, 0), T: LoadedVal);
  ReplaceUses(F: SDValue(N, 1), T: SDValue(Res, 0));
  ReplaceUses(F: SDValue(N, 2), T: SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1880
// Select a multi-vector structured load producing NumVecs result vectors.
// The machine node returns a single Untyped tuple register; each result is
// extracted from consecutive subregisters starting at SubRegIdx.
void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Chain = N->getOperand(Num: 0);

  SDValue Ops[] = {N->getOperand(Num: 2), // Mem operand;
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i),
                T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));

  // The chain result follows the NumVecs value results.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1));

  // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
  // because it's too simple to have needed special treatment during lowering.
  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Val: N)) {
    MachineMemOperand *MemOp = MemIntr->getMemOperand();
    CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
  }

  CurDAG->RemoveDeadNode(N);
}
1909
// Select a post-incremented multi-vector load. The machine node produces the
// updated base register (i64 write-back), the vector tuple, and the chain;
// all three are rewired onto N's corresponding results.
void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Chain = N->getOperand(Num: 0);

  SDValue Ops[] = {N->getOperand(Num: 1), // Mem operand
                   N->getOperand(Num: 2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0));

  // Update uses of vector list. A single-vector result needs no subregister
  // extraction.
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(F: SDValue(N, 0), T: SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(F: SDValue(N, i),
                  T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx + i, DL: dl, VT, Operand: SuperReg));

  // Update the chain
  ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
1941
/// Optimize \param OldBase and \param OldOffset selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
///
/// Reg+Imm (Opc_ri) is preferred; Reg+Reg (Opc_rr) is tried only when no
/// Reg+Imm form matched. If neither matches, the original base/offset are
/// returned unchanged with Opc_ri.
std::tuple<unsigned, SDValue, SDValue>
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
                                              unsigned Opc_ri,
                                              const SDValue &OldBase,
                                              const SDValue &OldOffset,
                                              unsigned Scale) {
  SDValue NewBase = OldBase;
  SDValue NewOffset = OldOffset;
  // Detect a possible Reg+Imm addressing mode; the scaled immediate must be
  // in [-8, 7] per the template arguments.
  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
      Root: N, N: OldBase, Base&: NewBase, OffImm&: NewOffset);

  // Detect a possible reg+reg addressing mode, but only if we haven't already
  // detected a Reg+Imm one.
  const bool IsRegReg =
      !IsRegImm && SelectSVERegRegAddrMode(N: OldBase, Scale, Base&: NewBase, Offset&: NewOffset);

  // Select the instruction.
  return std::make_tuple(args&: IsRegReg ? Opc_rr : Opc_ri, args&: NewBase, args&: NewOffset);
}
1965
// Element-type classes accepted by SelectOpcodeFromVT below.
enum class SelectTypeKind {
  Int1 = 0,    // i1 predicate elements only.
  Int = 1,     // i8/i16/i32/i64 integer elements.
  FP = 2,      // f16/bf16/f32/f64 floating-point elements.
  AnyType = 3, // No element-type restriction.
};
1972
1973/// This function selects an opcode from a list of opcodes, which is
1974/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1975/// element types, in this order.
1976template <SelectTypeKind Kind>
1977static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1978 // Only match scalable vector VTs
1979 if (!VT.isScalableVector())
1980 return 0;
1981
1982 EVT EltVT = VT.getVectorElementType();
1983 unsigned Key = VT.getVectorMinNumElements();
1984 switch (Kind) {
1985 case SelectTypeKind::AnyType:
1986 break;
1987 case SelectTypeKind::Int:
1988 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1989 EltVT != MVT::i64)
1990 return 0;
1991 break;
1992 case SelectTypeKind::Int1:
1993 if (EltVT != MVT::i1)
1994 return 0;
1995 break;
1996 case SelectTypeKind::FP:
1997 if (EltVT == MVT::bf16)
1998 Key = 16;
1999 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2000 EltVT != MVT::f64)
2001 return 0;
2002 break;
2003 }
2004
2005 unsigned Offset;
2006 switch (Key) {
2007 case 16: // 8-bit or bf16
2008 Offset = 0;
2009 break;
2010 case 8: // 16-bit
2011 Offset = 1;
2012 break;
2013 case 4: // 32-bit
2014 Offset = 2;
2015 break;
2016 case 2: // 64-bit
2017 Offset = 3;
2018 break;
2019 default:
2020 return 0;
2021 }
2022
2023 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2024}
2025
2026// This function is almost identical to SelectWhilePair, but has an
2027// extra check on the range of the immediate operand.
2028// TODO: Merge these two functions together at some point?
2029void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2030 // Immediate can be either 0 or 1.
2031 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)))
2032 if (Imm->getZExtValue() > 1)
2033 return;
2034
2035 SDLoc DL(N);
2036 EVT VT = N->getValueType(ResNo: 0);
2037 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)};
2038 SDNode *WhilePair = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
2039 SDValue SuperReg = SDValue(WhilePair, 0);
2040
2041 for (unsigned I = 0; I < 2; ++I)
2042 ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
2043 SRIdx: AArch64::psub0 + I, DL, VT, Operand: SuperReg));
2044
2045 CurDAG->RemoveDeadNode(N);
2046}
2047
2048void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2049 SDLoc DL(N);
2050 EVT VT = N->getValueType(ResNo: 0);
2051
2052 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2)};
2053
2054 SDNode *WhilePair = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
2055 SDValue SuperReg = SDValue(WhilePair, 0);
2056
2057 for (unsigned I = 0; I < 2; ++I)
2058 ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
2059 SRIdx: AArch64::psub0 + I, DL, VT, Operand: SuperReg));
2060
2061 CurDAG->RemoveDeadNode(N);
2062}
2063
// Select a multi-vector conversion intrinsic whose inputs (operands
// 1..NumVecs) form a Z-register tuple. Each of the NumVecs results is
// extracted from consecutive zsub subregisters of the Untyped result.
void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
                                             unsigned Opcode) {
  EVT VT = N->getValueType(ResNo: 0);
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
  SDValue Ops = createZTuple(Regs);
  SDLoc DL(N);
  SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped, Op1: Ops);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  CurDAG->RemoveDeadNode(N);
}
2078
// Select an FP8 multi-vector conversion intrinsic. Unlike SelectCVTIntrinsic,
// the node is chained: the incoming chain is forwarded as the last operand
// and the machine node's chain result is rewired afterwards.
void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
                                                unsigned Opcode) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  // Skip chain (operand 0) and intrinsic ID (operand 1).
  SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
  Ops.push_back(/*Chain*/ Elt: N->getOperand(Num: 0));

  SDNode *Instruction =
      CurDAG->getMachineNode(Opcode, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(N);
}
2099
// Select a destructive multi-vector intrinsic.
//
// Operand layout consumed from N (after the intrinsic ID): an optional
// predicate (HasPred), NumVecs vectors forming the destructive tuple, then
// either a second NumVecs-wide tuple (IsZmMulti) or a single vector operand,
// followed by any remaining operands, which are forwarded unchanged.
void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
                                                          unsigned NumVecs,
                                                          bool IsZmMulti,
                                                          unsigned Opcode,
                                                          bool HasPred) {
  assert(Opcode != 0 && "Unexpected opcode");

  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
  SmallVector<SDValue, 4> Ops;

  // Consume the next NumVecs operands and pack them into one Z tuple.
  auto GetMultiVecOperand = [&]() {
    SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
    OpsIter += NumVecs;
    return createZMulTuple(Regs);
  };

  if (HasPred)
    Ops.push_back(Elt: *OpsIter++);

  Ops.push_back(Elt: GetMultiVecOperand());
  if (IsZmMulti)
    Ops.push_back(Elt: GetMultiVecOperand());
  else
    Ops.push_back(Elt: *OpsIter++);

  // Append any remaining operands.
  Ops.append(in_start: OpsIter, in_end: N->op_end());
  SDNode *Intrinsic;
  Intrinsic = CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  CurDAG->RemoveDeadNode(N);
}
2138
// Select a predicated SVE structured load. Chooses between the reg+imm and
// reg+reg addressing forms (Opc_ri / Opc_rr), then extracts the NumVecs
// result vectors from the Untyped tuple. IsIntr shifts the operand indices
// for intrinsic nodes, which carry the intrinsic ID before the predicate.
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                               unsigned Scale, unsigned Opc_ri,
                                               unsigned Opc_rr, bool IsIntr) {
  assert(Scale < 5 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Chain = N->getOperand(Num: 0);

  // Optimize addressing mode.
  SDValue Base, Offset;
  unsigned Opc;
  std::tie(args&: Opc, args&: Base, args&: Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, OldBase: N->getOperand(Num: IsIntr ? 3 : 2),
      OldOffset: CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), Scale);

  SDValue Ops[] = {N->getOperand(Num: IsIntr ? 2 : 1), // Predicate
                   Base,                              // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}
2171
// Select a predicate-as-counter contiguous multi-vector load, choosing
// between the reg+imm and reg+reg addressing forms, then extracting the
// NumVecs result vectors from the Untyped tuple.
void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
                                                          unsigned NumVecs,
                                                          unsigned Scale,
                                                          unsigned Opc_ri,
                                                          unsigned Opc_rr) {
  assert(Scale < 4 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Chain = N->getOperand(Num: 0);

  SDValue PNg = N->getOperand(Num: 2);
  SDValue Base = N->getOperand(Num: 3);
  SDValue Offset = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
  unsigned Opc;
  std::tie(args&: Opc, args&: Base, args&: Offset) =
      findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, OldBase: Base, OldOffset: Offset, Scale);

  SDValue Ops[] = {PNg,            // Predicate-as-counter
                   Base,           // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}
2206
2207void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2208 unsigned Opcode) {
2209 if (N->getValueType(ResNo: 0) != MVT::nxv4f32)
2210 return;
2211 SelectUnaryMultiIntrinsic(N, NumOutVecs: NumVecs, IsTupleInput: true, Opc: Opcode);
2212}
2213
// Select a lookup-table (LUTI) intrinsic reading from ZT0 with a lane
// immediate. Leaves the node unselected if the lane immediate exceeds
// MaxImm or operand 2 does not match the ZT0 register immediate.
void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
                                                    unsigned NumOutVecs,
                                                    unsigned Opc,
                                                    uint32_t MaxImm) {
  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 4)))
    if (Imm->getZExtValue() > MaxImm)
      return;

  SDValue ZtValue;
  if (!ImmToReg<AArch64::ZT0, 0>(N: Node->getOperand(Num: 2), Imm&: ZtValue))
    return;

  // Operands: ZT0, source vector, lane immediate, chain.
  SDValue Chain = Node->getOperand(Num: 0);
  SDValue Ops[] = {ZtValue, Node->getOperand(Num: 3), Node->getOperand(Num: 4), Chain};
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);

  SDNode *Instruction =
      CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  for (unsigned I = 0; I < NumOutVecs; ++I)
    ReplaceUses(F: SDValue(Node, I), T: CurDAG->getTargetExtractSubreg(
                                       SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumOutVecs;
  ReplaceUses(F: SDValue(Node, ChainIdx), T: SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(N: Node);
}
2244
// Select a lookup-table (LUTI) intrinsic reading from ZT0 whose two source
// vectors (operands 3 and 4) are packed into a Z-register pair.
void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
                                                unsigned NumOutVecs,
                                                unsigned Opc) {
  SDValue ZtValue;
  if (!ImmToReg<AArch64::ZT0, 0>(N: Node->getOperand(Num: 2), Imm&: ZtValue))
    return;

  SDValue Chain = Node->getOperand(Num: 0);
  SDValue Ops[] = {ZtValue,
                   createZMulTuple(Regs: {Node->getOperand(Num: 3), Node->getOperand(Num: 4)}),
                   Chain};

  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);

  SDNode *Instruction =
      CurDAG->getMachineNode(Opcode: Opc, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  for (unsigned I = 0; I < NumOutVecs; ++I)
    ReplaceUses(F: SDValue(Node, I), T: CurDAG->getTargetExtractSubreg(
                                       SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));

  // Copy chain
  unsigned ChainIdx = NumOutVecs;
  ReplaceUses(F: SDValue(Node, ChainIdx), T: SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(N: Node);
}
2273
2274void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2275 unsigned Op) {
2276 SDLoc DL(N);
2277 EVT VT = N->getValueType(ResNo: 0);
2278
2279 SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
2280 SDValue Zd = createZMulTuple(Regs);
2281 SDValue Zn = N->getOperand(Num: 1 + NumVecs);
2282 SDValue Zm = N->getOperand(Num: 2 + NumVecs);
2283
2284 SDValue Ops[] = {Zd, Zn, Zm};
2285
2286 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT: MVT::Untyped, Ops);
2287 SDValue SuperReg = SDValue(Intrinsic, 0);
2288 for (unsigned i = 0; i < NumVecs; ++i)
2289 ReplaceUses(F: SDValue(N, i), T: CurDAG->getTargetExtractSubreg(
2290 SRIdx: AArch64::zsub0 + i, DL, VT, Operand: SuperReg));
2291
2292 CurDAG->RemoveDeadNode(N);
2293}
2294
2295bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2296 switch (BaseReg) {
2297 default:
2298 return false;
2299 case AArch64::ZA:
2300 case AArch64::ZAB0:
2301 if (TileNum == 0)
2302 break;
2303 return false;
2304 case AArch64::ZAH0:
2305 if (TileNum <= 1)
2306 break;
2307 return false;
2308 case AArch64::ZAS0:
2309 if (TileNum <= 3)
2310 break;
2311 return false;
2312 case AArch64::ZAD0:
2313 if (TileNum <= 7)
2314 break;
2315 return false;
2316 }
2317
2318 BaseReg += TileNum;
2319 return true;
2320}
2321
// Select an SME tile-to-vector move: resolve the concrete tile register from
// the tile-number immediate, match the slice index as base+offset (offset
// bounded by MaxIdx, scaled by Scale), and extract the NumVecs result
// vectors from the Untyped machine-node result.
template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
                                                unsigned BaseReg, unsigned Op) {
  // The whole-ZA form carries no tile-number operand.
  unsigned TileNum = 0;
  if (BaseReg != AArch64::ZA)
    TileNum = N->getConstantOperandVal(Num: 2);

  // Rejects out-of-range tile numbers; BaseReg becomes the concrete tile.
  if (!SelectSMETile(BaseReg, TileNum))
    return;

  SDValue SliceBase, Base, Offset;
  if (BaseReg == AArch64::ZA)
    SliceBase = N->getOperand(Num: 2);
  else
    SliceBase = N->getOperand(Num: 3);

  if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
    return;

  SDLoc DL(N);
  SDValue SubReg = CurDAG->getRegister(Reg: BaseReg, VT: MVT::Other);
  SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(Num: 0)};
  SDNode *Mov = CurDAG->getMachineNode(Opcode: Op, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(ResNo: 0);
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(F: SDValue(N, I),
                T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
                                                Operand: SDValue(Mov, 0)));
  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}
2356
2357void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2358 unsigned Op, unsigned MaxIdx,
2359 unsigned Scale, unsigned BaseReg) {
2360 // Slice can be in different positions
2361 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2362 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2363 SDValue SliceBase = N->getOperand(Num: 2);
2364 if (BaseReg != AArch64::ZA)
2365 SliceBase = N->getOperand(Num: 3);
2366
2367 SDValue Base, Offset;
2368 if (!SelectSMETileSlice(N: SliceBase, MaxSize: MaxIdx, Vector&: Base, Offset, Scale))
2369 return;
2370 // The correct Za tile number is computed in Machine Instruction
2371 // See EmitZAInstr
2372 // DAG cannot select Za tile as an output register with ZReg
2373 SDLoc DL(N);
2374 SmallVector<SDValue, 6> Ops;
2375 if (BaseReg != AArch64::ZA )
2376 Ops.push_back(Elt: N->getOperand(Num: 2));
2377 Ops.push_back(Elt: Base);
2378 Ops.push_back(Elt: Offset);
2379 Ops.push_back(Elt: N->getOperand(Num: 0)); //Chain
2380 SDNode *Mov = CurDAG->getMachineNode(Opcode: Op, dl: DL, ResultTys: {MVT::Untyped, MVT::Other}, Ops);
2381
2382 EVT VT = N->getValueType(ResNo: 0);
2383 for (unsigned I = 0; I < NumVecs; ++I)
2384 ReplaceUses(F: SDValue(N, I),
2385 T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
2386 Operand: SDValue(Mov, 0)));
2387
2388 // Copy chain
2389 unsigned ChainIdx = NumVecs;
2390 ReplaceUses(F: SDValue(N, ChainIdx), T: SDValue(Mov, 1));
2391 CurDAG->RemoveDeadNode(N);
2392}
2393
// Select a unary multi-vector intrinsic producing NumOutVecs results. The
// inputs (everything after the intrinsic ID) are passed either as a single
// Z-register tuple (IsTupleInput) or as individual operands.
void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
                                                    unsigned NumOutVecs,
                                                    bool IsTupleInput,
                                                    unsigned Opc) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  unsigned NumInVecs = N->getNumOperands() - 1;

  SmallVector<SDValue, 6> Ops;
  if (IsTupleInput) {
    assert((NumInVecs == 2 || NumInVecs == 4) &&
           "Don't know how to handle multi-register input!");
    SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumInVecs));
    Ops.push_back(Elt: createZMulTuple(Regs));
  } else {
    // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
    for (unsigned I = 0; I < NumInVecs; I++)
      Ops.push_back(Elt: N->getOperand(Num: 1 + I));
  }

  SDNode *Res = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Res, 0);

  for (unsigned I = 0; I < NumOutVecs; I++)
    ReplaceUses(F: SDValue(N, I), T: CurDAG->getTargetExtractSubreg(
                                    SRIdx: AArch64::zsub0 + I, DL, VT, Operand: SuperReg));
  CurDAG->RemoveDeadNode(N);
}
2422
// Select a multi-vector structured store. The NumVecs source vectors
// (operands 2..NumVecs+1) are packed into a D- or Q-register tuple depending
// on their width.
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  // Tuple, address, chain.
  SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), N->getOperand(Num: 0)};
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});

  ReplaceNode(F: N, T: St);
}
2442
// Select a predicated SVE structured store, choosing between the reg+reg and
// reg+imm addressing forms (Opc_rr / Opc_ri) via findAddrModeSVELoadStore.
void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
                                                unsigned Opc_ri) {
  SDLoc dl(N);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));
  SDValue RegSeq = createZTuple(Regs);

  // Optimize addressing mode.
  unsigned Opc;
  SDValue Offset, Base;
  std::tie(args&: Opc, args&: Base, args&: Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, OldBase: N->getOperand(Num: NumVecs + 3),
      OldOffset: CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64), Scale);

  SDValue Ops[] = {RegSeq, N->getOperand(Num: NumVecs + 2), // predicate
                   Base,                                // address
                   Offset,                              // offset
                   N->getOperand(Num: 0)};                 // chain
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: 0), Ops);

  ReplaceNode(F: N, T: St);
}
2467
2468bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2469 SDValue &OffImm) {
2470 SDLoc dl(N);
2471 const DataLayout &DL = CurDAG->getDataLayout();
2472 const TargetLowering *TLI = getTargetLowering();
2473
2474 // Try to match it for the frame address
2475 if (auto FINode = dyn_cast<FrameIndexSDNode>(Val&: N)) {
2476 int FI = FINode->getIndex();
2477 Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
2478 OffImm = CurDAG->getTargetConstant(Val: 0, DL: dl, VT: MVT::i64);
2479 return true;
2480 }
2481
2482 return false;
2483}
2484
// Select a post-incremented multi-vector store. The machine node produces
// the updated base register (i64 write-back) and the chain; the NumVecs
// source vectors (operands 1..NumVecs) are packed into a D- or Q-tuple.
void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq,
                   N->getOperand(Num: NumVecs + 1), // base register
                   N->getOperand(Num: NumVecs + 2), // Incremental
                   N->getOperand(Num: 0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  ReplaceNode(F: N, T: St);
}
2505
namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  /// Insert \p V64Reg into the dsub half of an IMPLICIT_DEF V128 value,
  /// doubling the element count while keeping the element type. The other
  /// half of the wide value is left undefined.
  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(VT: EltTy, NumElements: 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: WideTy), 0);
    return DAG.getTargetInsertSubreg(SRIdx: AArch64::dsub, DL, VT: WideTy, Operand: Undef, Subreg: V64Reg);
  }
};
} // namespace
2528
2529/// NarrowVector - Given a value in the V128 register class, produce the
2530/// equivalent value in the V64 register class.
2531static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2532 EVT VT = V128Reg.getValueType();
2533 unsigned WideSize = VT.getVectorNumElements();
2534 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2535 MVT NarrowTy = MVT::getVectorVT(VT: EltTy, NumElements: WideSize / 2);
2536
2537 return DAG.getTargetExtractSubreg(SRIdx: AArch64::dsub, DL: SDLoc(V128Reg), VT: NarrowTy,
2538 Operand: V128Reg);
2539}
2540
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  // Select a single-lane structured load. Input vectors are operands
  // 2..NumVecs+1; lane number and address follow them; operand 0 is the
  // chain.
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));

  // Lane instructions operate on 128-bit registers: widen 64-bit inputs
  // here and narrow the results back after the load.
  if (Narrow)
    transform(Range&: Regs, d_first: Regs.begin(),
              F: WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
                   N->getOperand(Num: NumVecs + 3), N->getOperand(Num: 0)};
  SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  // Extract each result vector from the untyped super-register, narrowing
  // back to 64 bits when needed.
  EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0);
  static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                    AArch64::qsub2, AArch64::qsub3 };
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT, Operand: SuperReg);
    if (Narrow)
      NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
    ReplaceUses(F: SDValue(N, i), T: NV);
  }

  // Forward the chain result and delete the original node.
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}
2578
void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  // Select a post-incremented single-lane structured load (write-back form).
  // Input vectors are operands 1..NumVecs; lane number, base register and
  // increment follow them; operand 0 is the chain.
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));

  // Lane instructions operate on 128-bit registers: widen 64-bit inputs
  // here and narrow the results back after the load.
  if (Narrow)
    transform(Range&: Regs, d_first: Regs.begin(),
              F: WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(ResNo: 0), MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1);

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(Val: LaneNo, DL: dl,
                                             VT: MVT::i64), // Lane Number
                   N->getOperand(Num: NumVecs + 2), // Base register
                   N->getOperand(Num: NumVecs + 3), // Incremental
                   N->getOperand(Num: 0)};
  SDNode *Ld = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(F: SDValue(N, NumVecs), T: SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    // Single vector: use the result directly, narrowed if needed.
    ReplaceUses(F: SDValue(N, 0),
                T: Narrow ? NarrowVector(V128Reg: SuperReg, DAG&: *CurDAG) : SuperReg);
  } else {
    // Multiple vectors: extract each one from the tuple super-register.
    EVT WideVT = RegSeq.getOperand(i: 1)->getValueType(ResNo: 0);
    static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
                                      AArch64::qsub2, AArch64::qsub3 };
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(SRIdx: QSubs[i], DL: dl, VT: WideVT,
                                                  Operand: SuperReg);
      if (Narrow)
        NV = NarrowVector(V128Reg: NV, DAG&: *CurDAG);
      ReplaceUses(F: SDValue(N, i), T: NV);
    }
  }

  // Update the Chain
  ReplaceUses(F: SDValue(N, NumVecs + 1), T: SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}
2632
void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  // Select a single-lane structured store. Source vectors are operands
  // 2..NumVecs+1; lane number and address follow them; operand 0 is the
  // chain.
  SDLoc dl(N);
  EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 2, M: NumVecs));

  // Lane instructions operate on 128-bit registers: widen 64-bit sources.
  if (Narrow)
    transform(Range&: Regs, d_first: Regs.begin(),
              F: WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 2);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
                   N->getOperand(Num: NumVecs + 3), N->getOperand(Num: 0)};
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});

  ReplaceNode(F: N, T: St);
}
2660
void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  // Select a post-incremented single-lane structured store (write-back
  // form). Source vectors are operands 1..NumVecs; lane number, base
  // register and increment follow them; operand 0 is the chain.
  SDLoc dl(N);
  EVT VT = N->getOperand(Num: 2)->getValueType(ResNo: 0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->ops().slice(N: 1, M: NumVecs));

  // Lane instructions operate on 128-bit registers: widen 64-bit sources.
  if (Narrow)
    transform(Range&: Regs, d_first: Regs.begin(),
              F: WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(Num: NumVecs + 1);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(Val: LaneNo, DL: dl, VT: MVT::i64),
                   N->getOperand(Num: NumVecs + 2), // Base Register
                   N->getOperand(Num: NumVecs + 3), // Incremental
                   N->getOperand(Num: 0)};
  SDNode *St = CurDAG->getMachineNode(Opcode: Opc, dl, ResultTys: ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(Val: N)->getMemOperand();
  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});

  ReplaceNode(F: N, T: St);
}
2693
// Try to match an AND (possibly of a right shift) as an unsigned bitfield
// extract (UBFM). On success fills in Opc/Opd0/LSB/MSB and returns true.
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                       unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       unsigned NumberOfIgnoredLowBits,
                                       bool BiggerPattern) {
  assert(N->getOpcode() == ISD::AND &&
         "N must be a AND operation to call this function");

  EVT VT = N->getValueType(ResNo: 0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t AndImm = 0;
  if (!isOpcWithIntImmediate(N, Opc: ISD::AND, Imm&: AndImm))
    return false;

  const SDNode *Op0 = N->getOperand(Num: 0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that
  AndImm |= maskTrailingOnes<uint64_t>(N: NumberOfIgnoredLowBits);

  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  if (AndImm & (AndImm + 1))
    return false;

  bool ClampMSB = false;
  uint64_t SrlImm = 0;
  // Handle the SRL + ANY_EXTEND case.
  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
      isOpcWithIntImmediate(N: Op0->getOperand(Num: 0).getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
    // Extend the incoming operand of the SRL to 64-bit.
    Opd0 = Widen(CurDAG, N: Op0->getOperand(Num: 0).getOperand(i: 0));
    // Make sure to clamp the MSB so that we preserve the semantics of the
    // original operations.
    ClampMSB = true;
  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
             isOpcWithIntImmediate(N: Op0->getOperand(Num: 0).getNode(), Opc: ISD::SRL,
                                   Imm&: SrlImm)) {
    // If the shift result was truncated, we can still combine them.
    Opd0 = Op0->getOperand(Num: 0).getOperand(i: 0);

    // Use the type of SRL node.
    VT = Opd0->getValueType(ResNo: 0);
  } else if (isOpcWithIntImmediate(N: Op0, Opc: ISD::SRL, Imm&: SrlImm)) {
    // Plain SRL + AND: the shift amount becomes the LSB directly.
    Opd0 = Op0->getOperand(Num: 0);
    ClampMSB = (VT == MVT::i32);
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift right has been performed.
    // The resulting code will be at least as good as the original one
    // plus it may expose more opportunities for bitfield insert pattern.
    // FIXME: Currently we limit this to the bigger pattern, because
    // some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(Num: 0);
  } else
    return false;

  // Bail out on large immediates. This happens when no proper
  // combining/constant folding was performed.
  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  // The extracted field is [LSB, MSB]: LSB is the shift amount, and the
  // field width is the number of trailing ones in the (restored) mask.
  LSB = SrlImm;
  MSB = SrlImm +
        (VT == MVT::i32 ? llvm::countr_one<uint32_t>(Value: AndImm)
                        : llvm::countr_one<uint64_t>(Value: AndImm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead of
    // the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}
2787
// Try to match SIGN_EXTEND_INREG of a (possibly truncated) right shift as a
// signed bitfield extract (SBFM). Fills in Opc/Opd0/Immr/Imms on success.
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
                                             SDValue &Opd0, unsigned &Immr,
                                             unsigned &Imms) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(ResNo: 0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(Num: 0);
  if (Op->getOpcode() == ISD::TRUNCATE) {
    // Look through a truncate: perform the extract on the wider source type.
    Op = Op->getOperand(Num: 0);
    VT = Op->getValueType(ResNo: 0);
    BitWidth = VT.getSizeInBits();
  }

  // The inner operation must be a right shift by immediate (SRL or SRA).
  uint64_t ShiftImm;
  if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRL, Imm&: ShiftImm) &&
      !isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
    return false;

  // The extracted field must lie entirely within the source value.
  unsigned Width = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT().getSizeInBits();
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(i: 0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}
2820
// Try to match "(and Value, MaskImm) >> ShiftImm" as a single UBFM that
// extracts the masked bits. Fills in Opc/Opd0/LSB/MSB on success.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern which basically extracts several
  // continuous bits from the source value and places it from the LSB of the
  // destination value, all other bits of the destination value are set to
  // zero:
  //
  // Value2 = AND Value, MaskImm
  // SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm to search for the bit width.
  //
  // This gets selected into a single UBFM:
  //
  // UBFM Value, ShiftImm, Log2_64(MaskImm)
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::AND, Imm&: AndMask))
    return false;

  Opd0 = N->getOperand(Num: 0).getOperand(i: 0);

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm))
    return false;

  // Check whether we really have several bits extract here.
  // The shifted-out mask must be contiguous starting at bit SrlImm.
  if (!isMask_64(Value: AndMask >> SrlImm))
    return false;

  Opc = N->getValueType(ResNo: 0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  LSB = SrlImm;
  MSB = llvm::Log2_64(Value: AndMask);
  return true;
}
2860
// Try to match a SRL/SRA (of a SHL, a truncate, or - for BiggerPattern - any
// value) as a bitfield extract. Fills in Opc/Opd0/Immr/Imms on success.
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(ResNo: 0);

  // Here we can test the type of VT and return false when the type does not
  // match, but since it is done prior to that call in the current context
  // we turned that into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing several bits extract.
  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB&: Immr, MSB&: Imms))
    return true;

  // We're looking for a shift of a shift.
  uint64_t ShlImm = 0;
  uint64_t TruncBits = 0;
  if (isOpcWithIntImmediate(N: N->getOperand(Num: 0).getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
    Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(Num: 0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of truncate. Truncate from i64 to i32 could
    // be considered as setting high 32 bits as zero. Our strategy here is to
    // always generate 64bit UBFM. This consistency will help the CSE pass
    // later find more redundancy.
    Opd0 = N->getOperand(Num: 0).getOperand(i: 0);
    TruncBits = Opd0->getValueType(ResNo: 0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0.getValueType();
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM
    Opd0 = N->getOperand(Num: 0);
  } else
    return false;

  // Missing combines/constant folding may have left us with strange
  // constants.
  if (ShlImm >= VT.getSizeInBits()) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N: N->getOperand(Num: 1), Imm&: SrlImm))
    return false;

  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  // immr rotates right: a negative (SrlImm - ShlImm) wraps around the
  // register width.
  int immr = SrlImm - ShlImm;
  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  return true;
}
2927
// Try to select (sext (sra X, imm)) from i32 to i64 as a single SBFMXri on
// the widened shift input. Returns true if the node was replaced.
bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND);

  // Only the i32 -> i64 extension is handled.
  EVT VT = N->getValueType(ResNo: 0);
  EVT NarrowVT = N->getOperand(Num: 0)->getValueType(ResNo: 0);
  if (VT != MVT::i64 || NarrowVT != MVT::i32)
    return false;

  uint64_t ShiftImm;
  SDValue Op = N->getOperand(Num: 0);
  if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SRA, Imm&: ShiftImm))
    return false;

  SDLoc dl(N);
  // Extend the incoming operand of the shift to 64-bits.
  SDValue Opd0 = Widen(CurDAG, N: Op.getOperand(i: 0));
  // Sign-extract bits [ShiftImm, 31] of the widened value.
  unsigned Immr = ShiftImm;
  unsigned Imms = NarrowVT.getSizeInBits() - 1;
  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
                   CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
  CurDAG->SelectNodeTo(N, MachineOpc: AArch64::SBFMXri, VT, Ops);
  return true;
}
2951
// Dispatch on N's opcode to the specific bitfield-extract matchers above.
// Also recognizes already-selected SBFM/UBFM machine nodes so that bigger
// patterns can reuse their operands.
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(ResNo: 0) != MVT::i32 && N->getValueType(ResNo: 0) != MVT::i64)
    return false;

  switch (N->getOpcode()) {
  default:
    // Fall through to the machine-opcode check below.
    if (!N->isMachineOpcode())
      return false;
    break;
  case ISD::AND:
    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB&: Immr, MSB&: Imms,
                                      NumberOfIgnoredLowBits, BiggerPattern);
  case ISD::SRL:
  case ISD::SRA:
    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);

  case ISD::SIGN_EXTEND_INREG:
    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
  }

  // An already-selected bitfield move: read the fields straight off.
  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  default:
    return false;
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opc = NOpc;
    Opd0 = N->getOperand(Num: 0);
    Immr = N->getConstantOperandVal(Num: 1);
    Imms = N->getConstantOperandVal(Num: 2);
    return true;
  }
  // Unreachable
  return false;
}
2992
// Try to select N as a single SBFM/UBFM bitfield extract. Returns true if
// the node was replaced.
bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
    return false;

  EVT VT = N->getValueType(ResNo: 0);
  SDLoc dl(N);

  // If the bit extract operation is 64bit but the original type is 32bit, we
  // need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT: MVT::i64),
                       CurDAG->getTargetConstant(Val: Imms, DL: dl, VT: MVT::i64)};

    // Do the extract on i64 and take the low 32 bits of the result.
    SDNode *BFM = CurDAG->getMachineNode(Opcode: Opc, dl, VT: MVT::i64, Ops: Ops64);
    SDValue Inner = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sub_32, DL: dl,
                                                   VT: MVT::i32, Operand: SDValue(BFM, 0));
    ReplaceNode(F: N, T: Inner.getNode());
    return true;
  }

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Val: Immr, DL: dl, VT),
                   CurDAG->getTargetConstant(Val: Imms, DL: dl, VT)};
  CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
  return true;
}
3020
3021/// Does DstMask form a complementary pair with the mask provided by
3022/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3023/// this asks whether DstMask zeroes precisely those bits that will be set by
3024/// the other half.
3025static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3026 unsigned NumberOfIgnoredHighBits, EVT VT) {
3027 assert((VT == MVT::i32 || VT == MVT::i64) &&
3028 "i32 or i64 mask type expected!");
3029 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3030
3031 // Enable implicitTrunc as we're intentionally ignoring high bits.
3032 APInt SignificantDstMask =
3033 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3034 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(width: BitWidth);
3035
3036 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3037 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3038}
3039
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for useful bit of x
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, x useful bits are 0x7, then the useful bits of x, live through
// y.
// After #2, the useful bits of x are 0x4.
// However, if x is used on an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3056
3057static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
3058 unsigned Depth) {
3059 uint64_t Imm =
3060 cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue();
3061 Imm = AArch64_AM::decodeLogicalImmediate(val: Imm, regSize: UsefulBits.getBitWidth());
3062 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3063 getUsefulBits(Op, UsefulBits, Depth: Depth + 1);
3064}
3065
// Propagate useful bits backwards through a bitfield-move operand described
// by Imm (immr field) and MSB (imms field): maps the useful bits of the
// result back onto the bits of the source operand.
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // inherit the bitwidth value
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  if (MSB >= Imm) {
    // Extract-style move: field of width MSB - Imm + 1 taken from bit Imm.
    OpUsefulBits <<= MSB - Imm + 1;
    --OpUsefulBits;
    // The interesting part will be in the lower part of the result
    getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1);
    // The interesting part was starting at Imm in the argument
    OpUsefulBits <<= Imm;
  } else {
    // Insert-style move: field of width MSB + 1 rotated into the high part.
    OpUsefulBits <<= MSB + 1;
    --OpUsefulBits;
    // The interesting part will be shifted in the result
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    getUsefulBits(Op, UsefulBits&: OpUsefulBits, Depth: Depth + 1);
    // The interesting part was at zero in the argument
    OpUsefulBits.lshrInPlace(ShiftAmt: OpUsefulBits.getBitWidth() - Imm);
  }

  UsefulBits &= OpUsefulBits;
}
3092
3093static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3094 unsigned Depth) {
3095 uint64_t Imm =
3096 cast<const ConstantSDNode>(Val: Op.getOperand(i: 1).getNode())->getZExtValue();
3097 uint64_t MSB =
3098 cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
3099
3100 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3101}
3102
// Propagate useful bits backwards through an ORR with a shifted register:
// operand 2 encodes the shift type and amount applied to the second source.
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
  // Start from an all-ones mask of the right width.
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
  Mask.flipAllBits();

  if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSL) {
    // Shift Left
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
    Mask <<= ShiftAmt;
    getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1);
    Mask.lshrInPlace(ShiftAmt);
  } else if (AArch64_AM::getShiftType(Imm: ShiftTypeAndValue) == AArch64_AM::LSR) {
    // Shift Right
    // We do not handle AArch64_AM::ASR, because the sign will change the
    // number of useful bits
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(Imm: ShiftTypeAndValue);
    Mask.lshrInPlace(ShiftAmt);
    getUsefulBits(Op, UsefulBits&: Mask, Depth: Depth + 1);
    Mask <<= ShiftAmt;
  } else
    return;

  UsefulBits &= Mask;
}
3130
// Propagate useful bits backwards through a BFM (bitfield move with two
// register inputs). Orig identifies which of the two inputs we are tracking.
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Val: Op.getOperand(i: 2).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Val: Op.getOperand(i: 3).getNode())->getZExtValue();

  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  // First collect which bits of the BFM's own result are useful.
  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
  ResultUsefulBits.flipAllBits();
  APInt Mask(UsefulBits.getBitWidth(), 0);

  getUsefulBits(Op, UsefulBits&: ResultUsefulBits, Depth: Depth + 1);

  if (MSB >= Imm) {
    // The instruction is a BFXIL.
    uint64_t Width = MSB - Imm + 1;
    uint64_t LSB = Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;

    if (Op.getOperand(i: 1) == Orig) {
      // Copy the low bits from the result to bits starting from LSB.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask <<= LSB;
    }

    if (Op.getOperand(i: 0) == Orig)
      // Bits starting from LSB in the input contribute to the result.
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  } else {
    // The instruction is a BFI.
    uint64_t Width = MSB + 1;
    uint64_t LSB = UsefulBits.getBitWidth() - Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;
    OpUsefulBits <<= LSB;

    if (Op.getOperand(i: 1) == Orig) {
      // Copy the bits from the result to the zero bits.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(ShiftAmt: LSB);
    }

    if (Op.getOperand(i: 0) == Orig)
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  }

  UsefulBits &= Mask;
}
3185
// Narrow UsefulBits according to how one specific user (UserNode) consumes
// the value Orig. Users we do not understand leave UsefulBits untouched,
// i.e. every bit stays useful.
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
                                SDValue Orig, unsigned Depth) {

  // Users of this node should have already been instruction selected
  // FIXME: Can we turn that into an assert?
  if (!UserNode->isMachineOpcode())
    return;

  switch (UserNode->getMachineOpcode()) {
  default:
    return;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
  case AArch64::ANDWri:
  case AArch64::ANDXri:
    // We increment Depth only when we call the getUsefulBits
    return getUsefulBitsFromAndWithImmediate(Op: SDValue(UserNode, 0), UsefulBits,
                                             Depth);
  case AArch64::UBFMWri:
  case AArch64::UBFMXri:
    return getUsefulBitsFromUBFM(Op: SDValue(UserNode, 0), UsefulBits, Depth);

  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    // Only the shifted operand (operand 1) restricts useful bits.
    if (UserNode->getOperand(Num: 0) != Orig && UserNode->getOperand(Num: 1) == Orig)
      getUsefulBitsFromOrWithShiftedReg(Op: SDValue(UserNode, 0), UsefulBits,
                                        Depth);
    return;
  case AArch64::BFMWri:
  case AArch64::BFMXri:
    return getUsefulBitsFromBFM(Op: SDValue(UserNode, 0), Orig, UsefulBits, Depth);

  case AArch64::STRBBui:
  case AArch64::STURBBi:
    // A byte store only consumes the low 8 bits of the stored value.
    if (UserNode->getOperand(Num: 0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
    return;

  case AArch64::STRHHui:
  case AArch64::STURHHi:
    // A halfword store only consumes the low 16 bits of the stored value.
    if (UserNode->getOperand(Num: 0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
    return;
  }
}
3233
// Compute into UsefulBits which bits of Op's result are actually consumed by
// its users (see the block comment above the forward declaration).
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  // Limit recursion so pathological DAGs cannot blow the stack.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return;
  // Initialize UsefulBits
  if (!Depth) {
    unsigned Bitwidth = Op.getScalarValueSizeInBits();
    // At the beginning, assume every produced bits is useful
    UsefulBits = APInt(Bitwidth, 0);
    UsefulBits.flipAllBits();
  }
  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);

  // A bit is useful if any user finds it useful, so OR over all users.
  for (SDNode *Node : Op.getNode()->users()) {
    // A use cannot produce useful bits
    APInt UsefulBitsForUse = APInt(UsefulBits);
    getUsefulBitsForUse(UserNode: Node, UsefulBits&: UsefulBitsForUse, Orig: Op, Depth);
    UsersUsefulBits |= UsefulBitsForUse;
  }
  // UsefulBits contains the produced bits that are meaningful for the
  // current definition, thus a user cannot make a bit meaningful at
  // this point
  UsefulBits &= UsersUsefulBits;
}
3257
3258/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3259/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3260/// 0, return Op unchanged.
3261static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3262 if (ShlAmount == 0)
3263 return Op;
3264
3265 EVT VT = Op.getValueType();
3266 SDLoc dl(Op);
3267 unsigned BitWidth = VT.getSizeInBits();
3268 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3269
3270 SDNode *ShiftNode;
3271 if (ShlAmount > 0) {
3272 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3273 ShiftNode = CurDAG->getMachineNode(
3274 Opcode: UBFMOpc, dl, VT, Op1: Op,
3275 Op2: CurDAG->getTargetConstant(Val: BitWidth - ShlAmount, DL: dl, VT),
3276 Op3: CurDAG->getTargetConstant(Val: BitWidth - 1 - ShlAmount, DL: dl, VT));
3277 } else {
3278 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3279 assert(ShlAmount < 0 && "expected right shift");
3280 int ShrAmount = -ShlAmount;
3281 ShiftNode = CurDAG->getMachineNode(
3282 Opcode: UBFMOpc, dl, VT, Op1: Op, Op2: CurDAG->getTargetConstant(Val: ShrAmount, DL: dl, VT),
3283 Op3: CurDAG->getTargetConstant(Val: BitWidth - 1, DL: dl, VT));
3284 }
3285
3286 return SDValue(ShiftNode, 0);
3287}
3288
// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width);

// For bit-field-positioning pattern "(shl VAL, N)".
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width);
3302
/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
                                    bool BiggerPattern, SDValue &Src,
                                    int &DstLSB, int &Width) {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  (void)BitWidth;
  assert(BitWidth == 32 || BitWidth == 64);

  KnownBits Known = CurDAG->computeKnownBits(Op);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value. The possibly-set bits must form a
  // single contiguous field.
  const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
  if (!isShiftedMask_64(Value: NonZeroBits))
    return false;

  // Dispatch to the matcher for the specific root opcode.
  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::AND:
    return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
                                          NonZeroBits, Src, DstLSB, Width);
  case ISD::SHL:
    return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
                                          NonZeroBits, Src, DstLSB, Width);
  }

  return false;
}
3334
/// Match the AND form of a bitfield-positioning op:
///   "(and (shl val, N), shifted-mask)" or
///   "(and (any_extend (shl val, N)), shifted-mask)".
/// On success, Src is the (possibly widened and re-shifted) source value,
/// DstLSB the destination bit offset and Width the field width.
/// NonZeroBits are the not-provably-zero bits of Op (a shifted mask).
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width) {
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees VT is one of i32 or i64");
  (void)VT;

  // The root must be an AND with a constant mask.
  uint64_t AndImm;
  if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::AND, Imm&: AndImm))
    return false;

  // If (~AndImm & NonZeroBits) is not zero at POS, we know that
  // 1) (AndImm & (1 << POS) == 0)
  // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
  //
  // 1) and 2) don't agree so something must be wrong (e.g., in
  // 'SelectionDAG::computeKnownBits')
  assert((~AndImm & NonZeroBits) == 0 &&
         "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");

  SDValue AndOp0 = Op.getOperand(i: 0);

  uint64_t ShlImm;
  SDValue ShlOp0;
  if (isOpcWithIntImmediate(N: AndOp0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
    // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
    ShlOp0 = AndOp0.getOperand(i: 0);
  } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
             isOpcWithIntImmediate(N: AndOp0.getOperand(i: 0).getNode(), Opc: ISD::SHL,
                                   Imm&: ShlImm)) {
    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"

    // ShlVal == shl(val, N), which is a left shift on a smaller type.
    SDValue ShlVal = AndOp0.getOperand(i: 0);

    // Since this is after type legalization and ShlVal is extended to MVT::i64,
    // expect VT to be MVT::i32.
    assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");

    // Widens 'val' to MVT::i64 as the source of bit field positioning.
    ShlOp0 = Widen(CurDAG, N: ShlVal.getOperand(i: 0));
  } else
    return false;

  // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
  // then we'll end up generating AndOp0+UBFIZ instead of just keeping
  // AndOp0+AND.
  if (!BiggerPattern && !AndOp0.hasOneUse())
    return false;

  // Derive the field placement from the bits that may be non-zero.
  DstLSB = llvm::countr_zero(Val: NonZeroBits);
  Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);

  // Bail out on large Width. This happens when no proper combining / constant
  // folding was performed.
  if (Width >= (int)VT.getSizeInBits()) {
    // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
    // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
    // "val".
    // If VT is i32, what Width >= 32 means:
    // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
    //   demands at least 'Width' bits (after dag-combiner). This together with
    //   `any_extend` Op (undefined higher bits) indicates missed combination
    //   when lowering the 'and' IR instruction to an machine IR instruction.
    LLVM_DEBUG(
        dbgs()
        << "Found large Width in bit-field-positioning -- this indicates no "
           "proper combining / constant folding was performed\n");
    return false;
  }

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount. BiggerPattern is true when this pattern is being matched for BFI,
  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  // which case it is not profitable to insert an extra shift.
  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  // Re-shift the source so its field lines up with the destination LSB.
  Src = getLeftShift(CurDAG, Op: ShlOp0, ShlAmount: ShlImm - DstLSB);
  return true;
}
3422
3423// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3424// UBFIZ.
3425static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3426 SDValue &Src, int &DstLSB,
3427 int &Width) {
3428 // Caller should have verified that N is a left shift with constant shift
3429 // amount; asserts that.
3430 assert(Op.getOpcode() == ISD::SHL &&
3431 "Op.getNode() should be a SHL node to call this function");
3432 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3433 "Op.getNode() should shift ShlImm to call this function");
3434
3435 uint64_t AndImm = 0;
3436 SDValue Op0 = Op.getOperand(i: 0);
3437 if (!isOpcWithIntImmediate(N: Op0.getNode(), Opc: ISD::AND, Imm&: AndImm))
3438 return false;
3439
3440 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3441 if (isMask_64(Value: ShiftedAndImm)) {
3442 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3443 // should end with Mask, and could be prefixed with random bits if those
3444 // bits are shifted out.
3445 //
3446 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3447 // the AND result corresponding to those bits are shifted out, so it's fine
3448 // to not extract them.
3449 Width = llvm::countr_one(Value: ShiftedAndImm);
3450 DstLSB = ShlImm;
3451 Src = Op0.getOperand(i: 0);
3452 return true;
3453 }
3454 return false;
3455}
3456
3457static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3458 bool BiggerPattern,
3459 const uint64_t NonZeroBits,
3460 SDValue &Src, int &DstLSB,
3461 int &Width) {
3462 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3463
3464 EVT VT = Op.getValueType();
3465 assert((VT == MVT::i32 || VT == MVT::i64) &&
3466 "Caller guarantees that type is i32 or i64");
3467 (void)VT;
3468
3469 uint64_t ShlImm;
3470 if (!isOpcWithIntImmediate(N: Op.getNode(), Opc: ISD::SHL, Imm&: ShlImm))
3471 return false;
3472
3473 if (!BiggerPattern && !Op.hasOneUse())
3474 return false;
3475
3476 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3477 return true;
3478
3479 DstLSB = llvm::countr_zero(Val: NonZeroBits);
3480 Width = llvm::countr_one(Value: NonZeroBits >> DstLSB);
3481
3482 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3483 return false;
3484
3485 Src = getLeftShift(CurDAG, Op: Op.getOperand(i: 0), ShlAmount: ShlImm - DstLSB);
3486 return true;
3487}
3488
3489static bool isShiftedMask(uint64_t Mask, EVT VT) {
3490 assert(VT == MVT::i32 || VT == MVT::i64);
3491 if (VT == MVT::i32)
3492 return isShiftedMask_32(Value: Mask);
3493 return isShiftedMask_64(Value: Mask);
3494}
3495
// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
// inserted only sets known zero bits. Returns true iff N was selected.
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(ResNo: 0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // The second OR operand must be a constant.
  uint64_t OrImm;
  if (!isOpcWithIntImmediate(N, Opc: ISD::OR, Imm&: OrImm))
    return false;

  // Skip this transformation if the ORR immediate can be encoded in the ORR.
  // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
  // performance neutral.
  if (AArch64_AM::isLogicalImmediate(imm: OrImm, regSize: BitWidth))
    return false;

  uint64_t MaskImm;
  SDValue And = N->getOperand(Num: 0);
  // Must be a single use AND with an immediate operand.
  if (!And.hasOneUse() ||
      !isOpcWithIntImmediate(N: And.getNode(), Opc: ISD::AND, Imm&: MaskImm))
    return false;

  // Compute the Known Zero for the AND as this allows us to catch more general
  // cases than just looking for AND with imm.
  KnownBits Known = CurDAG->computeKnownBits(Op: And);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value.
  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();

  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
  if (!isShiftedMask(Mask: Known.Zero.getZExtValue(), VT))
    return false;

  // The bits being inserted must only set those bits that are known to be zero.
  if ((OrImm & NotKnownZero) != 0) {
    // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
    // currently handle this case.
    return false;
  }

  // BFI/BFXIL dst, src, #lsb, #width.
  // LSB = number of low bits that may be non-zero (the insertion region starts
  // right above them); Width = number of known-zero bits being filled.
  int LSB = llvm::countr_one(Value: NotKnownZero);
  int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();

  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
  unsigned ImmR = (BitWidth - LSB) % BitWidth;
  unsigned ImmS = Width - 1;

  // If we're creating a BFI instruction avoid cases where we need more
  // instructions to materialize the BFI constant as compared to the original
  // ORR. A BFXIL will use the same constant as the original ORR, so the code
  // should be no worse in this case.
  bool IsBFI = LSB != 0;
  uint64_t BFIImm = OrImm >> LSB;
  if (IsBFI && !AArch64_AM::isLogicalImmediate(imm: BFIImm, regSize: BitWidth)) {
    // We have a BFI instruction and we know the constant can't be materialized
    // with a ORR-immediate with the zero register.
    // Estimate materialization cost by counting non-zero 16-bit chunks (each
    // chunk costs one MOVZ/MOVK).
    unsigned OrChunks = 0, BFIChunks = 0;
    for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
      if (((OrImm >> Shift) & 0xFFFF) != 0)
        ++OrChunks;
      if (((BFIImm >> Shift) & 0xFFFF) != 0)
        ++BFIChunks;
    }
    if (BFIChunks > OrChunks)
      return false;
  }

  // Materialize the constant to be inserted.
  SDLoc DL(N);
  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
  SDNode *MOVI = CurDAG->getMachineNode(
      Opcode: MOVIOpc, dl: DL, VT, Op1: CurDAG->getTargetConstant(Val: BFIImm, DL, VT));

  // Create the BFI/BFXIL instruction.
  SDValue Ops[] = {And.getOperand(i: 0), SDValue(MOVI, 0),
                   CurDAG->getTargetConstant(Val: ImmR, DL, VT),
                   CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
  return true;
}
3585
// Decide whether 'Dst' (one operand of an OR that would otherwise become BFM)
// can profitably be folded into an ORR-with-shifted-register operand. On
// success, ShiftedOperand holds the register operand and EncodedShiftImm the
// encoded shift type/amount for the ORR.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
                                           SDValue &ShiftedOperand,
                                           uint64_t &EncodedShiftImm) {
  // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
  if (!Dst.hasOneUse())
    return false;

  EVT VT = Dst.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller should guarantee that VT is one of i32 or i64");
  const unsigned SizeInBits = VT.getSizeInBits();

  SDLoc DL(Dst.getNode());
  uint64_t AndImm, ShlImm;
  // Case 1: Dst = (and (srl x, SrlImm), shifted-mask). Replace with a UBFX of
  // the field plus an LSL encoded on the ORR.
  if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::AND, Imm&: AndImm) &&
      isShiftedMask_64(Value: AndImm)) {
    // Avoid transforming 'DstOp0' if it has other uses than the AND node.
    SDValue DstOp0 = Dst.getOperand(i: 0);
    if (!DstOp0.hasOneUse())
      return false;

    // An example to illustrate the transformation
    // From:
    //    lsr     x8, x1, #1
    //    and     x8, x8, #0x3f80
    //    bfxil   x8, x1, #0, #7
    // To:
    //    and    x8, x23, #0x7f
    //    ubfx   x9, x23, #8, #7
    //    orr    x23, x8, x9, lsl #7
    //
    // The number of instructions remains the same, but ORR is faster than BFXIL
    // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
    // the dependency chain is improved after the transformation.
    uint64_t SrlImm;
    if (isOpcWithIntImmediate(N: DstOp0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
      uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(Val: AndImm);
      // The combined extract position must stay inside the register.
      if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
        unsigned MaskWidth =
            llvm::countr_one(Value: AndImm >> NumTrailingZeroInShiftedMask);
        unsigned UBFMOpc =
            (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
        // UBFX x, #(SrlImm + tz), #MaskWidth, expressed as UBFM immr/imms.
        SDNode *UBFMNode = CurDAG->getMachineNode(
            Opcode: UBFMOpc, dl: DL, VT, Op1: DstOp0.getOperand(i: 0),
            Op2: CurDAG->getTargetConstant(Val: SrlImm + NumTrailingZeroInShiftedMask, DL,
                                      VT),
            Op3: CurDAG->getTargetConstant(
                Val: SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
        ShiftedOperand = SDValue(UBFMNode, 0);
        EncodedShiftImm = AArch64_AM::getShifterImm(
            ST: AArch64_AM::LSL, Imm: NumTrailingZeroInShiftedMask);
        return true;
      }
    }
    return false;
  }

  // Case 2: Dst = (shl x, ShlImm). Fold the shift into the ORR as LSL.
  if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
    ShiftedOperand = Dst.getOperand(i: 0);
    EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm);
    return true;
  }

  // Case 3: Dst = (srl x, SrlImm). Fold the shift into the ORR as LSR.
  uint64_t SrlImm;
  if (isOpcWithIntImmediate(N: Dst.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
    ShiftedOperand = Dst.getOperand(i: 0);
    EncodedShiftImm = AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm);
    return true;
  }
  return false;
}
3657
// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
// the operands and select it to AArch64::ORR with shifted registers if
// that's more efficient. Returns true iff selection to AArch64::ORR happens.
// Src/Dst are the roles the BFM matcher assigned: Src supplies the inserted
// bits and Dst is the value being inserted into (preserved bits).
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
                            SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
                            const bool BiggerPattern) {
  EVT VT = N->getValueType(ResNo: 0);
  assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
  assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
          (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
         "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Expect result type to be i32 or i64 since N is combinable to BFM");
  SDLoc DL(N);

  // Bail out if BFM simplifies away one node in BFM Dst.
  if (OrOpd1 != Dst)
    return false;

  const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
  // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
  // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
  if (BiggerPattern) {
    uint64_t SrcAndImm;
    if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::AND, Imm&: SrcAndImm) &&
        isMask_64(Value: SrcAndImm) && OrOpd0.getOperand(i: 0) == Src) {
      // OrOpd0 = AND Src, #Mask
      // So BFM simplifies away one AND node from Src and doesn't simplify away
      // nodes from Dst. If ORR with left-shifted operand also simplifies away
      // one node (from Rd), ORR is better since it has higher throughput and
      // smaller latency than BFM on many AArch64 processors (and for the rest
      // ORR is at least as good as BFM).
      SDValue ShiftedOperand;
      uint64_t EncodedShiftImm;
      if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
                                         EncodedShiftImm)) {
        SDValue Ops[] = {OrOpd0, ShiftedOperand,
                         CurDAG->getTargetConstant(Val: EncodedShiftImm, DL, VT)};
        CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
        return true;
      }
    }
    return false;
  }

  assert((!BiggerPattern) && "BiggerPattern should be handled above");

  uint64_t ShlImm;
  if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SHL, Imm&: ShlImm)) {
    // (or (shl Src, #imm), Dst) with a single-use shift: fold the shift into
    // the ORR operand.
    if (OrOpd0.getOperand(i: 0) == Src && OrOpd0.hasOneUse()) {
      SDValue Ops[] = {
          Dst, Src,
          CurDAG->getTargetConstant(
              Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
      return true;
    }

    // Select the following pattern to left-shifted operand rather than BFI.
    // %val1 = op ..
    // %val2 = shl %val1, #imm
    // %res = or %val1, %val2
    //
    // If N is selected to be BFI, we know that
    // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
    // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
    //
    // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
    if (OrOpd0.getOperand(i: 0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
      return true;
    }
  }

  uint64_t SrlImm;
  if (isOpcWithIntImmediate(N: OrOpd0.getNode(), Opc: ISD::SRL, Imm&: SrlImm)) {
    // Select the following pattern to right-shifted operand rather than BFXIL.
    // %val1 = op ..
    // %val2 = lshr %val1, #imm
    // %res = or %val1, %val2
    //
    // If N is selected to be BFXIL, we know that
    // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
    // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
    //
    // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
    if (OrOpd0.getOperand(i: 0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              Val: AArch64_AM::getShifterImm(ST: AArch64_AM::LSR, Imm: SrlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, MachineOpc: OrrOpc, VT, Ops);
      return true;
    }
  }

  return false;
}
3760
/// Try to select an ISD::OR node as a BFM (BFI/BFXIL) bitfield insertion, or
/// as an ORR-with-shift when that is more efficient. UsefulBits are the bits
/// of N's result that downstream users actually consume. Returns true iff N
/// was selected.
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                      SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(ResNo: 0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given a OR operation, check if we have the following pattern
  // ubfm c, b, imm, imm2 (or something that does the same jobs, see
  //                       isBitfieldExtractOp)
  // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
  //                 countTrailingZeros(mask2) == imm2 - imm + 1
  // f = d | c
  // if yes, replace the OR instruction with:
  // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2

  // OR is commutative, check all combinations of operand order and values of
  // BiggerPattern, i.e.
  //     Opd0, Opd1, BiggerPattern=false
  //     Opd1, Opd0, BiggerPattern=false
  //     Opd0, Opd1, BiggerPattern=true
  //     Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(Num: I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand(Num: (I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, N: OrOpd0, Opc&: BFXOpc, Opd0&: Src, Immr&: ImmR, Imms&: ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S)
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion we may
      // want to widen the pattern if we want to grab general bitfield
      // move case
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, Op: OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      // BFI case: translate the field placement into BFM's immr/imms encoding.
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the Known Zero for the candidate of the first operand.
    // This allows to catch more general case than just looking for
    // AND with imm. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(Op: OrOpd1Val);

    // Check if there is enough room for the second operand to appear
    // in the first one
    APInt BitsToBeInserted =
        APInt::getBitsSet(numBits: Known.getBitWidth(), loBit: DstLSB, hiBit: DstLSB + Width);

    if ((BitsToBeInserted & ~Known.Zero) != 0)
      continue;

    // Set the first operand
    uint64_t Imm;
    if (isOpcWithIntImmediate(N: OrOpd1, Opc: ISD::AND, Imm) &&
        isBitfieldDstMask(DstMask: Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND
      Dst = OrOpd1->getOperand(Num: 0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits
      Dst = OrOpd1Val;

    // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
    // with shifted operand is more efficient.
    if (tryOrrWithShift(N, OrOpd0: OrOpd0Val, OrOpd1: OrOpd1Val, Src, Dst, CurDAG,
                        BiggerPattern))
      return true;

    // both parts match
    SDLoc DL(N);
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
                     CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
    return true;
  }

  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  // mask (e.g., 0x000ffff0).
  uint64_t Mask0Imm, Mask1Imm;
  SDValue And0 = N->getOperand(Num: 0);
  SDValue And1 = N->getOperand(Num: 1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(N: And0.getNode(), Opc: ISD::AND, Imm&: Mask0Imm) &&
      isOpcWithIntImmediate(N: And1.getNode(), Opc: ISD::AND, Imm&: Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask: Mask0Imm, VT) || isShiftedMask(Mask: Mask1Imm, VT))) {

    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    // bits to be inserted.
    if (isShiftedMask(Mask: Mask0Imm, VT)) {
      std::swap(a&: And0, b&: And1);
      std::swap(a&: Mask0Imm, b&: Mask1Imm);
    }

    SDValue Src = And1->getOperand(Num: 0);
    SDValue Dst = And0->getOperand(Num: 0);
    unsigned LSB = llvm::countr_zero(Val: Mask1Imm);
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();

    // The BFXIL inserts the low-order bits from a source register, so right
    // shift the needed bits into place.
    SDLoc DL(N);
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    uint64_t LsrImm = LSB;
    // If Src is itself a single-use right shift, merge its amount into the
    // UBFM we are about to emit.
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(N: Src.getNode(), Opc: ISD::SRL, Imm&: LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(Num: 0);
      LsrImm += LSB;
    }

    SDNode *LSR = CurDAG->getMachineNode(
        Opcode: ShiftOpc, dl: DL, VT, Op1: Src, Op2: CurDAG->getTargetConstant(Val: LsrImm, DL, VT),
        Op3: CurDAG->getTargetConstant(Val: BitWidth - 1, DL, VT));

    // BFXIL is an alias of BFM, so translate to BFM operands.
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
    unsigned ImmS = Width - 1;

    // Create the BFXIL instruction.
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(Val: ImmR, DL, VT),
                     CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
    return true;
  }

  return false;
}
3934
3935bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3936 if (N->getOpcode() != ISD::OR)
3937 return false;
3938
3939 APInt NUsefulBits;
3940 getUsefulBits(Op: SDValue(N, 0), UsefulBits&: NUsefulBits);
3941
3942 // If all bits are not useful, just return UNDEF.
3943 if (!NUsefulBits) {
3944 CurDAG->SelectNodeTo(N, MachineOpc: TargetOpcode::IMPLICIT_DEF, VT: N->getValueType(ResNo: 0));
3945 return true;
3946 }
3947
3948 if (tryBitfieldInsertOpFromOr(N, UsefulBits: NUsefulBits, CurDAG))
3949 return true;
3950
3951 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3952}
3953
3954/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3955/// equivalent of a left shift by a constant amount followed by an and masking
3956/// out a contiguous set of bits.
3957bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3958 if (N->getOpcode() != ISD::AND)
3959 return false;
3960
3961 EVT VT = N->getValueType(ResNo: 0);
3962 if (VT != MVT::i32 && VT != MVT::i64)
3963 return false;
3964
3965 SDValue Op0;
3966 int DstLSB, Width;
3967 if (!isBitfieldPositioningOp(CurDAG, Op: SDValue(N, 0), /*BiggerPattern=*/false,
3968 Src&: Op0, DstLSB, Width))
3969 return false;
3970
3971 // ImmR is the rotate right amount.
3972 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3973 // ImmS is the most significant bit of the source to be moved.
3974 unsigned ImmS = Width - 1;
3975
3976 SDLoc DL(N);
3977 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(Val: ImmR, DL, VT),
3978 CurDAG->getTargetConstant(Val: ImmS, DL, VT)};
3979 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3980 CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
3981 return true;
3982}
3983
/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
/// variable shift/rotate instructions: the hardware ignores all but the low
/// log2(Size) bits of the amount, so arithmetic on the amount that is
/// congruent mod Size (ADD/SUB of multiples of Size, masking AND, extensions)
/// can be elided or simplified. Returns true iff N was selected.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(ResNo: 0);

  // Map the shift/rotate to its variable-amount machine instruction.
  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  // Size = register width; Bits = number of amount bits the hardware reads.
  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(Num: 1);
  SDLoc DL(N);
  SDValue NewShiftAmt;

  // Skip over an extend of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(Num: 0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(Num: 0);
    SDValue Add1 = ShiftAmt->getOperand(Num: 1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(N: Add1, Imm&: Add1Imm) && (Add1Imm % Size == 0)) {
      // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
      // to avoid the ADD/SUB.
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(N: Add0, Imm&: Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
      // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
      // to generate a NEG instead of a SUB from a constant.
      unsigned NegOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(ResNo: 0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(Opcode: NegOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(N: Add0, Imm&: Add0Imm) && (Add0Imm % Size == Size - 1)) {
      // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
      // to generate a NOT instead of a SUB from a constant.
      unsigned NotOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(ResNo: 0);
      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(Chain: CurDAG->getEntryNode(), dl: DL, Reg: ZeroReg, VT: SubVT);
      // NOT is encoded as ORN with the zero register.
      MachineSDNode *Not =
          CurDAG->getMachineNode(Opcode: NotOpc, dl: DL, VT: SubVT, Op1: Zero, Op2: Add1);
      NewShiftAmt = SDValue(Not, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers the
    // bits that are implicitly ANDed off by the above opcodes and if so, skip
    // the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: ISD::AND, Imm&: MaskImm) &&
        !isOpcWithIntImmediate(N: ShiftAmt.getNode(), Opc: AArch64ISD::ANDS, Imm&: MaskImm))
      return false;

    if ((unsigned)llvm::countr_one(Value: MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(Num: 0);
  }

  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, N: NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(ResNo: 0) == MVT::i32) {
    SDValue SubReg = CurDAG->getTargetConstant(Val: AArch64::sub_32, DL, VT: MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(Opcode: AArch64::SUBREG_TO_REG, dl: DL, VT,
                                                Op1: NewShiftAmt, Op2: SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  SDValue Ops[] = {N->getOperand(Num: 0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
  return true;
}
4108
4109static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
4110 SDValue &FixedPos,
4111 unsigned RegWidth,
4112 bool isReciprocal) {
4113 APFloat FVal(0.0);
4114 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: N))
4115 FVal = CN->getValueAPF();
4116 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(Val&: N)) {
4117 // Some otherwise illegal constants are allowed in this case.
4118 if (LN->getOperand(Num: 1).getOpcode() != AArch64ISD::ADDlow ||
4119 !isa<ConstantPoolSDNode>(Val: LN->getOperand(Num: 1)->getOperand(Num: 1)))
4120 return false;
4121
4122 ConstantPoolSDNode *CN =
4123 dyn_cast<ConstantPoolSDNode>(Val: LN->getOperand(Num: 1)->getOperand(Num: 1));
4124 FVal = cast<ConstantFP>(Val: CN->getConstVal())->getValueAPF();
4125 } else
4126 return false;
4127
4128 if (unsigned FBits =
4129 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4130 FixedPos = CurDAG->getTargetConstant(Val: FBits, DL: SDLoc(N), VT: MVT::i32);
4131 return true;
4132 }
4133
4134 return false;
4135}
4136
4137bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4138 unsigned RegWidth) {
4139 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4140 /*isReciprocal*/ false);
4141}
4142
/// Match a vector fixed-point conversion operand built from a splatted
/// immediate (MOVIshift / FMOV / DUP of a constant). Reinterpret the splat as
/// an IEEE float of RegWidth bits and, if it encodes a valid fractional-bit
/// count, return that count in FixedPos as an i32 target constant.
bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
                                                 unsigned RegWidth) {
  // Look through a NVCAST/BITCAST that doesn't change the element size.
  if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
      N.getValueType().getScalarSizeInBits() ==
          N.getOperand(i: 0).getValueType().getScalarSizeInBits())
    N = N.getOperand(i: 0);

  // Reinterpret a raw bit pattern as an IEEE float of the requested width.
  auto ImmToFloat = [RegWidth](APInt Imm) {
    switch (RegWidth) {
    case 16:
      return APFloat(APFloat::IEEEhalf(), Imm);
    case 32:
      return APFloat(APFloat::IEEEsingle(), Imm);
    case 64:
      return APFloat(APFloat::IEEEdouble(), Imm);
    default:
      llvm_unreachable("Unexpected RegWidth!");
    };
  };

  // Recover the splatted constant from the node that materializes it.
  APFloat FVal(0.0);
  switch (N->getOpcode()) {
  case AArch64ISD::MOVIshift:
    // Operand 0 is the base immediate, operand 1 the left-shift amount.
    FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(i: 0)
                                          << N.getConstantOperandVal(i: 1)));
    break;
  case AArch64ISD::FMOV:
    FVal = ImmToFloat(DecodeFMOVImm(Imm: N.getConstantOperandVal(i: 0), RegWidth));
    break;
  case AArch64ISD::DUP:
    if (isa<ConstantSDNode>(Val: N.getOperand(i: 0)))
      FVal = ImmToFloat(N.getConstantOperandAPInt(i: 0).trunc(width: RegWidth));
    else
      return false;
    break;
  default:
    return false;
  }

  if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
                                                      /*isReciprocal*/ false)) {
    FixedPos = CurDAG->getTargetConstant(Val: FBits, DL: SDLoc(N), VT: MVT::i32);
    return true;
  }

  return false;
}
4190
// Like SelectCVTFixedPosOperand, but forwards to the shared helper with
// isReciprocal=true, for matching the reciprocal form of the constant.
bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
                                                        SDValue &FixedPos,
                                                        unsigned RegWidth) {
  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
                                            /*isReciprocal*/ true);
}
4197
4198// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4199// of the string and obtains the integer values from them and combines these
4200// into a single value to be used in the MRS/MSR instruction.
4201static int getIntOperandFromRegisterString(StringRef RegString) {
4202 SmallVector<StringRef, 5> Fields;
4203 RegString.split(A&: Fields, Separator: ':');
4204
4205 if (Fields.size() == 1)
4206 return -1;
4207
4208 assert(Fields.size() == 5
4209 && "Invalid number of fields in read register string");
4210
4211 SmallVector<int, 5> Ops;
4212 bool AllIntFields = true;
4213
4214 for (StringRef Field : Fields) {
4215 unsigned IntField;
4216 AllIntFields &= !Field.getAsInteger(Radix: 10, Result&: IntField);
4217 Ops.push_back(Elt: IntField);
4218 }
4219
4220 assert(AllIntFields &&
4221 "Unexpected non-integer value in special register string.");
4222 (void)AllIntFields;
4223
4224 // Need to combine the integer fields of the string into a single value
4225 // based on the bit encoding of MRS/MSR instruction.
4226 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
4227 (Ops[3] << 3) | (Ops[4]);
4228}
4229
4230// Lower the read_register intrinsic to an MRS instruction node if the special
4231// register string argument is either of the form detailed in the ALCE (the
4232// form described in getIntOperandsFromRegisterString) or is a named register
4233// known by the MRS SysReg mapper.
4234bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4235 const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
4236 const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
4237 SDLoc DL(N);
4238
4239 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4240
4241 unsigned Opcode64Bit = AArch64::MRS;
4242 int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
4243 if (Imm == -1) {
4244 // No match, Use the sysreg mapper to map the remaining possible strings to
4245 // the value for the register to be used for the instruction operand.
4246 const auto *TheReg =
4247 AArch64SysReg::lookupSysRegByName(Name: RegString->getString());
4248 if (TheReg && TheReg->Readable &&
4249 TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
4250 Imm = TheReg->Encoding;
4251 else
4252 Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());
4253
4254 if (Imm == -1) {
4255 // Still no match, see if this is "pc" or give up.
4256 if (!ReadIs128Bit && RegString->getString() == "pc") {
4257 Opcode64Bit = AArch64::ADR;
4258 Imm = 0;
4259 } else {
4260 return false;
4261 }
4262 }
4263 }
4264
4265 SDValue InChain = N->getOperand(Num: 0);
4266 SDValue SysRegImm = CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32);
4267 if (!ReadIs128Bit) {
4268 CurDAG->SelectNodeTo(N, MachineOpc: Opcode64Bit, VT1: MVT::i64, VT2: MVT::Other /* Chain */,
4269 Ops: {SysRegImm, InChain});
4270 } else {
4271 SDNode *MRRS = CurDAG->getMachineNode(
4272 Opcode: AArch64::MRRS, dl: DL,
4273 ResultTys: {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4274 Ops: {SysRegImm, InChain});
4275
4276 // Sysregs are not endian. The even register always contains the low half
4277 // of the register.
4278 SDValue Lo = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::sube64, DL, VT: MVT::i64,
4279 Operand: SDValue(MRRS, 0));
4280 SDValue Hi = CurDAG->getTargetExtractSubreg(SRIdx: AArch64::subo64, DL, VT: MVT::i64,
4281 Operand: SDValue(MRRS, 0));
4282 SDValue OutChain = SDValue(MRRS, 1);
4283
4284 ReplaceUses(F: SDValue(N, 0), T: Lo);
4285 ReplaceUses(F: SDValue(N, 1), T: Hi);
4286 ReplaceUses(F: SDValue(N, 2), T: OutChain);
4287 };
4288 return true;
4289}
4290
// Lower the write_register intrinsic to an MSR instruction node if the special
// register string argument is either of the form detailed in the ALCE (the
// form described in getIntOperandsFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  // The register name travels as metadata: operand 1 is an MDNode wrapping
  // an MDString.
  const auto *MD = cast<MDNodeSDNode>(Val: N->getOperand(Num: 1));
  const auto *RegString = cast<MDString>(Val: MD->getMD()->getOperand(I: 0));
  SDLoc DL(N);

  // MSRR writes a 128-bit value from a GPR pair.
  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument, we know that this is
    // the case as it has been ensured by semantic checking.
    auto trySelectPState = [&](auto PMapper, unsigned State) {
      if (PMapper) {
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
        unsigned Reg = PMapper->Encoding;
        uint64_t Immed = N->getConstantOperandVal(Num: 2);
        CurDAG->SelectNodeTo(
            N, MachineOpc: State, VT: MVT::Other, Op1: CurDAG->getTargetConstant(Val: Reg, DL, VT: MVT::i32),
            Op2: CurDAG->getTargetConstant(Val: Immed, DL, VT: MVT::i16), Op3: N->getOperand(Num: 0));
        return true;
      }
      return false;
    };

    // Try the imm0_15 pstate fields first, then the imm0_1 ones.
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_15ByName(Name: RegString->getString()),
            AArch64::MSRpstateImm4))
      return true;
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_1ByName(Name: RegString->getString()),
            AArch64::MSRpstateImm1))
      return true;
  }

  int Imm = getIntOperandFromRegisterString(RegString: RegString->getString());
  if (Imm == -1) {
    // Use the sysreg mapper to attempt to map the remaining possible strings
    // to the value for the register to be used for the MSR (register)
    // instruction operand.
    auto TheReg = AArch64SysReg::lookupSysRegByName(Name: RegString->getString());
    if (TheReg && TheReg->Writeable &&
        TheReg->haveFeatures(ActiveFeatures: Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(Name: RegString->getString());

    if (Imm == -1)
      return false;
  }

  SDValue InChain = N->getOperand(Num: 0);
  if (!WriteIs128Bit) {
    CurDAG->SelectNodeTo(N, MachineOpc: AArch64::MSR, VT: MVT::Other,
                         Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32),
                         Op2: N->getOperand(Num: 2), Op3: InChain);
  } else {
    // No endian swap. The lower half always goes into the even subreg, and the
    // higher half always into the odd supreg.
    SDNode *Pair = CurDAG->getMachineNode(
        Opcode: TargetOpcode::REG_SEQUENCE, dl: DL, VT: MVT::Untyped /* XSeqPair */,
        Ops: {CurDAG->getTargetConstant(Val: AArch64::XSeqPairsClassRegClass.getID(), DL,
                                   VT: MVT::i32),
         N->getOperand(Num: 2),
         CurDAG->getTargetConstant(Val: AArch64::sube64, DL, VT: MVT::i32),
         N->getOperand(Num: 3),
         CurDAG->getTargetConstant(Val: AArch64::subo64, DL, VT: MVT::i32)});

    CurDAG->SelectNodeTo(N, MachineOpc: AArch64::MSRR, VT: MVT::Other,
                         Op1: CurDAG->getTargetConstant(Val: Imm, DL, VT: MVT::i32),
                         Op2: SDValue(Pair, 0), Op3: InChain);
  }

  return true;
}
4372
4373/// We've got special pseudo-instructions for these
4374bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4375 unsigned Opcode;
4376 EVT MemTy = cast<MemSDNode>(Val: N)->getMemoryVT();
4377
4378 // Leave IR for LSE if subtarget supports it.
4379 if (Subtarget->hasLSE()) return false;
4380
4381 if (MemTy == MVT::i8)
4382 Opcode = AArch64::CMP_SWAP_8;
4383 else if (MemTy == MVT::i16)
4384 Opcode = AArch64::CMP_SWAP_16;
4385 else if (MemTy == MVT::i32)
4386 Opcode = AArch64::CMP_SWAP_32;
4387 else if (MemTy == MVT::i64)
4388 Opcode = AArch64::CMP_SWAP_64;
4389 else
4390 llvm_unreachable("Unknown AtomicCmpSwap type");
4391
4392 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4393 SDValue Ops[] = {N->getOperand(Num: 1), N->getOperand(Num: 2), N->getOperand(Num: 3),
4394 N->getOperand(Num: 0)};
4395 SDNode *CmpSwap = CurDAG->getMachineNode(
4396 Opcode, dl: SDLoc(N),
4397 VTs: CurDAG->getVTList(VT1: RegTy, VT2: MVT::i32, VT3: MVT::Other), Ops);
4398
4399 MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
4400 CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: CmpSwap), NewMemRefs: {MemOp});
4401
4402 ReplaceUses(F: SDValue(N, 0), T: SDValue(CmpSwap, 0));
4403 ReplaceUses(F: SDValue(N, 1), T: SDValue(CmpSwap, 2));
4404 CurDAG->RemoveDeadNode(N);
4405
4406 return true;
4407}
4408
4409bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4410 SDValue &Shift, bool Negate) {
4411 if (!isa<ConstantSDNode>(Val: N))
4412 return false;
4413
4414 APInt Val =
4415 cast<ConstantSDNode>(Val&: N)->getAPIntValue().trunc(width: VT.getFixedSizeInBits());
4416
4417 return SelectSVEAddSubImm(DL: SDLoc(N), Value: Val, VT, Imm, Shift, Negate);
4418}
4419
// Split an unsigned immediate into (Imm, Shift) i32 target constants: either
// an 8-bit value with shift 0, or (for 16/32/64-bit elements) an 8-bit value
// shifted left by 8. Returns false when Val does not fit either form.
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
                                             SDValue &Imm, SDValue &Shift,
                                             bool Negate) {
  // Callers use Negate to match the negated constant instead.
  if (Negate)
    Val = -Val;

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All immediates are supported.
    Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
    Imm = CurDAG->getTargetConstant(Val: Val.getZExtValue(), DL, VT: MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8bit unsigned immediates.
    if ((Val & ~0xff) == 0) {
      Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
      Imm = CurDAG->getTargetConstant(Val: Val.getZExtValue(), DL, VT: MVT::i32);
      return true;
    }
    // Support 16bit unsigned immediates that are a multiple of 256.
    if ((Val & ~0xff00) == 0) {
      Shift = CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32);
      Imm = CurDAG->getTargetConstant(Val: Val.lshr(shiftAmt: 8).getZExtValue(), DL, VT: MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}
4454
4455bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4456 SDValue &Imm, SDValue &Shift,
4457 bool Negate) {
4458 if (!isa<ConstantSDNode>(Val: N))
4459 return false;
4460
4461 SDLoc DL(N);
4462 int64_t Val = cast<ConstantSDNode>(Val&: N)
4463 ->getAPIntValue()
4464 .trunc(width: VT.getFixedSizeInBits())
4465 .getSExtValue();
4466
4467 if (Negate)
4468 Val = -Val;
4469
4470 // Signed saturating instructions treat their immediate operand as unsigned,
4471 // whereas the related intrinsics define their operands to be signed. This
4472 // means we can only use the immediate form when the operand is non-negative.
4473 if (Val < 0)
4474 return false;
4475
4476 switch (VT.SimpleTy) {
4477 case MVT::i8:
4478 // All positive immediates are supported.
4479 Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
4480 Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
4481 return true;
4482 case MVT::i16:
4483 case MVT::i32:
4484 case MVT::i64:
4485 // Support 8bit positive immediates.
4486 if (Val <= 255) {
4487 Shift = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32);
4488 Imm = CurDAG->getTargetConstant(Val, DL, VT: MVT::i32);
4489 return true;
4490 }
4491 // Support 16bit positive immediates that are a multiple of 256.
4492 if (Val <= 65280 && Val % 256 == 0) {
4493 Shift = CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32);
4494 Imm = CurDAG->getTargetConstant(Val: Val >> 8, DL, VT: MVT::i32);
4495 return true;
4496 }
4497 break;
4498 default:
4499 break;
4500 }
4501
4502 return false;
4503}
4504
4505bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4506 SDValue &Shift) {
4507 if (!isa<ConstantSDNode>(Val: N))
4508 return false;
4509
4510 SDLoc DL(N);
4511 int64_t Val = cast<ConstantSDNode>(Val&: N)
4512 ->getAPIntValue()
4513 .trunc(width: VT.getFixedSizeInBits())
4514 .getSExtValue();
4515 int32_t ImmVal, ShiftVal;
4516 if (!AArch64_AM::isSVECpyDupImm(SizeInBits: VT.getScalarSizeInBits(), Val, Imm&: ImmVal,
4517 Shift&: ShiftVal))
4518 return false;
4519
4520 Shift = CurDAG->getTargetConstant(Val: ShiftVal, DL, VT: MVT::i32);
4521 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL, VT: MVT::i32);
4522 return true;
4523}
4524
// Match N as an SVE signed arithmetic immediate; the range check lives in the
// SDLoc/APInt overload. Non-constant nodes are rejected.
bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
  if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N))
    return SelectSVESignedArithImm(DL: SDLoc(N), Value: CNode->getAPIntValue(), Imm);
  return false;
}
4530
4531bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4532 SDValue &Imm) {
4533 int64_t ImmVal = Val.getSExtValue();
4534 if (ImmVal >= -128 && ImmVal < 128) {
4535 Imm = CurDAG->getSignedTargetConstant(Val: ImmVal, DL, VT: MVT::i32);
4536 return true;
4537 }
4538 return false;
4539}
4540
4541bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4542 if (auto CNode = dyn_cast<ConstantSDNode>(Val&: N)) {
4543 uint64_t ImmVal = CNode->getZExtValue();
4544
4545 switch (VT.SimpleTy) {
4546 case MVT::i8:
4547 ImmVal &= 0xFF;
4548 break;
4549 case MVT::i16:
4550 ImmVal &= 0xFFFF;
4551 break;
4552 case MVT::i32:
4553 ImmVal &= 0xFFFFFFFF;
4554 break;
4555 case MVT::i64:
4556 break;
4557 default:
4558 llvm_unreachable("Unexpected type");
4559 }
4560
4561 if (ImmVal < 256) {
4562 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc(N), VT: MVT::i32);
4563 return true;
4564 }
4565 }
4566 return false;
4567}
4568
4569bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4570 bool Invert) {
4571 uint64_t ImmVal;
4572 if (auto CI = dyn_cast<ConstantSDNode>(Val&: N))
4573 ImmVal = CI->getZExtValue();
4574 else if (auto CFP = dyn_cast<ConstantFPSDNode>(Val&: N))
4575 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4576 else
4577 return false;
4578
4579 if (Invert)
4580 ImmVal = ~ImmVal;
4581
4582 uint64_t encoding;
4583 if (!AArch64_AM::isSVELogicalImm(SizeInBits: VT.getScalarSizeInBits(), ImmVal, Encoding&: encoding))
4584 return false;
4585
4586 Imm = CurDAG->getTargetConstant(Val: encoding, DL: SDLoc(N), VT: MVT::i64);
4587 return true;
4588}
4589
4590// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4591// Rather than attempt to normalise everything we can sometimes saturate the
4592// shift amount during selection. This function also allows for consistent
4593// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4594// required by the instructions.
4595bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4596 uint64_t High, bool AllowSaturation,
4597 SDValue &Imm) {
4598 if (auto *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
4599 uint64_t ImmVal = CN->getZExtValue();
4600
4601 // Reject shift amounts that are too small.
4602 if (ImmVal < Low)
4603 return false;
4604
4605 // Reject or saturate shift amounts that are too big.
4606 if (ImmVal > High) {
4607 if (!AllowSaturation)
4608 return false;
4609 ImmVal = High;
4610 }
4611
4612 Imm = CurDAG->getTargetConstant(Val: ImmVal, DL: SDLoc(N), VT: MVT::i32);
4613 return true;
4614 }
4615
4616 return false;
4617}
4618
// Try to select llvm.aarch64.tagp as a single TAGPstack node when the base
// pointer is a stack slot tagged via llvm.aarch64.irg.sp.
bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  // tagp(FrameIndex, IRGstack, tag_offset):
  // since the offset between FrameIndex and IRGstack is a compile-time
  // constant, this can be lowered to a single ADDG instruction.
  if (!(isa<FrameIndexSDNode>(Val: N->getOperand(Num: 1)))) {
    return false;
  }

  // Operand 2 must be the value produced by the aarch64.irg.sp intrinsic.
  SDValue IRG_SP = N->getOperand(Num: 2);
  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
      IRG_SP->getConstantOperandVal(Num: 1) != Intrinsic::aarch64_irg_sp) {
    return false;
  }

  const TargetLowering *TLI = getTargetLowering();
  SDLoc DL(N);
  int FI = cast<FrameIndexSDNode>(Val: N->getOperand(Num: 1))->getIndex();
  SDValue FiOp = CurDAG->getTargetFrameIndex(
      FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
  int TagOffset = N->getConstantOperandVal(Num: 3);

  // TAGPstack operands: frame index, zero offset, the IRG value, tag offset.
  SDNode *Out = CurDAG->getMachineNode(
      Opcode: AArch64::TAGPstack, dl: DL, VT: MVT::i64,
      Ops: {FiOp, CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64), N->getOperand(Num: 2),
       CurDAG->getTargetConstant(Val: TagOffset, DL, VT: MVT::i64)});
  ReplaceNode(F: N, T: Out);
  return true;
}
4647
// Select llvm.aarch64.tagp: prefer the single-instruction stack-slot form,
// otherwise emit the general SUBP + ADDXrr + ADDG sequence.
void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))
    return;
  // FIXME: above applies in any case when offset between Op1 and Op2 is a
  // compile-time constant, not just for stack allocations.

  // General case for unrelated pointers in Op1 and Op2.
  SDLoc DL(N);
  int TagOffset = N->getConstantOperandVal(Num: 3);
  // SUBP(Op1, Op2) then ADDXrr with Op2, then ADDG applying the tag offset.
  SDNode *N1 = CurDAG->getMachineNode(Opcode: AArch64::SUBP, dl: DL, VT: MVT::i64,
                                      Ops: {N->getOperand(Num: 1), N->getOperand(Num: 2)});
  SDNode *N2 = CurDAG->getMachineNode(Opcode: AArch64::ADDXrr, dl: DL, VT: MVT::i64,
                                      Ops: {SDValue(N1, 0), N->getOperand(Num: 2)});
  SDNode *N3 = CurDAG->getMachineNode(
      Opcode: AArch64::ADDG, dl: DL, VT: MVT::i64,
      Ops: {SDValue(N2, 0), CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64),
       CurDAG->getTargetConstant(Val: TagOffset, DL, VT: MVT::i64)});
  ReplaceNode(F: N, T: N3);
}
4669
// Select an INSERT_SUBVECTOR that is really a fixed-length -> scalable "cast"
// as a plain COPY_TO_REGCLASS into a ZPR register.
bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
  assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like insert_subvector.
  if (N->getConstantOperandVal(Num: 2) != 0)
    return false;
  if (!N->getOperand(Num: 0).isUndef())
    return false;

  // Bail when normal isel should do the job.
  EVT VT = N->getValueType(ResNo: 0);
  EVT InVT = N->getOperand(Num: 1).getValueType();
  if (VT.isFixedLengthVector() || InVT.isScalableVector())
    return false;
  if (InVT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to insert into a packed scalable vector!");

  // The "cast" is just a register-class constraint; no data movement needed.
  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(Val: AArch64::ZPRRegClassID, DL, VT: MVT::i64);
  ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT,
                                       Op1: N->getOperand(Num: 1), Op2: RC));
  return true;
}
4700
// Select an EXTRACT_SUBVECTOR that is really a scalable -> fixed-length
// "cast" as a plain COPY_TO_REGCLASS into a ZPR register.
bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like extract_subvector.
  if (N->getConstantOperandVal(Num: 1) != 0)
    return false;

  // Bail when normal isel can do the job.
  EVT VT = N->getValueType(ResNo: 0);
  EVT InVT = N->getOperand(Num: 0).getValueType();
  if (VT.isScalableVector() || InVT.isFixedLengthVector())
    return false;
  if (VT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to extract from a packed scalable vector!");

  // The "cast" is just a register-class constraint; no data movement needed.
  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(Val: AArch64::ZPRRegClassID, DL, VT: MVT::i64);
  ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, dl: DL, VT,
                                       Op1: N->getOperand(Num: 0), Op2: RC));
  return true;
}
4729
// Try to select an OR of shifted operands (the expansion of a constant
// rotate, possibly of an XOR) as an SVE or Neon SHA3 XAR instruction.
bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");

  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);

  EVT VT = N->getValueType(ResNo: 0);
  SDLoc DL(N);

  // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
  // Rotate by a constant is a funnel shift in IR which is expanded to
  // an OR with shifted operands.
  // We do the following transform:
  // OR N0, N1 -> xar (x, y, imm)
  // Where:
  // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
  // N0 = SHL_PRED true, V, splat(bits-imm)
  // V = (xor x, y)
  if (VT.isScalableVector() &&
      (Subtarget->hasSVE2() ||
       (Subtarget->hasSME() && Subtarget->isStreaming()))) {
    // OR is commutative: canonicalise so N0 is the left shift.
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      std::swap(a&: N0, b&: N1);
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      return false;

    // Both predicated shifts must have an all-active governing predicate.
    auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
    if (!TLI->isAllActivePredicate(DAG&: *CurDAG, N: N0.getOperand(i: 0)) ||
        !TLI->isAllActivePredicate(DAG&: *CurDAG, N: N1.getOperand(i: 0)))
      return false;

    // Both shifts must apply to the same value V.
    if (N0.getOperand(i: 1) != N1.getOperand(i: 1))
      return false;

    SDValue R1, R2;
    bool IsXOROperand = true;
    if (N0.getOperand(i: 1).getOpcode() != ISD::XOR) {
      IsXOROperand = false;
    } else {
      // N0.getOperand(1) == N1.getOperand(1) was checked above, so the XOR's
      // operands can be taken from either side.
      R1 = N0.getOperand(i: 1).getOperand(i: 0);
      R2 = N1.getOperand(i: 1).getOperand(i: 1);
    }

    APInt ShlAmt, ShrAmt;
    if (!ISD::isConstantSplatVector(N: N0.getOperand(i: 2).getNode(), SplatValue&: ShlAmt) ||
        !ISD::isConstantSplatVector(N: N1.getOperand(i: 2).getNode(), SplatValue&: ShrAmt))
      return false;

    // The two shift amounts must form a full-element-width rotate.
    if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
      return false;

    if (!IsXOROperand) {
      // No XOR below the shifts: synthesise a zero operand so the XAR's XOR
      // leaves the value unchanged and only the rotate takes effect.
      SDValue Zero = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
      SDNode *MOV = CurDAG->getMachineNode(Opcode: AArch64::MOVIv2d_ns, dl: DL, VT, Op1: Zero);
      SDValue MOVIV = SDValue(MOV, 0);

      SDValue ZSub = CurDAG->getTargetConstant(Val: AArch64::zsub, DL, VT: MVT::i32);
      SDNode *SubRegToReg =
          CurDAG->getMachineNode(Opcode: AArch64::SUBREG_TO_REG, dl: DL, VT, Op1: MOVIV, Op2: ZSub);

      R1 = N1->getOperand(Num: 1);
      R2 = SDValue(SubRegToReg, 0);
    }

    SDValue Imm =
        CurDAG->getTargetConstant(Val: ShrAmt.getZExtValue(), DL, VT: MVT::i32);

    SDValue Ops[] = {R1, R2, Imm};
    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
            VT, Opcodes: {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                 AArch64::XAR_ZZZI_D})) {
      CurDAG->SelectNodeTo(N, MachineOpc: Opc, VT, Ops);
      return true;
    }
    return false;
  }

  // We have Neon SHA3 XAR operation for v2i64 but for types
  // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
  // is available.
  EVT SVT;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::v4i32:
  case MVT::v2i32:
    SVT = MVT::nxv4i32;
    break;
  case MVT::v8i16:
  case MVT::v4i16:
    SVT = MVT::nxv8i16;
    break;
  case MVT::v16i8:
  case MVT::v8i8:
    SVT = MVT::nxv16i8;
    break;
  case MVT::v2i64:
  case MVT::v1i64:
    SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
    break;
  default:
    return false;
  }

  if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
      (SVT.isScalableVector() && !Subtarget->hasSVE2()))
    return false;

  if (N0->getOpcode() != AArch64ISD::VSHL ||
      N1->getOpcode() != AArch64ISD::VLSHR)
    return false;

  // Both Neon shifts must apply to the same value.
  if (N0->getOperand(Num: 0) != N1->getOperand(Num: 0))
    return false;

  SDValue R1, R2;
  bool IsXOROperand = true;
  if (N1->getOperand(Num: 0)->getOpcode() != ISD::XOR) {
    IsXOROperand = false;
  } else {
    SDValue XOR = N0.getOperand(i: 0);
    R1 = XOR.getOperand(i: 0);
    R2 = XOR.getOperand(i: 1);
  }

  unsigned HsAmt = N0.getConstantOperandVal(i: 1);
  unsigned ShAmt = N1.getConstantOperandVal(i: 1);

  SDValue Imm = CurDAG->getTargetConstant(
      Val: ShAmt, DL, VT: N0.getOperand(i: 1).getValueType(), isOpaque: false);

  // The two shift amounts must form a full-element-width rotate.
  unsigned VTSizeInBits = VT.getScalarSizeInBits();
  if (ShAmt + HsAmt != VTSizeInBits)
    return false;

  if (!IsXOROperand) {
    // No XOR: pair the shifted value with a zero vector so only the rotate
    // takes effect.
    SDValue Zero = CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i64);
    SDNode *MOV =
        CurDAG->getMachineNode(Opcode: AArch64::MOVIv2d_ns, dl: DL, VT: MVT::v2i64, Op1: Zero);
    SDValue MOVIV = SDValue(MOV, 0);

    R1 = N1->getOperand(Num: 0);
    R2 = MOVIV;
  }

  // If the chosen XAR operates on a different (wider or scalable) type,
  // coerce both operands into that register class via INSERT_SUBREG.
  if (SVT != VT) {
    SDValue Undef =
        SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: SVT), 0);

    if (SVT.isScalableVector() && VT.is64BitVector()) {
      // 64-bit vectors are first widened to their 128-bit counterpart.
      EVT QVT = VT.getDoubleNumVectorElementsVT(Context&: *CurDAG->getContext());

      SDValue UndefQ = SDValue(
          CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: QVT), 0);
      SDValue DSub = CurDAG->getTargetConstant(Val: AArch64::dsub, DL, VT: MVT::i32);

      R1 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: QVT,
                                          Op1: UndefQ, Op2: R1, Op3: DSub),
                   0);
      if (R2.getValueType() == VT)
        R2 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: QVT,
                                            Op1: UndefQ, Op2: R2, Op3: DSub),
                     0);
    }

    SDValue SubReg = CurDAG->getTargetConstant(
        Val: (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, VT: MVT::i32);

    R1 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: SVT, Op1: Undef,
                                        Op2: R1, Op3: SubReg),
                 0);

    if (SVT.isScalableVector() || R2.getValueType() != SVT)
      R2 = SDValue(CurDAG->getMachineNode(Opcode: AArch64::INSERT_SUBREG, dl: DL, VT: SVT,
                                          Op1: Undef, Op2: R2, Op3: SubReg),
                   0);
  }

  SDValue Ops[] = {R1, R2, Imm};
  SDNode *XAR = nullptr;

  if (SVT.isScalableVector()) {
    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
            VT: SVT, Opcodes: {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                  AArch64::XAR_ZZZI_D}))
      XAR = CurDAG->getMachineNode(Opcode: Opc, dl: DL, VT: SVT, Ops);
  } else {
    XAR = CurDAG->getMachineNode(Opcode: AArch64::XAR, dl: DL, VT: SVT, Ops);
  }

  assert(XAR && "Unexpected NULL value for XAR instruction in DAG");

  // Extract the result back down to the original type if it was widened.
  if (SVT != VT) {
    if (VT.is64BitVector() && SVT.isScalableVector()) {
      EVT QVT = VT.getDoubleNumVectorElementsVT(Context&: *CurDAG->getContext());

      SDValue ZSub = CurDAG->getTargetConstant(Val: AArch64::zsub, DL, VT: MVT::i32);
      SDNode *Q = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT: QVT,
                                         Op1: SDValue(XAR, 0), Op2: ZSub);

      SDValue DSub = CurDAG->getTargetConstant(Val: AArch64::dsub, DL, VT: MVT::i32);
      XAR = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT,
                                   Op1: SDValue(Q, 0), Op2: DSub);
    } else {
      SDValue SubReg = CurDAG->getTargetConstant(
          Val: (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
          VT: MVT::i32);
      XAR = CurDAG->getMachineNode(Opcode: AArch64::EXTRACT_SUBREG, dl: DL, VT,
                                   Op1: SDValue(XAR, 0), Op2: SubReg);
    }
  }
  ReplaceNode(F: N, T: XAR);
  return true;
}
4944
4945void AArch64DAGToDAGISel::Select(SDNode *Node) {
4946 // If we have a custom node, we already have selected!
4947 if (Node->isMachineOpcode()) {
4948 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4949 Node->setNodeId(-1);
4950 return;
4951 }
4952
4953 // Few custom selection stuff.
4954 EVT VT = Node->getValueType(ResNo: 0);
4955
4956 switch (Node->getOpcode()) {
4957 default:
4958 break;
4959
4960 case ISD::ATOMIC_CMP_SWAP:
4961 if (SelectCMP_SWAP(N: Node))
4962 return;
4963 break;
4964
4965 case ISD::READ_REGISTER:
4966 case AArch64ISD::MRRS:
4967 if (tryReadRegister(N: Node))
4968 return;
4969 break;
4970
4971 case ISD::WRITE_REGISTER:
4972 case AArch64ISD::MSRR:
4973 if (tryWriteRegister(N: Node))
4974 return;
4975 break;
4976
4977 case ISD::LOAD: {
4978 // Try to select as an indexed load. Fall through to normal processing
4979 // if we can't.
4980 if (tryIndexedLoad(N: Node))
4981 return;
4982 break;
4983 }
4984
4985 case ISD::SRL:
4986 case ISD::AND:
4987 case ISD::SRA:
4988 case ISD::SIGN_EXTEND_INREG:
4989 if (tryBitfieldExtractOp(N: Node))
4990 return;
4991 if (tryBitfieldInsertInZeroOp(N: Node))
4992 return;
4993 [[fallthrough]];
4994 case ISD::ROTR:
4995 case ISD::SHL:
4996 if (tryShiftAmountMod(N: Node))
4997 return;
4998 break;
4999
5000 case ISD::SIGN_EXTEND:
5001 if (tryBitfieldExtractOpFromSExt(N: Node))
5002 return;
5003 break;
5004
5005 case ISD::OR:
5006 if (tryBitfieldInsertOp(N: Node))
5007 return;
5008 if (trySelectXAR(N: Node))
5009 return;
5010 break;
5011
5012 case ISD::EXTRACT_SUBVECTOR: {
5013 if (trySelectCastScalableToFixedLengthVector(N: Node))
5014 return;
5015 break;
5016 }
5017
5018 case ISD::INSERT_SUBVECTOR: {
5019 if (trySelectCastFixedLengthToScalableVector(N: Node))
5020 return;
5021 break;
5022 }
5023
5024 case ISD::Constant: {
5025 // Materialize zero constants as copies from WZR/XZR. This allows
5026 // the coalescer to propagate these into other instructions.
5027 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Val: Node);
5028 if (ConstNode->isZero()) {
5029 if (VT == MVT::i32) {
5030 SDValue New = CurDAG->getCopyFromReg(
5031 Chain: CurDAG->getEntryNode(), dl: SDLoc(Node), Reg: AArch64::WZR, VT: MVT::i32);
5032 ReplaceNode(F: Node, T: New.getNode());
5033 return;
5034 } else if (VT == MVT::i64) {
5035 SDValue New = CurDAG->getCopyFromReg(
5036 Chain: CurDAG->getEntryNode(), dl: SDLoc(Node), Reg: AArch64::XZR, VT: MVT::i64);
5037 ReplaceNode(F: Node, T: New.getNode());
5038 return;
5039 }
5040 }
5041 break;
5042 }
5043
5044 case ISD::FrameIndex: {
5045 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5046 int FI = cast<FrameIndexSDNode>(Val: Node)->getIndex();
5047 unsigned Shifter = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0);
5048 const TargetLowering *TLI = getTargetLowering();
5049 SDValue TFI = CurDAG->getTargetFrameIndex(
5050 FI, VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));
5051 SDLoc DL(Node);
5052 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32),
5053 CurDAG->getTargetConstant(Val: Shifter, DL, VT: MVT::i32) };
5054 CurDAG->SelectNodeTo(N: Node, MachineOpc: AArch64::ADDXri, VT: MVT::i64, Ops);
5055 return;
5056 }
  case ISD::INTRINSIC_W_CHAIN: {
    // Chained intrinsics: dispatch on the intrinsic ID (operand 1; operand 0
    // is the chain).
    unsigned IntNo = Node->getConstantOperandVal(Num: 1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_gcsss: {
      // Guarded Control Stack stack-switch: emit the GCSSS1/GCSSS2 pair.
      // GCSSS1 consumes the new stack value; GCSSS2 is fed XZR and produces
      // the i64 result plus the chain, replacing the intrinsic node.
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(Num: 0);
      SDValue Val = Node->getOperand(Num: 2);
      SDValue Zero = CurDAG->getCopyFromReg(Chain, dl: DL, Reg: AArch64::XZR, VT: MVT::i64);
      SDNode *SS1 =
          CurDAG->getMachineNode(Opcode: AArch64::GCSSS1, dl: DL, VT: MVT::Other, Op1: Val, Op2: Chain);
      SDNode *SS2 = CurDAG->getMachineNode(Opcode: AArch64::GCSSS2, dl: DL, VT1: MVT::i64,
                                           VT2: MVT::Other, Op1: Zero, Op2: SDValue(SS1, 0));
      ReplaceNode(F: Node, T: SS2);
      return;
    }
    case Intrinsic::aarch64_ldaxp:
    case Intrinsic::aarch64_ldxp: {
      // 128-bit exclusive load-pair; LDAXPX is the acquire variant. The
      // machine node produces two i64 results plus a chain.
      unsigned Op =
          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
      SDValue MemAddr = Node->getOperand(Num: 2);
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(Num: 0);

      SDNode *Ld = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT1: MVT::i64, VT2: MVT::i64,
                                          VT3: MVT::Other, Op1: MemAddr, Op2: Chain);

      // Transfer memoperands.
      MachineMemOperand *MemOp =
          cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
      CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Ld), NewMemRefs: {MemOp});
      ReplaceNode(F: Node, T: Ld);
      return;
    }
    case Intrinsic::aarch64_stlxp:
    case Intrinsic::aarch64_stxp: {
      // 128-bit exclusive store-pair; STLXPX is the release variant. The
      // machine node produces the i32 exclusive-store status plus a chain.
      unsigned Op =
          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(Num: 0);
      SDValue ValLo = Node->getOperand(Num: 2);
      SDValue ValHi = Node->getOperand(Num: 3);
      SDValue MemAddr = Node->getOperand(Num: 4);

      // Place arguments in the right order.
      SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};

      SDNode *St = CurDAG->getMachineNode(Opcode: Op, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp =
          cast<MemIntrinsicSDNode>(Val: Node)->getMemOperand();
      CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: St), NewMemRefs: {MemOp});

      ReplaceNode(F: Node, T: St);
      return;
    }
    case Intrinsic::aarch64_neon_ld1x2:
      // LD1 two-register form: opcode is chosen by element type/width;
      // dsub0 for 64-bit vectors, qsub0 for 128-bit vectors.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x3:
      // LD1 three-register form; same type dispatch as ld1x2.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x4:
      // LD1 four-register form; same type dispatch as ld1x2.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2:
      // LD2 structured load (de-interleaving). Note: for single-element
      // 64-bit vectors (v1i64/v1f64) the LD1 two-register form is used —
      // there is no .1d LD2 arrangement, and the degenerate case is
      // equivalent to ld1x2.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3:
      // LD3 structured load; v1i64/v1f64 again maps to the LD1 form.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4:
      // LD4 structured load; v1i64/v1f64 again maps to the LD1 form.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2r:
      // LD2R: load one element and replicate to all lanes of two vectors.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3r:
      // LD3R: load-and-replicate into three vectors.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4r:
      // LD4R: load-and-replicate into four vectors.
      if (VT == MVT::v8i8) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8b, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv16b, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4h, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8h, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2s, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4s, SubRegIdx: AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv1d, SubRegIdx: AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2d, SubRegIdx: AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2lane:
      // Single-lane LD2: the opcode depends only on element SIZE, so 64-bit
      // and 128-bit vector types with the same element width share a case.
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3lane:
      // Single-lane LD3; same element-size dispatch as ld2lane.
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4lane:
      // Single-lane LD4; same element-size dispatch as ld2lane.
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i64);
        return;
      }
      break;
    case Intrinsic::aarch64_ld64b:
      // LD64B: 64-byte load into eight consecutive X registers (x8sub_0 is
      // the first subregister of the GPR octuple).
      SelectLoad(N: Node, NumVecs: 8, Opc: AArch64::LD64B, SubRegIdx: AArch64::x8sub_0);
      return;
    case Intrinsic::aarch64_sve_ld2q_sret: {
      // SVE 128-bit-element structured loads; Scale 4 = 16-byte elements.
      SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 4, Opc_ri: AArch64::LD2Q_IMM, Opc_rr: AArch64::LD2Q, IsIntr: true);
      return;
    }
    case Intrinsic::aarch64_sve_ld3q_sret: {
      SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 4, Opc_ri: AArch64::LD3Q_IMM, Opc_rr: AArch64::LD3Q, IsIntr: true);
      return;
    }
    case Intrinsic::aarch64_sve_ld4q_sret: {
      SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 4, Opc_ri: AArch64::LD4Q_IMM, Opc_rr: AArch64::LD4Q, IsIntr: true);
      return;
    }
    case Intrinsic::aarch64_sve_ld2_sret: {
      // SVE LD2 structured load; Scale encodes log2(element bytes):
      // 0=B, 1=H, 2=W, 3=D. Both reg+imm and reg+reg opcodes are passed.
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD2B_IMM, Opc_rr: AArch64::LD2B,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD2H_IMM, Opc_rr: AArch64::LD2H,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD2W_IMM, Opc_rr: AArch64::LD2W,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD2D_IMM, Opc_rr: AArch64::LD2D,
                             IsIntr: true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld1_pn_x2: {
      // Predicate-as-counter LD1 into two Z registers. In streaming mode
      // with SME2 use the pseudo opcodes; otherwise require SVE2p1; if
      // neither feature is available, fall through to default selection.
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(
              N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD1B_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LD1B_2Z_IMM,
                                          Opc_rr: AArch64::LD1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(
              N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD1H_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LD1H_2Z_IMM,
                                          Opc_rr: AArch64::LD1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(
              N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD1W_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LD1W_2Z_IMM,
                                          Opc_rr: AArch64::LD1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(
              N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD1D_2Z_IMM_PSEUDO, Opc_rr: AArch64::LD1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LD1D_2Z_IMM,
                                          Opc_rr: AArch64::LD1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld1_pn_x4: {
      // Four-register variant of the case above; same feature gating.
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(
              N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD1B_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD1B_4Z_IMM,
                                          Opc_rr: AArch64::LD1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(
              N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD1H_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD1H_4Z_IMM,
                                          Opc_rr: AArch64::LD1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(
              N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD1W_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD1W_4Z_IMM,
                                          Opc_rr: AArch64::LD1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(
              N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD1D_4Z_IMM_PSEUDO, Opc_rr: AArch64::LD1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD1D_4Z_IMM,
                                          Opc_rr: AArch64::LD1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
      // Non-temporal predicate-as-counter LDNT1 into two Z registers.
      // Same SME2-streaming / SVE2p1 gating as the ld1_pn cases.
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0,
                                          Opc_ri: AArch64::LDNT1B_2Z_IMM_PSEUDO,
                                          Opc_rr: AArch64::LDNT1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 0, Opc_ri: AArch64::LDNT1B_2Z_IMM,
                                          Opc_rr: AArch64::LDNT1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1,
                                          Opc_ri: AArch64::LDNT1H_2Z_IMM_PSEUDO,
                                          Opc_rr: AArch64::LDNT1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 1, Opc_ri: AArch64::LDNT1H_2Z_IMM,
                                          Opc_rr: AArch64::LDNT1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2,
                                          Opc_ri: AArch64::LDNT1W_2Z_IMM_PSEUDO,
                                          Opc_rr: AArch64::LDNT1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 2, Opc_ri: AArch64::LDNT1W_2Z_IMM,
                                          Opc_rr: AArch64::LDNT1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3,
                                          Opc_ri: AArch64::LDNT1D_2Z_IMM_PSEUDO,
                                          Opc_rr: AArch64::LDNT1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 2, Scale: 3, Opc_ri: AArch64::LDNT1D_2Z_IMM,
                                          Opc_rr: AArch64::LDNT1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
      // Four-register variant of the case above; same feature gating.
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0,
                                          Opc_ri: AArch64::LDNT1B_4Z_IMM_PSEUDO,
                                          Opc_rr: AArch64::LDNT1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LDNT1B_4Z_IMM,
                                          Opc_rr: AArch64::LDNT1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1,
                                          Opc_ri: AArch64::LDNT1H_4Z_IMM_PSEUDO,
                                          Opc_rr: AArch64::LDNT1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LDNT1H_4Z_IMM,
                                          Opc_rr: AArch64::LDNT1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2,
                                          Opc_ri: AArch64::LDNT1W_4Z_IMM_PSEUDO,
                                          Opc_rr: AArch64::LDNT1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LDNT1W_4Z_IMM,
                                          Opc_rr: AArch64::LDNT1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2() && Subtarget->isStreaming())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3,
                                          Opc_ri: AArch64::LDNT1D_4Z_IMM_PSEUDO,
                                          Opc_rr: AArch64::LDNT1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LDNT1D_4Z_IMM,
                                          Opc_rr: AArch64::LDNT1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld3_sret: {
      // SVE LD3 structured load; same Scale convention as ld2_sret.
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 0, Opc_ri: AArch64::LD3B_IMM, Opc_rr: AArch64::LD3B,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 1, Opc_ri: AArch64::LD3H_IMM, Opc_rr: AArch64::LD3H,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 2, Opc_ri: AArch64::LD3W_IMM, Opc_rr: AArch64::LD3W,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(N: Node, NumVecs: 3, Scale: 3, Opc_ri: AArch64::LD3D_IMM, Opc_rr: AArch64::LD3D,
                             IsIntr: true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld4_sret: {
      // SVE LD4 structured load; same Scale convention as ld2_sret.
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 0, Opc_ri: AArch64::LD4B_IMM, Opc_rr: AArch64::LD4B,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 1, Opc_ri: AArch64::LD4H_IMM, Opc_rr: AArch64::LD4H,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 2, Opc_ri: AArch64::LD4W_IMM, Opc_rr: AArch64::LD4W,
                             IsIntr: true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(N: Node, NumVecs: 4, Scale: 3, Opc_ri: AArch64::LD4D_IMM, Opc_rr: AArch64::LD4D,
                             IsIntr: true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_hor_vg2: {
      // SME multi-vector ZA read, horizontal slices, two registers.
      // Template args are <MaxIdx, Scale> for the slice-index immediate.
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAB0,
                                     Op: AArch64::MOVA_2ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAH0,
                                    Op: AArch64::MOVA_2ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAS0,
                                    Op: AArch64::MOVA_2ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAD0,
                                    Op: AArch64::MOVA_2ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg2: {
      // Vertical-slice counterpart of read_hor_vg2.
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAB0,
                                     Op: AArch64::MOVA_2ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAH0,
                                    Op: AArch64::MOVA_2ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAS0,
                                    Op: AArch64::MOVA_2ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 2, BaseReg: AArch64::ZAD0,
                                    Op: AArch64::MOVA_2ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_hor_vg4: {
      // Horizontal-slice ZA read, four registers.
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAB0,
                                     Op: AArch64::MOVA_4ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAH0,
                                    Op: AArch64::MOVA_4ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAS0,
                                    Op: AArch64::MOVA_4ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAD0,
                                    Op: AArch64::MOVA_4ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg4: {
      // Vertical-slice ZA read, four registers.
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAB0,
                                     Op: AArch64::MOVA_4ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAH0,
                                    Op: AArch64::MOVA_4ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAS0,
                                    Op: AArch64::MOVA_4ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 4>(N: Node, NumVecs: 4, BaseReg: AArch64::ZAD0,
                                    Op: AArch64::MOVA_4ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_vg1x2: {
      // Whole-ZA-vector (vg1) read into two registers; no per-type dispatch.
      SelectMultiVectorMove<7, 1>(N: Node, NumVecs: 2, BaseReg: AArch64::ZA,
                                  Op: AArch64::MOVA_VG2_2ZMXI);
      return;
    }
    case Intrinsic::aarch64_sme_read_vg1x4: {
      // Whole-ZA-vector (vg1) read into four registers.
      SelectMultiVectorMove<7, 1>(N: Node, NumVecs: 4, BaseReg: AArch64::ZA,
                                  Op: AArch64::MOVA_VG4_4ZMXI);
      return;
    }
    case Intrinsic::aarch64_sme_readz_horiz_x2: {
      // MOVAZ (read-and-zero) from ZA, horizontal slices, two registers;
      // trailing args are MaxIdx and Scale for the slice immediate.
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_B_PSEUDO, MaxIdx: 14, Scale: 2);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_H_PSEUDO, MaxIdx: 6, Scale: 2);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_S_PSEUDO, MaxIdx: 2, Scale: 2);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_H_D_PSEUDO, MaxIdx: 0, Scale: 2);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_vert_x2: {
      // Vertical-slice counterpart of readz_horiz_x2.
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_B_PSEUDO, MaxIdx: 14, Scale: 2);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_H_PSEUDO, MaxIdx: 6, Scale: 2);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_S_PSEUDO, MaxIdx: 2, Scale: 2);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_2ZMI_V_D_PSEUDO, MaxIdx: 0, Scale: 2);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_horiz_x4: {
      // Read-and-zero, horizontal slices, four registers.
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_B_PSEUDO, MaxIdx: 12, Scale: 4);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_H_PSEUDO, MaxIdx: 4, Scale: 4);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_S_PSEUDO, MaxIdx: 0, Scale: 4);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_H_D_PSEUDO, MaxIdx: 0, Scale: 4);
        return;
      }
      break;
    }
5822 case Intrinsic::aarch64_sme_readz_vert_x4: {
5823 if (VT == MVT::nxv16i8) {
5824 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_B_PSEUDO, MaxIdx: 12, Scale: 4);
5825 return;
5826 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5827 VT == MVT::nxv8bf16) {
5828 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_H_PSEUDO, MaxIdx: 4, Scale: 4);
5829 return;
5830 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5831 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_S_PSEUDO, MaxIdx: 0, Scale: 4);
5832 return;
5833 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5834 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_4ZMI_V_D_PSEUDO, MaxIdx: 0, Scale: 4);
5835 return;
5836 }
5837 break;
5838 }
5839 case Intrinsic::aarch64_sme_readz_x2: {
5840 SelectMultiVectorMoveZ(N: Node, NumVecs: 2, Op: AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, MaxIdx: 7, Scale: 1,
5841 BaseReg: AArch64::ZA);
5842 return;
5843 }
5844 case Intrinsic::aarch64_sme_readz_x4: {
5845 SelectMultiVectorMoveZ(N: Node, NumVecs: 4, Op: AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, MaxIdx: 7, Scale: 1,
5846 BaseReg: AArch64::ZA);
5847 return;
5848 }
5849 case Intrinsic::swift_async_context_addr: {
5850 SDLoc DL(Node);
5851 SDValue Chain = Node->getOperand(Num: 0);
5852 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, dl: DL, Reg: AArch64::FP, VT: MVT::i64);
5853 SDValue Res = SDValue(
5854 CurDAG->getMachineNode(Opcode: AArch64::SUBXri, dl: DL, VT: MVT::i64, Op1: CopyFP,
5855 Op2: CurDAG->getTargetConstant(Val: 8, DL, VT: MVT::i32),
5856 Op3: CurDAG->getTargetConstant(Val: 0, DL, VT: MVT::i32)),
5857 0);
5858 ReplaceUses(F: SDValue(Node, 0), T: Res);
5859 ReplaceUses(F: SDValue(Node, 1), T: CopyFP.getValue(R: 1));
5860 CurDAG->RemoveDeadNode(N: Node);
5861
5862 auto &MF = CurDAG->getMachineFunction();
5863 MF.getFrameInfo().setFrameAddressIsTaken(true);
5864 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5865 return;
5866 }
5867 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5868 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5869 VT: Node->getValueType(ResNo: 0),
5870 Opcodes: {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5871 AArch64::LUTI2_4ZTZI_S}))
5872 // Second Immediate must be <= 3:
5873 SelectMultiVectorLutiLane(Node, NumOutVecs: 4, Opc, MaxImm: 3);
5874 return;
5875 }
5876 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5877 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5878 VT: Node->getValueType(ResNo: 0),
5879 Opcodes: {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5880 // Second Immediate must be <= 1:
5881 SelectMultiVectorLutiLane(Node, NumOutVecs: 4, Opc, MaxImm: 1);
5882 return;
5883 }
5884 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5885 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5886 VT: Node->getValueType(ResNo: 0),
5887 Opcodes: {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5888 AArch64::LUTI2_2ZTZI_S}))
5889 // Second Immediate must be <= 7:
5890 SelectMultiVectorLutiLane(Node, NumOutVecs: 2, Opc, MaxImm: 7);
5891 return;
5892 }
5893 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5894 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5895 VT: Node->getValueType(ResNo: 0),
5896 Opcodes: {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5897 AArch64::LUTI4_2ZTZI_S}))
5898 // Second Immediate must be <= 3:
5899 SelectMultiVectorLutiLane(Node, NumOutVecs: 2, Opc, MaxImm: 3);
5900 return;
5901 }
5902 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5903 SelectMultiVectorLuti(Node, NumOutVecs: 4, Opc: AArch64::LUTI4_4ZZT2Z);
5904 return;
5905 }
5906 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5907 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5908 VT: Node->getValueType(ResNo: 0),
5909 Opcodes: {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5910 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5911 return;
5912 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5913 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5914 VT: Node->getValueType(ResNo: 0),
5915 Opcodes: {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
5916 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5917 return;
5918 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
5919 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5920 VT: Node->getValueType(ResNo: 0),
5921 Opcodes: {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
5922 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5923 return;
5924 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
5925 if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
5926 VT: Node->getValueType(ResNo: 0),
5927 Opcodes: {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
5928 SelectCVTIntrinsicFP8(N: Node, NumVecs: 2, Opcode: Opc);
5929 return;
5930 case Intrinsic::ptrauth_resign_load_relative:
5931 SelectPtrauthResign(N: Node);
5932 return;
5933 }
5934 } break;
5935 case ISD::INTRINSIC_WO_CHAIN: {
5936 unsigned IntNo = Node->getConstantOperandVal(Num: 0);
5937 switch (IntNo) {
5938 default:
5939 break;
5940 case Intrinsic::aarch64_tagp:
5941 SelectTagP(N: Node);
5942 return;
5943
5944 case Intrinsic::ptrauth_auth:
5945 SelectPtrauthAuth(N: Node);
5946 return;
5947
5948 case Intrinsic::ptrauth_resign:
5949 SelectPtrauthResign(N: Node);
5950 return;
5951
5952 case Intrinsic::aarch64_neon_tbl2:
5953 SelectTable(N: Node, NumVecs: 2,
5954 Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5955 isExt: false);
5956 return;
5957 case Intrinsic::aarch64_neon_tbl3:
5958 SelectTable(N: Node, NumVecs: 3, Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5959 : AArch64::TBLv16i8Three,
5960 isExt: false);
5961 return;
5962 case Intrinsic::aarch64_neon_tbl4:
5963 SelectTable(N: Node, NumVecs: 4, Opc: VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5964 : AArch64::TBLv16i8Four,
5965 isExt: false);
5966 return;
5967 case Intrinsic::aarch64_neon_tbx2:
5968 SelectTable(N: Node, NumVecs: 2,
5969 Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5970 isExt: true);
5971 return;
5972 case Intrinsic::aarch64_neon_tbx3:
5973 SelectTable(N: Node, NumVecs: 3, Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5974 : AArch64::TBXv16i8Three,
5975 isExt: true);
5976 return;
5977 case Intrinsic::aarch64_neon_tbx4:
5978 SelectTable(N: Node, NumVecs: 4, Opc: VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5979 : AArch64::TBXv16i8Four,
5980 isExt: true);
5981 return;
5982 case Intrinsic::aarch64_sve_srshl_single_x2:
5983 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5984 VT: Node->getValueType(ResNo: 0),
5985 Opcodes: {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5986 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5987 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
5988 return;
5989 case Intrinsic::aarch64_sve_srshl_single_x4:
5990 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5991 VT: Node->getValueType(ResNo: 0),
5992 Opcodes: {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5993 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5994 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
5995 return;
5996 case Intrinsic::aarch64_sve_urshl_single_x2:
5997 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5998 VT: Node->getValueType(ResNo: 0),
5999 Opcodes: {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6000 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6001 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6002 return;
6003 case Intrinsic::aarch64_sve_urshl_single_x4:
6004 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6005 VT: Node->getValueType(ResNo: 0),
6006 Opcodes: {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6007 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6008 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6009 return;
6010 case Intrinsic::aarch64_sve_srshl_x2:
6011 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6012 VT: Node->getValueType(ResNo: 0),
6013 Opcodes: {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6014 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6015 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6016 return;
6017 case Intrinsic::aarch64_sve_srshl_x4:
6018 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6019 VT: Node->getValueType(ResNo: 0),
6020 Opcodes: {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6021 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6022 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6023 return;
6024 case Intrinsic::aarch64_sve_urshl_x2:
6025 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6026 VT: Node->getValueType(ResNo: 0),
6027 Opcodes: {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6028 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6029 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6030 return;
6031 case Intrinsic::aarch64_sve_urshl_x4:
6032 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6033 VT: Node->getValueType(ResNo: 0),
6034 Opcodes: {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6035 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6036 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6037 return;
6038 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6039 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6040 VT: Node->getValueType(ResNo: 0),
6041 Opcodes: {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6042 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6043 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6044 return;
6045 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6046 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6047 VT: Node->getValueType(ResNo: 0),
6048 Opcodes: {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6049 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6050 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6051 return;
6052 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6053 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6054 VT: Node->getValueType(ResNo: 0),
6055 Opcodes: {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6056 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6057 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6058 return;
6059 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6060 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6061 VT: Node->getValueType(ResNo: 0),
6062 Opcodes: {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6063 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6064 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6065 return;
6066 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6067 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6068 VT: Node->getValueType(ResNo: 0),
6069 Opcodes: {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6070 AArch64::FSCALE_2ZZ_D}))
6071 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6072 return;
6073 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6074 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6075 VT: Node->getValueType(ResNo: 0),
6076 Opcodes: {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6077 AArch64::FSCALE_4ZZ_D}))
6078 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6079 return;
6080 case Intrinsic::aarch64_sme_fp8_scale_x2:
6081 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6082 VT: Node->getValueType(ResNo: 0),
6083 Opcodes: {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6084 AArch64::FSCALE_2Z2Z_D}))
6085 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6086 return;
6087 case Intrinsic::aarch64_sme_fp8_scale_x4:
6088 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6089 VT: Node->getValueType(ResNo: 0),
6090 Opcodes: {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6091 AArch64::FSCALE_4Z4Z_D}))
6092 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6093 return;
6094 case Intrinsic::aarch64_sve_whilege_x2:
6095 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6096 VT: Node->getValueType(ResNo: 0),
6097 Opcodes: {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6098 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6099 SelectWhilePair(N: Node, Opc: Op);
6100 return;
6101 case Intrinsic::aarch64_sve_whilegt_x2:
6102 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6103 VT: Node->getValueType(ResNo: 0),
6104 Opcodes: {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6105 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6106 SelectWhilePair(N: Node, Opc: Op);
6107 return;
6108 case Intrinsic::aarch64_sve_whilehi_x2:
6109 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6110 VT: Node->getValueType(ResNo: 0),
6111 Opcodes: {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6112 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6113 SelectWhilePair(N: Node, Opc: Op);
6114 return;
6115 case Intrinsic::aarch64_sve_whilehs_x2:
6116 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6117 VT: Node->getValueType(ResNo: 0),
6118 Opcodes: {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6119 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6120 SelectWhilePair(N: Node, Opc: Op);
6121 return;
6122 case Intrinsic::aarch64_sve_whilele_x2:
6123 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6124 VT: Node->getValueType(ResNo: 0),
6125 Opcodes: {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6126 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6127 SelectWhilePair(N: Node, Opc: Op);
6128 return;
6129 case Intrinsic::aarch64_sve_whilelo_x2:
6130 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6131 VT: Node->getValueType(ResNo: 0),
6132 Opcodes: {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6133 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6134 SelectWhilePair(N: Node, Opc: Op);
6135 return;
6136 case Intrinsic::aarch64_sve_whilels_x2:
6137 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6138 VT: Node->getValueType(ResNo: 0),
6139 Opcodes: {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6140 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6141 SelectWhilePair(N: Node, Opc: Op);
6142 return;
6143 case Intrinsic::aarch64_sve_whilelt_x2:
6144 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
6145 VT: Node->getValueType(ResNo: 0),
6146 Opcodes: {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6147 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6148 SelectWhilePair(N: Node, Opc: Op);
6149 return;
6150 case Intrinsic::aarch64_sve_smax_single_x2:
6151 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6152 VT: Node->getValueType(ResNo: 0),
6153 Opcodes: {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6154 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6155 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6156 return;
6157 case Intrinsic::aarch64_sve_umax_single_x2:
6158 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6159 VT: Node->getValueType(ResNo: 0),
6160 Opcodes: {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6161 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6162 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6163 return;
6164 case Intrinsic::aarch64_sve_fmax_single_x2:
6165 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6166 VT: Node->getValueType(ResNo: 0),
6167 Opcodes: {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6168 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6169 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6170 return;
6171 case Intrinsic::aarch64_sve_smax_single_x4:
6172 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6173 VT: Node->getValueType(ResNo: 0),
6174 Opcodes: {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6175 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6176 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6177 return;
6178 case Intrinsic::aarch64_sve_umax_single_x4:
6179 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6180 VT: Node->getValueType(ResNo: 0),
6181 Opcodes: {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6182 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6183 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6184 return;
6185 case Intrinsic::aarch64_sve_fmax_single_x4:
6186 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6187 VT: Node->getValueType(ResNo: 0),
6188 Opcodes: {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6189 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6190 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6191 return;
6192 case Intrinsic::aarch64_sve_smin_single_x2:
6193 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6194 VT: Node->getValueType(ResNo: 0),
6195 Opcodes: {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6196 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6197 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6198 return;
6199 case Intrinsic::aarch64_sve_umin_single_x2:
6200 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6201 VT: Node->getValueType(ResNo: 0),
6202 Opcodes: {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6203 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6204 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6205 return;
6206 case Intrinsic::aarch64_sve_fmin_single_x2:
6207 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6208 VT: Node->getValueType(ResNo: 0),
6209 Opcodes: {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6210 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6211 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6212 return;
6213 case Intrinsic::aarch64_sve_smin_single_x4:
6214 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6215 VT: Node->getValueType(ResNo: 0),
6216 Opcodes: {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6217 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6218 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6219 return;
6220 case Intrinsic::aarch64_sve_umin_single_x4:
6221 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6222 VT: Node->getValueType(ResNo: 0),
6223 Opcodes: {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6224 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6225 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6226 return;
6227 case Intrinsic::aarch64_sve_fmin_single_x4:
6228 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6229 VT: Node->getValueType(ResNo: 0),
6230 Opcodes: {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6231 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6232 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6233 return;
6234 case Intrinsic::aarch64_sve_smax_x2:
6235 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6236 VT: Node->getValueType(ResNo: 0),
6237 Opcodes: {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6238 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6239 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6240 return;
6241 case Intrinsic::aarch64_sve_umax_x2:
6242 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6243 VT: Node->getValueType(ResNo: 0),
6244 Opcodes: {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6245 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6246 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6247 return;
6248 case Intrinsic::aarch64_sve_fmax_x2:
6249 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6250 VT: Node->getValueType(ResNo: 0),
6251 Opcodes: {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6252 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6253 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6254 return;
6255 case Intrinsic::aarch64_sve_smax_x4:
6256 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6257 VT: Node->getValueType(ResNo: 0),
6258 Opcodes: {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6259 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6260 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6261 return;
6262 case Intrinsic::aarch64_sve_umax_x4:
6263 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6264 VT: Node->getValueType(ResNo: 0),
6265 Opcodes: {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6266 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6267 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6268 return;
6269 case Intrinsic::aarch64_sve_fmax_x4:
6270 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6271 VT: Node->getValueType(ResNo: 0),
6272 Opcodes: {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6273 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6274 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6275 return;
6276 case Intrinsic::aarch64_sme_famax_x2:
6277 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6278 VT: Node->getValueType(ResNo: 0),
6279 Opcodes: {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6280 AArch64::FAMAX_2Z2Z_D}))
6281 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6282 return;
6283 case Intrinsic::aarch64_sme_famax_x4:
6284 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6285 VT: Node->getValueType(ResNo: 0),
6286 Opcodes: {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6287 AArch64::FAMAX_4Z4Z_D}))
6288 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6289 return;
6290 case Intrinsic::aarch64_sme_famin_x2:
6291 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6292 VT: Node->getValueType(ResNo: 0),
6293 Opcodes: {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6294 AArch64::FAMIN_2Z2Z_D}))
6295 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6296 return;
6297 case Intrinsic::aarch64_sme_famin_x4:
6298 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6299 VT: Node->getValueType(ResNo: 0),
6300 Opcodes: {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6301 AArch64::FAMIN_4Z4Z_D}))
6302 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6303 return;
6304 case Intrinsic::aarch64_sve_smin_x2:
6305 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6306 VT: Node->getValueType(ResNo: 0),
6307 Opcodes: {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6308 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6309 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6310 return;
6311 case Intrinsic::aarch64_sve_umin_x2:
6312 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6313 VT: Node->getValueType(ResNo: 0),
6314 Opcodes: {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6315 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6316 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6317 return;
6318 case Intrinsic::aarch64_sve_fmin_x2:
6319 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6320 VT: Node->getValueType(ResNo: 0),
6321 Opcodes: {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6322 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6323 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6324 return;
6325 case Intrinsic::aarch64_sve_smin_x4:
6326 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6327 VT: Node->getValueType(ResNo: 0),
6328 Opcodes: {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6329 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6330 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6331 return;
6332 case Intrinsic::aarch64_sve_umin_x4:
6333 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6334 VT: Node->getValueType(ResNo: 0),
6335 Opcodes: {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6336 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6337 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6338 return;
6339 case Intrinsic::aarch64_sve_fmin_x4:
6340 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6341 VT: Node->getValueType(ResNo: 0),
6342 Opcodes: {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6343 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6344 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6345 return;
6346 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6347 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6348 VT: Node->getValueType(ResNo: 0),
6349 Opcodes: {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6350 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6351 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6352 return;
6353 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6354 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6355 VT: Node->getValueType(ResNo: 0),
6356 Opcodes: {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6357 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6358 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6359 return;
6360 case Intrinsic::aarch64_sve_fminnm_single_x2:
6361 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6362 VT: Node->getValueType(ResNo: 0),
6363 Opcodes: {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6364 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6365 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6366 return;
6367 case Intrinsic::aarch64_sve_fminnm_single_x4:
6368 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6369 VT: Node->getValueType(ResNo: 0),
6370 Opcodes: {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6371 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6372 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6373 return;
6374 case Intrinsic::aarch64_sve_fscale_single_x4:
6375 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::BFSCALE_4ZZ);
6376 return;
6377 case Intrinsic::aarch64_sve_fscale_single_x2:
6378 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::BFSCALE_2ZZ);
6379 return;
6380 case Intrinsic::aarch64_sve_fmul_single_x4:
6381 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6382 VT: Node->getValueType(ResNo: 0),
6383 Opcodes: {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6384 AArch64::FMUL_4ZZ_D}))
6385 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6386 return;
6387 case Intrinsic::aarch64_sve_fmul_single_x2:
6388 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6389 VT: Node->getValueType(ResNo: 0),
6390 Opcodes: {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6391 AArch64::FMUL_2ZZ_D}))
6392 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6393 return;
6394 case Intrinsic::aarch64_sve_fmaxnm_x2:
6395 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6396 VT: Node->getValueType(ResNo: 0),
6397 Opcodes: {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6398 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6399 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6400 return;
6401 case Intrinsic::aarch64_sve_fmaxnm_x4:
6402 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6403 VT: Node->getValueType(ResNo: 0),
6404 Opcodes: {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6405 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6406 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6407 return;
6408 case Intrinsic::aarch64_sve_fminnm_x2:
6409 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6410 VT: Node->getValueType(ResNo: 0),
6411 Opcodes: {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6412 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6413 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6414 return;
6415 case Intrinsic::aarch64_sve_fminnm_x4:
6416 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6417 VT: Node->getValueType(ResNo: 0),
6418 Opcodes: {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6419 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6420 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6421 return;
6422 case Intrinsic::aarch64_sve_aese_lane_x2:
6423 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESE_2ZZI_B);
6424 return;
6425 case Intrinsic::aarch64_sve_aesd_lane_x2:
6426 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESD_2ZZI_B);
6427 return;
6428 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6429 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESEMC_2ZZI_B);
6430 return;
6431 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6432 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::AESDIMC_2ZZI_B);
6433 return;
6434 case Intrinsic::aarch64_sve_aese_lane_x4:
6435 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESE_4ZZI_B);
6436 return;
6437 case Intrinsic::aarch64_sve_aesd_lane_x4:
6438 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESD_4ZZI_B);
6439 return;
6440 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6441 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESEMC_4ZZI_B);
6442 return;
6443 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6444 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: AArch64::AESDIMC_4ZZI_B);
6445 return;
6446 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6447 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: AArch64::PMLAL_2ZZZ_Q);
6448 return;
6449 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6450 SDLoc DL(Node);
6451 SmallVector<SDValue, 4> Regs(Node->ops().slice(N: 1, M: 2));
6452 SDNode *Res =
6453 CurDAG->getMachineNode(Opcode: AArch64::PMULL_2ZZZ_Q, dl: DL, VT: MVT::Untyped, Ops: Regs);
6454 SDValue SuperReg = SDValue(Res, 0);
6455 for (unsigned I = 0; I < 2; I++)
6456 ReplaceUses(F: SDValue(Node, I),
6457 T: CurDAG->getTargetExtractSubreg(SRIdx: AArch64::zsub0 + I, DL, VT,
6458 Operand: SuperReg));
6459 CurDAG->RemoveDeadNode(N: Node);
6460 return;
6461 }
6462 case Intrinsic::aarch64_sve_fscale_x4:
6463 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: AArch64::BFSCALE_4Z4Z);
6464 return;
6465 case Intrinsic::aarch64_sve_fscale_x2:
6466 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: AArch64::BFSCALE_2Z2Z);
6467 return;
6468 case Intrinsic::aarch64_sve_fmul_x4:
6469 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6470 VT: Node->getValueType(ResNo: 0),
6471 Opcodes: {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6472 AArch64::FMUL_4Z4Z_D}))
6473 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op);
6474 return;
6475 case Intrinsic::aarch64_sve_fmul_x2:
6476 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6477 VT: Node->getValueType(ResNo: 0),
6478 Opcodes: {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6479 AArch64::FMUL_2Z2Z_D}))
6480 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op);
6481 return;
6482 case Intrinsic::aarch64_sve_fcvtzs_x2:
6483 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::FCVTZS_2Z2Z_StoS);
6484 return;
6485 case Intrinsic::aarch64_sve_scvtf_x2:
6486 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::SCVTF_2Z2Z_StoS);
6487 return;
6488 case Intrinsic::aarch64_sve_fcvtzu_x2:
6489 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::FCVTZU_2Z2Z_StoS);
6490 return;
6491 case Intrinsic::aarch64_sve_ucvtf_x2:
6492 SelectCVTIntrinsic(N: Node, NumVecs: 2, Opcode: AArch64::UCVTF_2Z2Z_StoS);
6493 return;
6494 case Intrinsic::aarch64_sve_fcvtzs_x4:
6495 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::FCVTZS_4Z4Z_StoS);
6496 return;
6497 case Intrinsic::aarch64_sve_scvtf_x4:
6498 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::SCVTF_4Z4Z_StoS);
6499 return;
6500 case Intrinsic::aarch64_sve_fcvtzu_x4:
6501 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::FCVTZU_4Z4Z_StoS);
6502 return;
6503 case Intrinsic::aarch64_sve_ucvtf_x4:
6504 SelectCVTIntrinsic(N: Node, NumVecs: 4, Opcode: AArch64::UCVTF_4Z4Z_StoS);
6505 return;
6506 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6507 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, IsTupleInput: false, Opc: AArch64::FCVT_2ZZ_H_S);
6508 return;
6509 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6510 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, IsTupleInput: false, Opc: AArch64::FCVTL_2ZZ_H_S);
6511 return;
6512 case Intrinsic::aarch64_sve_sclamp_single_x2:
6513 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6514 VT: Node->getValueType(ResNo: 0),
6515 Opcodes: {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6516 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6517 SelectClamp(N: Node, NumVecs: 2, Op);
6518 return;
6519 case Intrinsic::aarch64_sve_uclamp_single_x2:
6520 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6521 VT: Node->getValueType(ResNo: 0),
6522 Opcodes: {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6523 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6524 SelectClamp(N: Node, NumVecs: 2, Op);
6525 return;
6526 case Intrinsic::aarch64_sve_fclamp_single_x2:
6527 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6528 VT: Node->getValueType(ResNo: 0),
6529 Opcodes: {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6530 AArch64::FCLAMP_VG2_2Z2Z_D}))
6531 SelectClamp(N: Node, NumVecs: 2, Op);
6532 return;
6533 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6534 SelectClamp(N: Node, NumVecs: 2, Op: AArch64::BFCLAMP_VG2_2ZZZ_H);
6535 return;
6536 case Intrinsic::aarch64_sve_sclamp_single_x4:
6537 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6538 VT: Node->getValueType(ResNo: 0),
6539 Opcodes: {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6540 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6541 SelectClamp(N: Node, NumVecs: 4, Op);
6542 return;
6543 case Intrinsic::aarch64_sve_uclamp_single_x4:
6544 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6545 VT: Node->getValueType(ResNo: 0),
6546 Opcodes: {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6547 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6548 SelectClamp(N: Node, NumVecs: 4, Op);
6549 return;
6550 case Intrinsic::aarch64_sve_fclamp_single_x4:
6551 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6552 VT: Node->getValueType(ResNo: 0),
6553 Opcodes: {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6554 AArch64::FCLAMP_VG4_4Z4Z_D}))
6555 SelectClamp(N: Node, NumVecs: 4, Op);
6556 return;
6557 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6558 SelectClamp(N: Node, NumVecs: 4, Op: AArch64::BFCLAMP_VG4_4ZZZ_H);
6559 return;
6560 case Intrinsic::aarch64_sve_add_single_x2:
6561 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6562 VT: Node->getValueType(ResNo: 0),
6563 Opcodes: {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6564 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6565 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: false, Opcode: Op);
6566 return;
6567 case Intrinsic::aarch64_sve_add_single_x4:
6568 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6569 VT: Node->getValueType(ResNo: 0),
6570 Opcodes: {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6571 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6572 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: false, Opcode: Op);
6573 return;
6574 case Intrinsic::aarch64_sve_zip_x2:
6575 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6576 VT: Node->getValueType(ResNo: 0),
6577 Opcodes: {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6578 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6579 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6580 return;
6581 case Intrinsic::aarch64_sve_zipq_x2:
6582 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false,
6583 Opc: AArch64::ZIP_VG2_2ZZZ_Q);
6584 return;
6585 case Intrinsic::aarch64_sve_zip_x4:
6586 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6587 VT: Node->getValueType(ResNo: 0),
6588 Opcodes: {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6589 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6590 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6591 return;
6592 case Intrinsic::aarch64_sve_zipq_x4:
6593 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true,
6594 Opc: AArch64::ZIP_VG4_4Z4Z_Q);
6595 return;
6596 case Intrinsic::aarch64_sve_uzp_x2:
6597 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6598 VT: Node->getValueType(ResNo: 0),
6599 Opcodes: {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6600 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6601 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6602 return;
6603 case Intrinsic::aarch64_sve_uzpq_x2:
6604 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false,
6605 Opc: AArch64::UZP_VG2_2ZZZ_Q);
6606 return;
6607 case Intrinsic::aarch64_sve_uzp_x4:
6608 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6609 VT: Node->getValueType(ResNo: 0),
6610 Opcodes: {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6611 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6612 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6613 return;
6614 case Intrinsic::aarch64_sve_uzpq_x4:
6615 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true,
6616 Opc: AArch64::UZP_VG4_4Z4Z_Q);
6617 return;
6618 case Intrinsic::aarch64_sve_sel_x2:
6619 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6620 VT: Node->getValueType(ResNo: 0),
6621 Opcodes: {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6622 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6623 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 2, IsZmMulti: true, Opcode: Op, /*HasPred=*/true);
6624 return;
6625 case Intrinsic::aarch64_sve_sel_x4:
6626 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6627 VT: Node->getValueType(ResNo: 0),
6628 Opcodes: {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6629 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6630 SelectDestructiveMultiIntrinsic(N: Node, NumVecs: 4, IsZmMulti: true, Opcode: Op, /*HasPred=*/true);
6631 return;
6632 case Intrinsic::aarch64_sve_frinta_x2:
6633 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTA_2Z2Z_S);
6634 return;
6635 case Intrinsic::aarch64_sve_frinta_x4:
6636 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTA_4Z4Z_S);
6637 return;
6638 case Intrinsic::aarch64_sve_frintm_x2:
6639 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTM_2Z2Z_S);
6640 return;
6641 case Intrinsic::aarch64_sve_frintm_x4:
6642 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTM_4Z4Z_S);
6643 return;
6644 case Intrinsic::aarch64_sve_frintn_x2:
6645 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTN_2Z2Z_S);
6646 return;
6647 case Intrinsic::aarch64_sve_frintn_x4:
6648 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTN_4Z4Z_S);
6649 return;
6650 case Intrinsic::aarch64_sve_frintp_x2:
6651 SelectFrintFromVT(N: Node, NumVecs: 2, Opcode: AArch64::FRINTP_2Z2Z_S);
6652 return;
6653 case Intrinsic::aarch64_sve_frintp_x4:
6654 SelectFrintFromVT(N: Node, NumVecs: 4, Opcode: AArch64::FRINTP_4Z4Z_S);
6655 return;
6656 case Intrinsic::aarch64_sve_sunpk_x2:
6657 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6658 VT: Node->getValueType(ResNo: 0),
6659 Opcodes: {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6660 AArch64::SUNPK_VG2_2ZZ_D}))
6661 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6662 return;
6663 case Intrinsic::aarch64_sve_uunpk_x2:
6664 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6665 VT: Node->getValueType(ResNo: 0),
6666 Opcodes: {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6667 AArch64::UUNPK_VG2_2ZZ_D}))
6668 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 2, /*IsTupleInput=*/false, Opc: Op);
6669 return;
6670 case Intrinsic::aarch64_sve_sunpk_x4:
6671 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6672 VT: Node->getValueType(ResNo: 0),
6673 Opcodes: {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6674 AArch64::SUNPK_VG4_4Z2Z_D}))
6675 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6676 return;
6677 case Intrinsic::aarch64_sve_uunpk_x4:
6678 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6679 VT: Node->getValueType(ResNo: 0),
6680 Opcodes: {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6681 AArch64::UUNPK_VG4_4Z2Z_D}))
6682 SelectUnaryMultiIntrinsic(N: Node, NumOutVecs: 4, /*IsTupleInput=*/true, Opc: Op);
6683 return;
6684 case Intrinsic::aarch64_sve_pext_x2: {
6685 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6686 VT: Node->getValueType(ResNo: 0),
6687 Opcodes: {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6688 AArch64::PEXT_2PCI_D}))
6689 SelectPExtPair(N: Node, Opc: Op);
6690 return;
6691 }
6692 }
6693 break;
6694 }
6695 case ISD::INTRINSIC_VOID: {
6696 unsigned IntNo = Node->getConstantOperandVal(Num: 1);
6697 if (Node->getNumOperands() >= 3)
6698 VT = Node->getOperand(Num: 2)->getValueType(ResNo: 0);
6699 switch (IntNo) {
6700 default:
6701 break;
6702 case Intrinsic::aarch64_neon_st1x2: {
6703 if (VT == MVT::v8i8) {
6704 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8b);
6705 return;
6706 } else if (VT == MVT::v16i8) {
6707 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov16b);
6708 return;
6709 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6710 VT == MVT::v4bf16) {
6711 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4h);
6712 return;
6713 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6714 VT == MVT::v8bf16) {
6715 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8h);
6716 return;
6717 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6718 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2s);
6719 return;
6720 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6721 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4s);
6722 return;
6723 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6724 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2d);
6725 return;
6726 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6727 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d);
6728 return;
6729 }
6730 break;
6731 }
6732 case Intrinsic::aarch64_neon_st1x3: {
6733 if (VT == MVT::v8i8) {
6734 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8b);
6735 return;
6736 } else if (VT == MVT::v16i8) {
6737 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev16b);
6738 return;
6739 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6740 VT == MVT::v4bf16) {
6741 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4h);
6742 return;
6743 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6744 VT == MVT::v8bf16) {
6745 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8h);
6746 return;
6747 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6748 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2s);
6749 return;
6750 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6751 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4s);
6752 return;
6753 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6754 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2d);
6755 return;
6756 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6757 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d);
6758 return;
6759 }
6760 break;
6761 }
6762 case Intrinsic::aarch64_neon_st1x4: {
6763 if (VT == MVT::v8i8) {
6764 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8b);
6765 return;
6766 } else if (VT == MVT::v16i8) {
6767 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv16b);
6768 return;
6769 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6770 VT == MVT::v4bf16) {
6771 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4h);
6772 return;
6773 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6774 VT == MVT::v8bf16) {
6775 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8h);
6776 return;
6777 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6778 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2s);
6779 return;
6780 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6781 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4s);
6782 return;
6783 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6784 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2d);
6785 return;
6786 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6787 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d);
6788 return;
6789 }
6790 break;
6791 }
6792 case Intrinsic::aarch64_neon_st2: {
6793 if (VT == MVT::v8i8) {
6794 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8b);
6795 return;
6796 } else if (VT == MVT::v16i8) {
6797 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov16b);
6798 return;
6799 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6800 VT == MVT::v4bf16) {
6801 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4h);
6802 return;
6803 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6804 VT == MVT::v8bf16) {
6805 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8h);
6806 return;
6807 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6808 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2s);
6809 return;
6810 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6811 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4s);
6812 return;
6813 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6814 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2d);
6815 return;
6816 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6817 SelectStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d);
6818 return;
6819 }
6820 break;
6821 }
6822 case Intrinsic::aarch64_neon_st3: {
6823 if (VT == MVT::v8i8) {
6824 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8b);
6825 return;
6826 } else if (VT == MVT::v16i8) {
6827 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev16b);
6828 return;
6829 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6830 VT == MVT::v4bf16) {
6831 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4h);
6832 return;
6833 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6834 VT == MVT::v8bf16) {
6835 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8h);
6836 return;
6837 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6838 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2s);
6839 return;
6840 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6841 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4s);
6842 return;
6843 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6844 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2d);
6845 return;
6846 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6847 SelectStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d);
6848 return;
6849 }
6850 break;
6851 }
6852 case Intrinsic::aarch64_neon_st4: {
6853 if (VT == MVT::v8i8) {
6854 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8b);
6855 return;
6856 } else if (VT == MVT::v16i8) {
6857 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv16b);
6858 return;
6859 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6860 VT == MVT::v4bf16) {
6861 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4h);
6862 return;
6863 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6864 VT == MVT::v8bf16) {
6865 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8h);
6866 return;
6867 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6868 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2s);
6869 return;
6870 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6871 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4s);
6872 return;
6873 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6874 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2d);
6875 return;
6876 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6877 SelectStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d);
6878 return;
6879 }
6880 break;
6881 }
6882 case Intrinsic::aarch64_neon_st2lane: {
6883 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6884 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i8);
6885 return;
6886 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6887 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6888 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i16);
6889 return;
6890 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6891 VT == MVT::v2f32) {
6892 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i32);
6893 return;
6894 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6895 VT == MVT::v1f64) {
6896 SelectStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i64);
6897 return;
6898 }
6899 break;
6900 }
6901 case Intrinsic::aarch64_neon_st3lane: {
6902 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6903 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i8);
6904 return;
6905 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6906 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6907 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i16);
6908 return;
6909 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6910 VT == MVT::v2f32) {
6911 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i32);
6912 return;
6913 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6914 VT == MVT::v1f64) {
6915 SelectStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i64);
6916 return;
6917 }
6918 break;
6919 }
6920 case Intrinsic::aarch64_neon_st4lane: {
6921 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6922 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i8);
6923 return;
6924 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6925 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6926 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i16);
6927 return;
6928 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6929 VT == MVT::v2f32) {
6930 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i32);
6931 return;
6932 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6933 VT == MVT::v1f64) {
6934 SelectStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i64);
6935 return;
6936 }
6937 break;
6938 }
6939 case Intrinsic::aarch64_sve_st2q: {
6940 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 4, Opc_rr: AArch64::ST2Q, Opc_ri: AArch64::ST2Q_IMM);
6941 return;
6942 }
6943 case Intrinsic::aarch64_sve_st3q: {
6944 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 4, Opc_rr: AArch64::ST3Q, Opc_ri: AArch64::ST3Q_IMM);
6945 return;
6946 }
6947 case Intrinsic::aarch64_sve_st4q: {
6948 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 4, Opc_rr: AArch64::ST4Q, Opc_ri: AArch64::ST4Q_IMM);
6949 return;
6950 }
6951 case Intrinsic::aarch64_sve_st2: {
6952 if (VT == MVT::nxv16i8) {
6953 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 0, Opc_rr: AArch64::ST2B, Opc_ri: AArch64::ST2B_IMM);
6954 return;
6955 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6956 VT == MVT::nxv8bf16) {
6957 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 1, Opc_rr: AArch64::ST2H, Opc_ri: AArch64::ST2H_IMM);
6958 return;
6959 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6960 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 2, Opc_rr: AArch64::ST2W, Opc_ri: AArch64::ST2W_IMM);
6961 return;
6962 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6963 SelectPredicatedStore(N: Node, NumVecs: 2, Scale: 3, Opc_rr: AArch64::ST2D, Opc_ri: AArch64::ST2D_IMM);
6964 return;
6965 }
6966 break;
6967 }
6968 case Intrinsic::aarch64_sve_st3: {
6969 if (VT == MVT::nxv16i8) {
6970 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 0, Opc_rr: AArch64::ST3B, Opc_ri: AArch64::ST3B_IMM);
6971 return;
6972 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6973 VT == MVT::nxv8bf16) {
6974 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 1, Opc_rr: AArch64::ST3H, Opc_ri: AArch64::ST3H_IMM);
6975 return;
6976 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6977 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 2, Opc_rr: AArch64::ST3W, Opc_ri: AArch64::ST3W_IMM);
6978 return;
6979 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6980 SelectPredicatedStore(N: Node, NumVecs: 3, Scale: 3, Opc_rr: AArch64::ST3D, Opc_ri: AArch64::ST3D_IMM);
6981 return;
6982 }
6983 break;
6984 }
6985 case Intrinsic::aarch64_sve_st4: {
6986 if (VT == MVT::nxv16i8) {
6987 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 0, Opc_rr: AArch64::ST4B, Opc_ri: AArch64::ST4B_IMM);
6988 return;
6989 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6990 VT == MVT::nxv8bf16) {
6991 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 1, Opc_rr: AArch64::ST4H, Opc_ri: AArch64::ST4H_IMM);
6992 return;
6993 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6994 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 2, Opc_rr: AArch64::ST4W, Opc_ri: AArch64::ST4W_IMM);
6995 return;
6996 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6997 SelectPredicatedStore(N: Node, NumVecs: 4, Scale: 3, Opc_rr: AArch64::ST4D, Opc_ri: AArch64::ST4D_IMM);
6998 return;
6999 }
7000 break;
7001 }
7002 }
7003 break;
7004 }
7005 case AArch64ISD::LD2post: {
7006 if (VT == MVT::v8i8) {
7007 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8b_POST, SubRegIdx: AArch64::dsub0);
7008 return;
7009 } else if (VT == MVT::v16i8) {
7010 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov16b_POST, SubRegIdx: AArch64::qsub0);
7011 return;
7012 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7013 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4h_POST, SubRegIdx: AArch64::dsub0);
7014 return;
7015 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7016 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov8h_POST, SubRegIdx: AArch64::qsub0);
7017 return;
7018 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7019 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2s_POST, SubRegIdx: AArch64::dsub0);
7020 return;
7021 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7022 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov4s_POST, SubRegIdx: AArch64::qsub0);
7023 return;
7024 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7025 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d_POST, SubRegIdx: AArch64::dsub0);
7026 return;
7027 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7028 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Twov2d_POST, SubRegIdx: AArch64::qsub0);
7029 return;
7030 }
7031 break;
7032 }
7033 case AArch64ISD::LD3post: {
7034 if (VT == MVT::v8i8) {
7035 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8b_POST, SubRegIdx: AArch64::dsub0);
7036 return;
7037 } else if (VT == MVT::v16i8) {
7038 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev16b_POST, SubRegIdx: AArch64::qsub0);
7039 return;
7040 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7041 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4h_POST, SubRegIdx: AArch64::dsub0);
7042 return;
7043 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7044 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev8h_POST, SubRegIdx: AArch64::qsub0);
7045 return;
7046 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7047 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2s_POST, SubRegIdx: AArch64::dsub0);
7048 return;
7049 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7050 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev4s_POST, SubRegIdx: AArch64::qsub0);
7051 return;
7052 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7053 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d_POST, SubRegIdx: AArch64::dsub0);
7054 return;
7055 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7056 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Threev2d_POST, SubRegIdx: AArch64::qsub0);
7057 return;
7058 }
7059 break;
7060 }
7061 case AArch64ISD::LD4post: {
7062 if (VT == MVT::v8i8) {
7063 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8b_POST, SubRegIdx: AArch64::dsub0);
7064 return;
7065 } else if (VT == MVT::v16i8) {
7066 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv16b_POST, SubRegIdx: AArch64::qsub0);
7067 return;
7068 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7069 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4h_POST, SubRegIdx: AArch64::dsub0);
7070 return;
7071 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7072 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv8h_POST, SubRegIdx: AArch64::qsub0);
7073 return;
7074 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7075 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2s_POST, SubRegIdx: AArch64::dsub0);
7076 return;
7077 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7078 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv4s_POST, SubRegIdx: AArch64::qsub0);
7079 return;
7080 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7081 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d_POST, SubRegIdx: AArch64::dsub0);
7082 return;
7083 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7084 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Fourv2d_POST, SubRegIdx: AArch64::qsub0);
7085 return;
7086 }
7087 break;
7088 }
7089 case AArch64ISD::LD1x2post: {
7090 if (VT == MVT::v8i8) {
7091 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8b_POST, SubRegIdx: AArch64::dsub0);
7092 return;
7093 } else if (VT == MVT::v16i8) {
7094 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov16b_POST, SubRegIdx: AArch64::qsub0);
7095 return;
7096 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7097 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4h_POST, SubRegIdx: AArch64::dsub0);
7098 return;
7099 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7100 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov8h_POST, SubRegIdx: AArch64::qsub0);
7101 return;
7102 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7103 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2s_POST, SubRegIdx: AArch64::dsub0);
7104 return;
7105 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7106 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov4s_POST, SubRegIdx: AArch64::qsub0);
7107 return;
7108 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7109 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov1d_POST, SubRegIdx: AArch64::dsub0);
7110 return;
7111 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7112 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD1Twov2d_POST, SubRegIdx: AArch64::qsub0);
7113 return;
7114 }
7115 break;
7116 }
7117 case AArch64ISD::LD1x3post: {
7118 if (VT == MVT::v8i8) {
7119 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8b_POST, SubRegIdx: AArch64::dsub0);
7120 return;
7121 } else if (VT == MVT::v16i8) {
7122 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev16b_POST, SubRegIdx: AArch64::qsub0);
7123 return;
7124 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7125 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4h_POST, SubRegIdx: AArch64::dsub0);
7126 return;
7127 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7128 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev8h_POST, SubRegIdx: AArch64::qsub0);
7129 return;
7130 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7131 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2s_POST, SubRegIdx: AArch64::dsub0);
7132 return;
7133 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7134 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev4s_POST, SubRegIdx: AArch64::qsub0);
7135 return;
7136 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7137 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev1d_POST, SubRegIdx: AArch64::dsub0);
7138 return;
7139 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7140 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD1Threev2d_POST, SubRegIdx: AArch64::qsub0);
7141 return;
7142 }
7143 break;
7144 }
7145 case AArch64ISD::LD1x4post: {
7146 if (VT == MVT::v8i8) {
7147 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8b_POST, SubRegIdx: AArch64::dsub0);
7148 return;
7149 } else if (VT == MVT::v16i8) {
7150 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv16b_POST, SubRegIdx: AArch64::qsub0);
7151 return;
7152 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7153 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4h_POST, SubRegIdx: AArch64::dsub0);
7154 return;
7155 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7156 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv8h_POST, SubRegIdx: AArch64::qsub0);
7157 return;
7158 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7159 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2s_POST, SubRegIdx: AArch64::dsub0);
7160 return;
7161 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7162 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv4s_POST, SubRegIdx: AArch64::qsub0);
7163 return;
7164 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7165 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv1d_POST, SubRegIdx: AArch64::dsub0);
7166 return;
7167 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7168 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD1Fourv2d_POST, SubRegIdx: AArch64::qsub0);
7169 return;
7170 }
7171 break;
7172 }
7173 case AArch64ISD::LD1DUPpost: {
7174 if (VT == MVT::v8i8) {
7175 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv8b_POST, SubRegIdx: AArch64::dsub0);
7176 return;
7177 } else if (VT == MVT::v16i8) {
7178 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv16b_POST, SubRegIdx: AArch64::qsub0);
7179 return;
7180 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7181 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv4h_POST, SubRegIdx: AArch64::dsub0);
7182 return;
7183 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7184 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv8h_POST, SubRegIdx: AArch64::qsub0);
7185 return;
7186 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7187 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv2s_POST, SubRegIdx: AArch64::dsub0);
7188 return;
7189 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7190 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv4s_POST, SubRegIdx: AArch64::qsub0);
7191 return;
7192 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7193 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv1d_POST, SubRegIdx: AArch64::dsub0);
7194 return;
7195 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7196 SelectPostLoad(N: Node, NumVecs: 1, Opc: AArch64::LD1Rv2d_POST, SubRegIdx: AArch64::qsub0);
7197 return;
7198 }
7199 break;
7200 }
7201 case AArch64ISD::LD2DUPpost: {
7202 if (VT == MVT::v8i8) {
7203 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8b_POST, SubRegIdx: AArch64::dsub0);
7204 return;
7205 } else if (VT == MVT::v16i8) {
7206 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv16b_POST, SubRegIdx: AArch64::qsub0);
7207 return;
7208 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7209 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4h_POST, SubRegIdx: AArch64::dsub0);
7210 return;
7211 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7212 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv8h_POST, SubRegIdx: AArch64::qsub0);
7213 return;
7214 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7215 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2s_POST, SubRegIdx: AArch64::dsub0);
7216 return;
7217 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7218 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv4s_POST, SubRegIdx: AArch64::qsub0);
7219 return;
7220 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7221 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv1d_POST, SubRegIdx: AArch64::dsub0);
7222 return;
7223 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7224 SelectPostLoad(N: Node, NumVecs: 2, Opc: AArch64::LD2Rv2d_POST, SubRegIdx: AArch64::qsub0);
7225 return;
7226 }
7227 break;
7228 }
7229 case AArch64ISD::LD3DUPpost: {
7230 if (VT == MVT::v8i8) {
7231 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8b_POST, SubRegIdx: AArch64::dsub0);
7232 return;
7233 } else if (VT == MVT::v16i8) {
7234 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv16b_POST, SubRegIdx: AArch64::qsub0);
7235 return;
7236 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7237 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4h_POST, SubRegIdx: AArch64::dsub0);
7238 return;
7239 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7240 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv8h_POST, SubRegIdx: AArch64::qsub0);
7241 return;
7242 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7243 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2s_POST, SubRegIdx: AArch64::dsub0);
7244 return;
7245 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7246 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv4s_POST, SubRegIdx: AArch64::qsub0);
7247 return;
7248 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7249 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv1d_POST, SubRegIdx: AArch64::dsub0);
7250 return;
7251 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7252 SelectPostLoad(N: Node, NumVecs: 3, Opc: AArch64::LD3Rv2d_POST, SubRegIdx: AArch64::qsub0);
7253 return;
7254 }
7255 break;
7256 }
7257 case AArch64ISD::LD4DUPpost: {
7258 if (VT == MVT::v8i8) {
7259 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8b_POST, SubRegIdx: AArch64::dsub0);
7260 return;
7261 } else if (VT == MVT::v16i8) {
7262 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv16b_POST, SubRegIdx: AArch64::qsub0);
7263 return;
7264 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7265 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4h_POST, SubRegIdx: AArch64::dsub0);
7266 return;
7267 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7268 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv8h_POST, SubRegIdx: AArch64::qsub0);
7269 return;
7270 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7271 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2s_POST, SubRegIdx: AArch64::dsub0);
7272 return;
7273 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7274 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv4s_POST, SubRegIdx: AArch64::qsub0);
7275 return;
7276 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7277 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv1d_POST, SubRegIdx: AArch64::dsub0);
7278 return;
7279 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7280 SelectPostLoad(N: Node, NumVecs: 4, Opc: AArch64::LD4Rv2d_POST, SubRegIdx: AArch64::qsub0);
7281 return;
7282 }
7283 break;
7284 }
7285 case AArch64ISD::LD1LANEpost: {
7286 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7287 SelectPostLoadLane(N: Node, NumVecs: 1, Opc: AArch64::LD1i8_POST);
7288 return;
7289 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7290 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7291 SelectPostLoadLane(N: Node, NumVecs: 1, Opc: AArch64::LD1i16_POST);
7292 return;
7293 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7294 VT == MVT::v2f32) {
7295 SelectPostLoadLane(N: Node, NumVecs: 1, Opc: AArch64::LD1i32_POST);
7296 return;
7297 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7298 VT == MVT::v1f64) {
7299 SelectPostLoadLane(N: Node, NumVecs: 1, Opc: AArch64::LD1i64_POST);
7300 return;
7301 }
7302 break;
7303 }
7304 case AArch64ISD::LD2LANEpost: {
7305 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7306 SelectPostLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i8_POST);
7307 return;
7308 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7309 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7310 SelectPostLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i16_POST);
7311 return;
7312 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7313 VT == MVT::v2f32) {
7314 SelectPostLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i32_POST);
7315 return;
7316 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7317 VT == MVT::v1f64) {
7318 SelectPostLoadLane(N: Node, NumVecs: 2, Opc: AArch64::LD2i64_POST);
7319 return;
7320 }
7321 break;
7322 }
7323 case AArch64ISD::LD3LANEpost: {
7324 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7325 SelectPostLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i8_POST);
7326 return;
7327 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7328 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7329 SelectPostLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i16_POST);
7330 return;
7331 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7332 VT == MVT::v2f32) {
7333 SelectPostLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i32_POST);
7334 return;
7335 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7336 VT == MVT::v1f64) {
7337 SelectPostLoadLane(N: Node, NumVecs: 3, Opc: AArch64::LD3i64_POST);
7338 return;
7339 }
7340 break;
7341 }
7342 case AArch64ISD::LD4LANEpost: {
7343 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7344 SelectPostLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i8_POST);
7345 return;
7346 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7347 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7348 SelectPostLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i16_POST);
7349 return;
7350 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7351 VT == MVT::v2f32) {
7352 SelectPostLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i32_POST);
7353 return;
7354 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7355 VT == MVT::v1f64) {
7356 SelectPostLoadLane(N: Node, NumVecs: 4, Opc: AArch64::LD4i64_POST);
7357 return;
7358 }
7359 break;
7360 }
7361 case AArch64ISD::ST2post: {
7362 VT = Node->getOperand(Num: 1).getValueType();
7363 if (VT == MVT::v8i8) {
7364 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8b_POST);
7365 return;
7366 } else if (VT == MVT::v16i8) {
7367 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov16b_POST);
7368 return;
7369 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7370 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4h_POST);
7371 return;
7372 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7373 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov8h_POST);
7374 return;
7375 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7376 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2s_POST);
7377 return;
7378 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7379 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov4s_POST);
7380 return;
7381 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7382 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST2Twov2d_POST);
7383 return;
7384 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7385 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d_POST);
7386 return;
7387 }
7388 break;
7389 }
7390 case AArch64ISD::ST3post: {
7391 VT = Node->getOperand(Num: 1).getValueType();
7392 if (VT == MVT::v8i8) {
7393 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8b_POST);
7394 return;
7395 } else if (VT == MVT::v16i8) {
7396 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev16b_POST);
7397 return;
7398 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7399 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4h_POST);
7400 return;
7401 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7402 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev8h_POST);
7403 return;
7404 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7405 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2s_POST);
7406 return;
7407 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7408 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev4s_POST);
7409 return;
7410 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7411 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST3Threev2d_POST);
7412 return;
7413 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7414 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d_POST);
7415 return;
7416 }
7417 break;
7418 }
7419 case AArch64ISD::ST4post: {
7420 VT = Node->getOperand(Num: 1).getValueType();
7421 if (VT == MVT::v8i8) {
7422 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8b_POST);
7423 return;
7424 } else if (VT == MVT::v16i8) {
7425 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv16b_POST);
7426 return;
7427 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7428 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4h_POST);
7429 return;
7430 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7431 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv8h_POST);
7432 return;
7433 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7434 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2s_POST);
7435 return;
7436 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7437 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv4s_POST);
7438 return;
7439 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7440 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST4Fourv2d_POST);
7441 return;
7442 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7443 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d_POST);
7444 return;
7445 }
7446 break;
7447 }
7448 case AArch64ISD::ST1x2post: {
7449 VT = Node->getOperand(Num: 1).getValueType();
7450 if (VT == MVT::v8i8) {
7451 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8b_POST);
7452 return;
7453 } else if (VT == MVT::v16i8) {
7454 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov16b_POST);
7455 return;
7456 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7457 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4h_POST);
7458 return;
7459 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7460 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov8h_POST);
7461 return;
7462 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7463 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2s_POST);
7464 return;
7465 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7466 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov4s_POST);
7467 return;
7468 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7469 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov1d_POST);
7470 return;
7471 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7472 SelectPostStore(N: Node, NumVecs: 2, Opc: AArch64::ST1Twov2d_POST);
7473 return;
7474 }
7475 break;
7476 }
7477 case AArch64ISD::ST1x3post: {
7478 VT = Node->getOperand(Num: 1).getValueType();
7479 if (VT == MVT::v8i8) {
7480 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8b_POST);
7481 return;
7482 } else if (VT == MVT::v16i8) {
7483 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev16b_POST);
7484 return;
7485 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7486 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4h_POST);
7487 return;
7488 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7489 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev8h_POST);
7490 return;
7491 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7492 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2s_POST);
7493 return;
7494 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7495 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev4s_POST);
7496 return;
7497 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7498 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev1d_POST);
7499 return;
7500 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7501 SelectPostStore(N: Node, NumVecs: 3, Opc: AArch64::ST1Threev2d_POST);
7502 return;
7503 }
7504 break;
7505 }
7506 case AArch64ISD::ST1x4post: {
7507 VT = Node->getOperand(Num: 1).getValueType();
7508 if (VT == MVT::v8i8) {
7509 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8b_POST);
7510 return;
7511 } else if (VT == MVT::v16i8) {
7512 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv16b_POST);
7513 return;
7514 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7515 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4h_POST);
7516 return;
7517 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7518 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv8h_POST);
7519 return;
7520 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7521 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2s_POST);
7522 return;
7523 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7524 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv4s_POST);
7525 return;
7526 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7527 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv1d_POST);
7528 return;
7529 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7530 SelectPostStore(N: Node, NumVecs: 4, Opc: AArch64::ST1Fourv2d_POST);
7531 return;
7532 }
7533 break;
7534 }
7535 case AArch64ISD::ST2LANEpost: {
7536 VT = Node->getOperand(Num: 1).getValueType();
7537 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7538 SelectPostStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i8_POST);
7539 return;
7540 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7541 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7542 SelectPostStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i16_POST);
7543 return;
7544 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7545 VT == MVT::v2f32) {
7546 SelectPostStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i32_POST);
7547 return;
7548 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7549 VT == MVT::v1f64) {
7550 SelectPostStoreLane(N: Node, NumVecs: 2, Opc: AArch64::ST2i64_POST);
7551 return;
7552 }
7553 break;
7554 }
7555 case AArch64ISD::ST3LANEpost: {
7556 VT = Node->getOperand(Num: 1).getValueType();
7557 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7558 SelectPostStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i8_POST);
7559 return;
7560 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7561 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7562 SelectPostStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i16_POST);
7563 return;
7564 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7565 VT == MVT::v2f32) {
7566 SelectPostStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i32_POST);
7567 return;
7568 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7569 VT == MVT::v1f64) {
7570 SelectPostStoreLane(N: Node, NumVecs: 3, Opc: AArch64::ST3i64_POST);
7571 return;
7572 }
7573 break;
7574 }
7575 case AArch64ISD::ST4LANEpost: {
7576 VT = Node->getOperand(Num: 1).getValueType();
7577 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7578 SelectPostStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i8_POST);
7579 return;
7580 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7581 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7582 SelectPostStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i16_POST);
7583 return;
7584 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7585 VT == MVT::v2f32) {
7586 SelectPostStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i32_POST);
7587 return;
7588 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7589 VT == MVT::v1f64) {
7590 SelectPostStoreLane(N: Node, NumVecs: 4, Opc: AArch64::ST4i64_POST);
7591 return;
7592 }
7593 break;
7594 }
7595 }
7596
7597 // Select the default instruction
7598 SelectCode(N: Node);
7599}
7600
7601/// createAArch64ISelDag - This pass converts a legalized DAG into a
7602/// AArch64-specific DAG, ready for instruction scheduling.
7603FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7604 CodeGenOptLevel OptLevel) {
7605 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7606}
7607
7608/// When \p PredVT is a scalable vector predicate in the form
7609/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7610/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7611/// structured vectors (NumVec >1), the output data type is
7612/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7613/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7614/// EVT.
7615static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7616 unsigned NumVec) {
7617 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7618 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7619 return EVT();
7620
7621 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7622 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7623 return EVT();
7624
7625 ElementCount EC = PredVT.getVectorElementCount();
7626 EVT ScalarVT =
7627 EVT::getIntegerVT(Context&: Ctx, BitWidth: AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7628 EVT MemVT = EVT::getVectorVT(Context&: Ctx, VT: ScalarVT, EC: EC * NumVec);
7629
7630 return MemVT;
7631}
7632
7633/// Return the EVT of the data associated to a memory operation in \p
7634/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7635static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7636 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Val: Root))
7637 return MemIntr->getMemoryVT();
7638
7639 if (isa<MemSDNode>(Val: Root)) {
7640 EVT MemVT = cast<MemSDNode>(Val: Root)->getMemoryVT();
7641
7642 EVT DataVT;
7643 if (auto *Load = dyn_cast<LoadSDNode>(Val: Root))
7644 DataVT = Load->getValueType(ResNo: 0);
7645 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Val: Root))
7646 DataVT = Load->getValueType(ResNo: 0);
7647 else if (auto *Store = dyn_cast<StoreSDNode>(Val: Root))
7648 DataVT = Store->getValue().getValueType();
7649 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Val: Root))
7650 DataVT = Store->getValue().getValueType();
7651 else
7652 llvm_unreachable("Unexpected MemSDNode!");
7653
7654 return DataVT.changeVectorElementType(Context&: Ctx, EltVT: MemVT.getVectorElementType());
7655 }
7656
7657 const unsigned Opcode = Root->getOpcode();
7658 // For custom ISD nodes, we have to look at them individually to extract the
7659 // type of the data moved to/from memory.
7660 switch (Opcode) {
7661 case AArch64ISD::LD1_MERGE_ZERO:
7662 case AArch64ISD::LD1S_MERGE_ZERO:
7663 case AArch64ISD::LDNF1_MERGE_ZERO:
7664 case AArch64ISD::LDNF1S_MERGE_ZERO:
7665 return cast<VTSDNode>(Val: Root->getOperand(Num: 3))->getVT();
7666 case AArch64ISD::ST1_PRED:
7667 return cast<VTSDNode>(Val: Root->getOperand(Num: 4))->getVT();
7668 default:
7669 break;
7670 }
7671
7672 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7673 return EVT();
7674
7675 switch (Root->getConstantOperandVal(Num: 1)) {
7676 default:
7677 return EVT();
7678 case Intrinsic::aarch64_sme_ldr:
7679 case Intrinsic::aarch64_sme_str:
7680 return MVT::nxv16i8;
7681 case Intrinsic::aarch64_sve_prf:
7682 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7683 // width of the predicate.
7684 return getPackedVectorTypeFromPredicateType(
7685 Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/1);
7686 case Intrinsic::aarch64_sve_ld2_sret:
7687 case Intrinsic::aarch64_sve_ld2q_sret:
7688 return getPackedVectorTypeFromPredicateType(
7689 Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/2);
7690 case Intrinsic::aarch64_sve_st2q:
7691 return getPackedVectorTypeFromPredicateType(
7692 Ctx, PredVT: Root->getOperand(Num: 4)->getValueType(ResNo: 0), /*NumVec=*/2);
7693 case Intrinsic::aarch64_sve_ld3_sret:
7694 case Intrinsic::aarch64_sve_ld3q_sret:
7695 return getPackedVectorTypeFromPredicateType(
7696 Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/3);
7697 case Intrinsic::aarch64_sve_st3q:
7698 return getPackedVectorTypeFromPredicateType(
7699 Ctx, PredVT: Root->getOperand(Num: 5)->getValueType(ResNo: 0), /*NumVec=*/3);
7700 case Intrinsic::aarch64_sve_ld4_sret:
7701 case Intrinsic::aarch64_sve_ld4q_sret:
7702 return getPackedVectorTypeFromPredicateType(
7703 Ctx, PredVT: Root->getOperand(Num: 2)->getValueType(ResNo: 0), /*NumVec=*/4);
7704 case Intrinsic::aarch64_sve_st4q:
7705 return getPackedVectorTypeFromPredicateType(
7706 Ctx, PredVT: Root->getOperand(Num: 6)->getValueType(ResNo: 0), /*NumVec=*/4);
7707 case Intrinsic::aarch64_sve_ld1udq:
7708 case Intrinsic::aarch64_sve_st1dq:
7709 return EVT(MVT::nxv1i64);
7710 case Intrinsic::aarch64_sve_ld1uwq:
7711 case Intrinsic::aarch64_sve_st1wq:
7712 return EVT(MVT::nxv1i32);
7713 }
7714}
7715
/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(Ctx&: *(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  // A bare frame index matches as "FI + 0".
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.hasScalableStackID(ObjectIdx: FI)) {
      Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i64);
      return true;
    }

    return false;
  }

  // Without a known memory type the offset cannot be scaled.
  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  // The addend is either a VSCALE multiple (a vector-length scaled byte
  // offset), or a plain constant byte offset when the exact SVE vector
  // length is known to the subtarget.
  SDValue VScale = N.getOperand(i: 1);
  int64_t MulImm = std::numeric_limits<int64_t>::max();
  if (VScale.getOpcode() == ISD::VSCALE) {
    MulImm = cast<ConstantSDNode>(Val: VScale.getOperand(i: 0))->getSExtValue();
  } else if (auto C = dyn_cast<ConstantSDNode>(Val&: VScale)) {
    int64_t ByteOffset = C->getSExtValue();
    const auto KnownVScale =
        Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;

    // KnownVScale == 0 means the vector length is unknown; in that case a
    // plain constant cannot be expressed as a VL-scaled offset.
    if (!KnownVScale || ByteOffset % KnownVScale != 0)
      return false;

    MulImm = ByteOffset / KnownVScale;
  } else
    return false;

  // The immediate encodes a count of whole memory vectors, so the multiplier
  // must be an exact multiple of the access width (in bytes per VL block).
  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(i: 0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Val&: Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.hasScalableStackID(ObjectIdx: FI))
      Base = CurDAG->getTargetFrameIndex(FI, VT: TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Val: Offset, DL: SDLoc(N), VT: MVT::i64);
  return true;
}
7784
7785/// Select register plus register addressing mode for SVE, with scaled
7786/// offset.
7787bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7788 SDValue &Base,
7789 SDValue &Offset) {
7790 if (N.getOpcode() != ISD::ADD)
7791 return false;
7792
7793 // Process an ADD node.
7794 const SDValue LHS = N.getOperand(i: 0);
7795 const SDValue RHS = N.getOperand(i: 1);
7796
7797 // 8 bit data does not come with the SHL node, so it is treated
7798 // separately.
7799 if (Scale == 0) {
7800 Base = LHS;
7801 Offset = RHS;
7802 return true;
7803 }
7804
7805 if (auto C = dyn_cast<ConstantSDNode>(Val: RHS)) {
7806 int64_t ImmOff = C->getSExtValue();
7807 unsigned Size = 1 << Scale;
7808
7809 // To use the reg+reg addressing mode, the immediate must be a multiple of
7810 // the vector element's byte size.
7811 if (ImmOff % Size)
7812 return false;
7813
7814 SDLoc DL(N);
7815 Base = LHS;
7816 Offset = CurDAG->getTargetConstant(Val: ImmOff >> Scale, DL, VT: MVT::i64);
7817 SDValue Ops[] = {Offset};
7818 SDNode *MI = CurDAG->getMachineNode(Opcode: AArch64::MOVi64imm, dl: DL, VT: MVT::i64, Ops);
7819 Offset = SDValue(MI, 0);
7820 return true;
7821 }
7822
7823 // Check if the RHS is a shift node with a constant.
7824 if (RHS.getOpcode() != ISD::SHL)
7825 return false;
7826
7827 const SDValue ShiftRHS = RHS.getOperand(i: 1);
7828 if (auto *C = dyn_cast<ConstantSDNode>(Val: ShiftRHS))
7829 if (C->getZExtValue() == Scale) {
7830 Base = LHS;
7831 Offset = RHS.getOperand(i: 0);
7832 return true;
7833 }
7834
7835 return false;
7836}
7837
7838bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7839 const AArch64TargetLowering *TLI =
7840 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7841
7842 return TLI->isAllActivePredicate(DAG&: *CurDAG, N);
7843}
7844
7845bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7846 EVT VT = N.getValueType();
7847 return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7848}
7849
7850bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7851 SDValue &Base, SDValue &Offset,
7852 unsigned Scale) {
7853 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7854 if (auto *C = dyn_cast<ConstantSDNode>(Val&: CN)) {
7855 int64_t ImmOff = C->getSExtValue();
7856 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7857 return CurDAG->getTargetConstant(Val: ImmOff / Scale, DL: SDLoc(N), VT: MVT::i64);
7858 }
7859 return SDValue();
7860 };
7861
7862 if (SDValue C = MatchConstantOffset(N)) {
7863 Base = CurDAG->getConstant(Val: 0, DL: SDLoc(N), VT: MVT::i32);
7864 Offset = C;
7865 return true;
7866 }
7867
7868 // Try to untangle an ADD node into a 'reg + offset'
7869 if (CurDAG->isBaseWithConstantOffset(Op: N)) {
7870 if (SDValue C = MatchConstantOffset(N.getOperand(i: 1))) {
7871 Base = N.getOperand(i: 0);
7872 Offset = C;
7873 return true;
7874 }
7875 }
7876
7877 // By default, just match reg + 0.
7878 Base = N;
7879 Offset = CurDAG->getTargetConstant(Val: 0, DL: SDLoc(N), VT: MVT::i64);
7880 return true;
7881}
7882
7883bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7884 SDValue &Imm) {
7885 AArch64CC::CondCode CC =
7886 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(Num: 1));
7887 if (auto *CN = dyn_cast<ConstantSDNode>(Val&: N)) {
7888 // Check conservatively if the immediate fits the valid range [0, 64).
7889 // Immediate variants for GE and HS definitely need to be decremented
7890 // when lowering the pseudos later, so an immediate of 1 would become 0.
7891 // For the inverse conditions LT and LO we don't know for sure if they
7892 // will need a decrement but should the decision be made to reverse the
7893 // branch condition, we again end up with the need to decrement.
7894 // The same argument holds for LE, LS, GT and HI and possibly
7895 // incremented immediates. This can lead to slightly less optimal
7896 // codegen, e.g. we never codegen the legal case
7897 // cblt w0, #63, A
7898 // because we could end up with the illegal case
7899 // cbge w0, #64, B
7900 // should the decision to reverse the branch direction be made. For the
7901 // lower bound cases this is no problem since we can express comparisons
7902 // against 0 with either tbz/tnbz or using wzr/xzr.
7903 uint64_t LowerBound = 0, UpperBound = 64;
7904 switch (CC) {
7905 case AArch64CC::GE:
7906 case AArch64CC::HS:
7907 case AArch64CC::LT:
7908 case AArch64CC::LO:
7909 LowerBound = 1;
7910 break;
7911 case AArch64CC::LE:
7912 case AArch64CC::LS:
7913 case AArch64CC::GT:
7914 case AArch64CC::HI:
7915 UpperBound = 63;
7916 break;
7917 default:
7918 break;
7919 }
7920
7921 if (CN->getAPIntValue().uge(RHS: LowerBound) &&
7922 CN->getAPIntValue().ult(RHS: UpperBound)) {
7923 SDLoc DL(N);
7924 Imm = CurDAG->getTargetConstant(Val: CN->getZExtValue(), DL, VT: N.getValueType());
7925 return true;
7926 }
7927 }
7928
7929 return false;
7930}
7931
7932template <bool MatchCBB>
7933bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
7934 SDValue &ExtType) {
7935
7936 // Use an invalid shift-extend value to indicate we don't need to extend later
7937 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
7938 EVT Ty = cast<VTSDNode>(Val: N.getOperand(i: 1))->getVT();
7939 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
7940 return false;
7941 Reg = N.getOperand(i: 0);
7942 ExtType = CurDAG->getSignedTargetConstant(Val: AArch64_AM::InvalidShiftExtend,
7943 DL: SDLoc(N), VT: MVT::i32);
7944 return true;
7945 }
7946
7947 AArch64_AM::ShiftExtendType ET = getExtendTypeForNode(N);
7948
7949 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
7950 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
7951 Reg = N.getOperand(i: 0);
7952 ExtType =
7953 CurDAG->getTargetConstant(Val: getExtendEncoding(ET), DL: SDLoc(N), VT: MVT::i32);
7954 return true;
7955 }
7956
7957 return false;
7958}
7959
7960void AArch64DAGToDAGISel::PreprocessISelDAG() {
7961 bool MadeChange = false;
7962 for (SDNode &N : llvm::make_early_inc_range(Range: CurDAG->allnodes())) {
7963 if (N.use_empty())
7964 continue;
7965
7966 SDValue Result;
7967 switch (N.getOpcode()) {
7968 case ISD::SCALAR_TO_VECTOR: {
7969 EVT ScalarTy = N.getValueType(ResNo: 0).getVectorElementType();
7970 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
7971 ScalarTy == N.getOperand(Num: 0).getValueType())
7972 Result = addBitcastHints(DAG&: *CurDAG, N);
7973
7974 break;
7975 }
7976 default:
7977 break;
7978 }
7979
7980 if (Result) {
7981 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
7982 LLVM_DEBUG(N.dump(CurDAG));
7983 LLVM_DEBUG(dbgs() << "\nNew: ");
7984 LLVM_DEBUG(Result.dump(CurDAG));
7985 LLVM_DEBUG(dbgs() << "\n");
7986
7987 CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(&N, 0), To: Result);
7988 MadeChange = true;
7989 }
7990 }
7991
7992 if (MadeChange)
7993 CurDAG->RemoveDeadNodes();
7994
7995 SelectionDAGISel::PreprocessISelDAG();
7996}
7997